diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp
index 0d74a4158ad..4ad760d278f 100644
--- a/cpp/include/cudf/io/nvcomp_adapter.hpp
+++ b/cpp/include/cudf/io/nvcomp_adapter.hpp
@@ -22,7 +22,7 @@
 #include

 namespace CUDF_EXPORT cudf {
-namespace io::nvcomp {
+namespace io::detail::nvcomp {

 enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP };

@@ -88,5 +88,5 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para
 [[nodiscard]] std::optional<std::string> is_decompression_disabled(
   compression_type compression, feature_status_parameters params = feature_status_parameters());

-} // namespace io::nvcomp
+} // namespace io::detail::nvcomp
 } // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/io/comp/common.hpp b/cpp/src/io/comp/common.hpp
new file mode 100644
index 00000000000..a81ac60e03a
--- /dev/null
+++ b/cpp/src/io/comp/common.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace cudf::io::detail {
+
+/**
+ * @brief The multiple to which the size of a data buffer is padded.
+ *
+ * Padding is necessary for the input/output buffers of several compression/decompression kernels
+ * (inflate_kernel and nvcomp snappy). Such kernels operate on aligned data pointers, which require
+ * the buffers to be padded so that the pointers can shift along the address space to satisfy their
+ * alignment requirement.
+ *
+ * It is not yet entirely clear why such padding is needed; we need to investigate further and
+ * implement a better fix rather than just padding the buffer.
+ * See https://github.com/rapidsai/cudf/issues/13605.
+ */
+constexpr std::size_t BUFFER_PADDING_MULTIPLE{8};
+
+} // namespace cudf::io::detail
diff --git a/cpp/src/io/comp/comp.cpp b/cpp/src/io/comp/comp.cpp
index 2dda2287e09..26535bed43b 100644
--- a/cpp/src/io/comp/comp.cpp
+++ b/cpp/src/io/comp/comp.cpp
@@ -87,15 +87,14 @@ std::vector compress_snappy(host_span src,
   outputs[0] = d_dst;
   outputs.host_to_device_async(stream);

-  cudf::detail::hostdevice_vector<cudf::io::compression_result> hd_status(1, stream);
+  cudf::detail::hostdevice_vector<compression_result> hd_status(1, stream);
   hd_status[0] = {};
   hd_status.host_to_device_async(stream);

   nvcomp::batched_compress(nvcomp::compression_type::SNAPPY, inputs, outputs, hd_status, stream);

   hd_status.device_to_host_sync(stream);
-  CUDF_EXPECTS(hd_status[0].status == cudf::io::compression_status::SUCCESS,
-               "snappy compression failed");
+  CUDF_EXPECTS(hd_status[0].status == compression_status::SUCCESS, "snappy compression failed");

   return cudf::detail::make_std_vector_sync(d_dst, stream);
 }
diff --git a/cpp/src/io/comp/comp.hpp b/cpp/src/io/comp/comp.hpp
index 652abbbeda6..e16f26e1f06 100644
--- a/cpp/src/io/comp/comp.hpp
+++ b/cpp/src/io/comp/comp.hpp
@@ -16,16 +16,34 @@
 #pragma once

+#include "common.hpp"
+
 #include
 #include
-#include
-#include
 #include

 namespace CUDF_EXPORT cudf {
 namespace io::detail {

+/**
+ * @brief Status of a compression/decompression operation.
+ */
+enum class compression_status : uint8_t {
+  SUCCESS,          ///< Successful, output is valid
+  FAILURE,          ///< Failed, output is invalid (e.g. input is unsupported in some way)
+  SKIPPED,          ///< Operation skipped (if conversion, uncompressed data can be used)
+  OUTPUT_OVERFLOW,  ///< Output buffer is too small; operation can succeed with larger output
+};
+
+/**
+ * @brief Descriptor of compression/decompression result.
+ */
+struct compression_result {
+  uint64_t bytes_written;
+  compression_status status;
+};
+
 /**
  * @brief Compresses a system memory buffer.
  *
diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu
index 72649dbe427..151f72d262e 100644
--- a/cpp/src/io/comp/debrotli.cu
+++ b/cpp/src/io/comp/debrotli.cu
@@ -63,8 +63,8 @@ THE SOFTWARE.
 #include

-namespace cudf {
-namespace io {
+namespace cudf::io::detail {
+
 constexpr uint32_t huffman_lookup_table_width = 8;
 constexpr int8_t brotli_code_length_codes = 18;
 constexpr uint32_t brotli_num_distance_short_codes = 16;
@@ -2020,7 +2020,6 @@ CUDF_KERNEL void __launch_bounds__(block_size, 2)
   results[block_id].status = (s->error == 0) ?
compression_status::SUCCESS : compression_status::FAILURE; // Return ext heap used by last block (statistics) - results[block_id].reserved = s->fb_size; } } @@ -2115,5 +2114,4 @@ void gpu_debrotli(device_span const> inputs, #endif } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index 090ea1430b5..6e5ce4ce6c3 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -49,8 +49,7 @@ Mark Adler madler@alumni.caltech.edu #include -namespace cudf { -namespace io { +namespace cudf::io::detail { constexpr int max_bits = 15; // maximum bits in a code constexpr int max_l_codes = 286; // maximum number of literal/length codes @@ -1139,7 +1138,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) default: return compression_status::FAILURE; } }(); - results[z].reserved = (int)(state->end - state->cur); // Here mainly for debug purposes } } @@ -1224,5 +1222,4 @@ void gpu_copy_uncompressed_blocks(device_span const> } } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/gpuinflate.hpp b/cpp/src/io/comp/gpuinflate.hpp index 8bfca2b30df..4b09bd5a84c 100644 --- a/cpp/src/io/comp/gpuinflate.hpp +++ b/cpp/src/io/comp/gpuinflate.hpp @@ -16,6 +16,8 @@ #pragma once +#include "io/comp/comp.hpp" + #include #include #include @@ -24,44 +26,10 @@ #include -namespace cudf { -namespace io { - -/** - * @brief Status of a compression/decompression operation. - */ -enum class compression_status : uint8_t { - SUCCESS, ///< Successful, output is valid - FAILURE, ///< Failed, output is invalid (e.g. input is unsupported in some way) - SKIPPED, ///< Operation skipped (if conversion, uncompressed data can be used) - OUTPUT_OVERFLOW, ///< Output buffer is too small; operation can succeed with larger output -}; - -/** - * @brief Descriptor of compression/decompression result. - */ -struct compression_result { - uint64_t bytes_written; - compression_status status; - uint32_t reserved; -}; +namespace cudf::io::detail { enum class gzip_header_included { NO, YES }; -/** - * @brief The value used for padding a data buffer such that its size will be multiple of it. - * - * Padding is necessary for input/output buffers of several compression/decompression kernels - * (inflate_kernel and nvcomp snappy). Such kernels operate on aligned data pointers, which require - * padding to the buffers so that the pointers can shift along the address space to satisfy their - * alignment requirement. - * - * In the meantime, it is not entirely clear why such padding is needed. We need to further - * investigate and implement a better fix rather than just padding the buffer. - * See https://github.com/rapidsai/cudf/issues/13605. 
- */ -constexpr std::size_t BUFFER_PADDING_MULTIPLE{8}; - /** * @brief Interface for decompressing GZIP-compressed data * @@ -169,5 +137,4 @@ void gpu_snap(device_span const> inputs, device_span results, rmm::cuda_stream_view stream); -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/io_uncomp.hpp b/cpp/src/io/comp/io_uncomp.hpp index ca722a9b7ee..711a1c3274f 100644 --- a/cpp/src/io/comp/io_uncomp.hpp +++ b/cpp/src/io/comp/io_uncomp.hpp @@ -16,15 +16,13 @@ #pragma once +#include "common.hpp" + #include #include -#include -#include #include -using cudf::host_span; - namespace CUDF_EXPORT cudf { namespace io::detail { diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index d45c02f374f..3a4e315348c 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -30,7 +30,7 @@ #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { namespace { // Dispatcher for nvcompBatchedDecompressGetTempSizeEx @@ -478,4 +478,4 @@ std::optional compress_max_allowed_chunk_size(compression_type compressi } } -} // namespace cudf::io::nvcomp +} // namespace cudf::io::detail::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.cu b/cpp/src/io/comp/nvcomp_adapter.cu index 794d452ebf2..cf5996dfd93 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cu +++ b/cpp/src/io/comp/nvcomp_adapter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #include #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { batched_args create_batched_nvcomp_args(device_span const> inputs, device_span const> outputs, @@ -127,4 +127,4 @@ std::pair max_chunk_and_total_input_size(device_span @@ -27,7 +27,7 @@ #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { struct batched_args { rmm::device_uvector input_data_ptrs; @@ -76,4 +76,4 @@ void skip_unsupported_inputs(device_span input_sizes, std::pair max_chunk_and_total_input_size(device_span input_sizes, rmm::cuda_stream_view stream); -} // namespace cudf::io::nvcomp +} // namespace cudf::io::detail::nvcomp diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 2e1cda2d6b7..5c402523168 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -16,7 +16,7 @@ #pragma once -#include "gpuinflate.hpp" +#include "io/comp/comp.hpp" #include #include @@ -25,7 +25,7 @@ #include -namespace cudf::io::nvcomp { +namespace cudf::io::detail::nvcomp { /** * @brief Device batch decompression of given type. * @@ -103,4 +103,4 @@ void batched_compress(compression_type compression, device_span results, rmm::cuda_stream_view stream); -} // namespace cudf::io::nvcomp +} // namespace cudf::io::detail::nvcomp diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 7d4dcffa713..1443bfd38a2 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -19,8 +19,7 @@ #include -namespace cudf { -namespace io { +namespace cudf::io::detail { constexpr int hash_bits = 12; // TBD: Tentatively limits to 2-byte codes to prevent long copy search followed by long literal @@ -329,7 +328,6 @@ CUDF_KERNEL void __launch_bounds__(128) results[blockIdx.x].bytes_written = s->dst - s->dst_base; results[blockIdx.x].status = (s->dst > s->end) ? 
compression_status::FAILURE : compression_status::SUCCESS; - results[blockIdx.x].reserved = 0; } } @@ -345,5 +343,4 @@ void gpu_snap(device_span const> inputs, } } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/statistics.cu b/cpp/src/io/comp/statistics.cu index faf967041bc..caee9145d2c 100644 --- a/cpp/src/io/comp/statistics.cu +++ b/cpp/src/io/comp/statistics.cu @@ -21,7 +21,7 @@ #include #include -namespace cudf::io { +namespace cudf::io::detail { writer_compression_statistics collect_compression_statistics( device_span const> inputs, @@ -61,4 +61,4 @@ writer_compression_statistics collect_compression_statistics( output_size_successful}; } -} // namespace cudf::io +} // namespace cudf::io::detail diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 9b01272ac70..cf841c435a3 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -21,12 +21,10 @@ #include -namespace cudf { -namespace io { +namespace cudf::io::detail { constexpr int32_t batch_size = (1 << 5); constexpr int32_t batch_count = (1 << 2); constexpr int32_t prefetch_size = (1 << 9); // 512B, in 32B chunks -constexpr bool log_cyclecount = false; void __device__ busy_wait(size_t cycles) { @@ -647,7 +645,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) auto cur = s->src.begin(); auto const end = s->src.end(); s->error = 0; - if (log_cyclecount) { s->tstart = clock(); } if (cur < end) { // Read uncompressed size (varint), limited to 32-bit uint32_t uncompressed_size = *cur++; @@ -705,11 +702,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) results[strm_id].bytes_written = s->uncompressed_size - s->bytes_left; results[strm_id].status = (s->error == 0) ? compression_status::SUCCESS : compression_status::FAILURE; - if (log_cyclecount) { - results[strm_id].reserved = clock() - s->tstart; - } else { - results[strm_id].reserved = 0; - } } } @@ -724,5 +716,4 @@ void gpu_unsnap(device_span const> inputs, unsnap_kernel<128><<>>(inputs, outputs, results); } -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 654ee1e012c..f4e75f78dec 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -16,7 +16,7 @@ #pragma once -#include "io/comp/gpuinflate.hpp" +#include "io/comp/comp.hpp" #include "io/statistics/statistics.cuh" #include "io/utilities/column_buffer.hpp" #include "orc.hpp" @@ -73,14 +73,14 @@ struct CompressedStreamInfo { uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data uint8_t* uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size{}; // [in] compressed data size for this stream - device_span* dec_in_ctl{}; // [in] input buffer to decompress - device_span* dec_out_ctl{}; // [in] output buffer to decompress into - device_span dec_res{}; // [in] results of decompression - device_span* copy_in_ctl{}; // [out] input buffer to copy - device_span* copy_out_ctl{}; // [out] output buffer to copy to - uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of - // compressed blocks(out) + size_t compressed_data_size{}; // [in] compressed data size for this stream + device_span* dec_in_ctl{}; // [in] input buffer to decompress + device_span* dec_out_ctl{}; // [in] output buffer to decompress into + device_span dec_res{}; // [in] results of decompression + device_span* copy_in_ctl{}; // [out] input buffer to copy + 
device_span* copy_out_ctl{}; // [out] output buffer to copy to + uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of + // compressed blocks(out) uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of // uncompressed blocks(out) uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream @@ -414,7 +414,7 @@ std::optional CompressOrcDataStreams( bool collect_statistics, device_2dspan strm_desc, device_2dspan enc_streams, - device_span comp_res, + device_span comp_res, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 0081ed30d17..b661bb4ff90 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -269,7 +269,7 @@ rmm::device_buffer decompress_stripe_data( num_uncompressed_blocks}; device_span> copy_out_view{inflate_out.data() + num_compressed_blocks, num_uncompressed_blocks}; - gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); + cudf::io::detail::gpu_copy_uncompressed_blocks(copy_in_view, copy_out_view, stream); } // Copy without stream sync, thus need to wait for stream sync below to access. diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 07172b6b7f7..79ecca0ca99 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/utilities/block_utils.cuh" #include "io/utilities/time_utils.cuh" @@ -44,7 +45,11 @@ namespace io { namespace orc { namespace gpu { +namespace nvcomp = cudf::io::detail::nvcomp; + using cudf::detail::device_2dspan; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; constexpr int scratch_buffer_size = 512 * 4; constexpr int compact_streams_block_size = 1024; @@ -1385,7 +1390,7 @@ std::optional CompressOrcDataStreams( if (compression == SNAPPY) { try { if (nvcomp::is_compression_disabled(nvcomp::compression_type::SNAPPY)) { - gpu_snap(comp_in, comp_out, comp_res, stream); + cudf::io::detail::gpu_snap(comp_in, comp_out, comp_res, stream); } else { nvcomp::batched_compress( nvcomp::compression_type::SNAPPY, comp_in, comp_out, comp_res, stream); @@ -1429,7 +1434,7 @@ std::optional CompressOrcDataStreams( strm_desc, comp_in, comp_out, comp_res, compressed_data, comp_blk_size, max_comp_blk_size); if (collect_statistics) { - return cudf::io::collect_compression_statistics(comp_in, comp_res, stream); + return cudf::io::detail::collect_compression_statistics(comp_in, comp_res, stream); } else { return std::nullopt; } diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 6b9c19368dc..ce868b83c04 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -71,6 +71,8 @@ namespace cudf::io::orc::detail { +namespace nvcomp = cudf::io::detail::nvcomp; + template [[nodiscard]] constexpr int varint_size(T val) { @@ -2023,8 +2025,8 @@ size_t max_compression_output_size(CompressionKind compression_kind, uint32_t co { if (compression_kind == NONE) return 0; - return compress_max_output_chunk_size(to_nvcomp_compression_type(compression_kind), - compression_blocksize); + return nvcomp::compress_max_output_chunk_size(to_nvcomp_compression_type(compression_kind), + compression_blocksize); } std::unique_ptr make_table_meta(table_view const& input) diff --git a/cpp/src/io/parquet/page_enc.cu 
b/cpp/src/io/parquet/page_enc.cu index e9558735929..a1edd21f8a2 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -51,6 +51,9 @@ namespace { using ::cudf::detail::device_2dspan; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; + constexpr int encode_block_size = 128; constexpr int rle_buffer_size = 2 * encode_block_size; constexpr int num_encode_warps = encode_block_size / cudf::detail::warp_size; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ce9d48693ec..b2563ab5065 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -17,7 +17,7 @@ #pragma once #include "error.hpp" -#include "io/comp/gpuinflate.hpp" +#include "io/comp/comp.hpp" #include "io/parquet/parquet.hpp" #include "io/parquet/parquet_common.hpp" #include "io/statistics/statistics.cuh" @@ -599,12 +599,12 @@ struct EncColumnChunk { */ struct EncPage { // all pointers at the top to keep things properly aligned - uint8_t* page_data; //!< Ptr to uncompressed page - uint8_t* compressed_data; //!< Ptr to compressed page - EncColumnChunk* chunk; //!< Chunk that this page belongs to - compression_result* comp_res; //!< Ptr to compression result - uint32_t* def_histogram; //!< Histogram of counts for each definition level - uint32_t* rep_histogram; //!< Histogram of counts for each repetition level + uint8_t* page_data; //!< Ptr to uncompressed page + uint8_t* compressed_data; //!< Ptr to compressed page + EncColumnChunk* chunk; //!< Chunk that this page belongs to + cudf::io::detail::compression_result* comp_res; //!< Ptr to compression result + uint32_t* def_histogram; //!< Histogram of counts for each definition level + uint32_t* rep_histogram; //!< Histogram of counts for each repetition level // put this here in case it's ever made 64-bit encode_kernel_mask kernel_mask; //!< Mask used to control which encoding kernels to run // the rest can be 4 byte aligned @@ -1023,7 +1023,7 @@ void EncodePages(device_span pages, bool write_v2_headers, device_span> comp_in, device_span> comp_out, - device_span comp_res, + device_span comp_res, rmm::cuda_stream_view stream); /** @@ -1046,7 +1046,7 @@ void DecideCompression(device_span chunks, rmm::cuda_stream_view * @param[in] stream CUDA stream to use */ void EncodePageHeaders(device_span pages, - device_span comp_res, + device_span comp_res, device_span page_stats, statistics_chunk const* chunk_stats, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 27312a4da89..933be889b1a 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -15,6 +15,8 @@ */ #include "compact_protocol_reader.hpp" +#include "io/comp/comp.hpp" +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/utilities/time_utils.cuh" #include "reader_impl.hpp" @@ -44,6 +46,10 @@ namespace cudf::io::parquet::detail { namespace { +namespace nvcomp = cudf::io::detail::nvcomp; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; + struct split_info { row_range rows; int64_t split_pos; @@ -795,14 +801,16 @@ std::vector compute_page_splits_by_row(device_span 0) { - debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), stream); + debrotli_scratch.resize(cudf::io::detail::get_gpu_debrotli_scratch_size(codec.num_pages), + stream); } } // Dispatch batches of pages to 
decompress for each codec. // Buffer needs to be padded, required by `gpuDecodePageData`. rmm::device_buffer decomp_pages( - cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); + cudf::util::round_up_safe(total_decomp_size, cudf::io::detail::BUFFER_PADDING_MULTIPLE), + stream); auto comp_in = cudf::detail::make_empty_host_vector>(num_comp_pages, stream); @@ -874,8 +882,11 @@ std::vector compute_page_splits_by_row(device_span compute_page_splits_by_row(device_span @@ -251,8 +252,8 @@ void generate_depth_remappings( if (source->is_device_read_preferred(io_size)) { // Buffer needs to be padded. // Required by `gpuDecodePageData`. - page_data[chunk] = - rmm::device_buffer(cudf::util::round_up_safe(io_size, BUFFER_PADDING_MULTIPLE), stream); + page_data[chunk] = rmm::device_buffer( + cudf::util::round_up_safe(io_size, cudf::io::detail::BUFFER_PADDING_MULTIPLE), stream); auto fut_read_size = source->device_read_async( io_offset, io_size, static_cast(page_data[chunk].data()), stream); read_tasks.emplace_back(std::move(fut_read_size)); @@ -261,7 +262,8 @@ void generate_depth_remappings( // Buffer needs to be padded. // Required by `gpuDecodePageData`. page_data[chunk] = rmm::device_buffer( - cudf::util::round_up_safe(read_buffer->size(), BUFFER_PADDING_MULTIPLE), stream); + cudf::util::round_up_safe(read_buffer->size(), cudf::io::detail::BUFFER_PADDING_MULTIPLE), + stream); CUDF_CUDA_TRY(cudaMemcpyAsync(page_data[chunk].data(), read_buffer->data(), read_buffer->size(), diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 6db92462498..6b1a20701f9 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -23,6 +23,7 @@ #include "compact_protocol_reader.hpp" #include "compact_protocol_writer.hpp" #include "interop/decimal_conversion_utilities.cuh" +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/parquet/parquet.hpp" #include "io/parquet/parquet_gpu.hpp" diff --git a/cpp/src/io/parquet/writer_impl_helpers.cpp b/cpp/src/io/parquet/writer_impl_helpers.cpp index 396d44c0763..f15ea1f3c37 100644 --- a/cpp/src/io/parquet/writer_impl_helpers.cpp +++ b/cpp/src/io/parquet/writer_impl_helpers.cpp @@ -21,6 +21,8 @@ #include "writer_impl_helpers.hpp" +#include "io/comp/nvcomp_adapter.hpp" + #include #include #include diff --git a/cpp/src/io/parquet/writer_impl_helpers.hpp b/cpp/src/io/parquet/writer_impl_helpers.hpp index a85411594e9..14a9a0ed5b7 100644 --- a/cpp/src/io/parquet/writer_impl_helpers.hpp +++ b/cpp/src/io/parquet/writer_impl_helpers.hpp @@ -20,11 +20,11 @@ */ #pragma once -#include "io/comp/nvcomp_adapter.hpp" #include "parquet_common.hpp" #include #include +#include namespace cudf::io::parquet::detail { @@ -42,7 +42,7 @@ Compression to_parquet_compression(compression_type compression); * @param codec Compression codec * @return Translated nvcomp compression type */ -nvcomp::compression_type to_nvcomp_compression_type(Compression codec); +cudf::io::detail::nvcomp::compression_type to_nvcomp_compression_type(Compression codec); /** * @brief Function that computes input alignment requirements for the given compression type. diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 06069630685..162da62ef03 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "io/comp/gpuinflate.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" @@ -41,6 +42,8 @@ namespace cudf::io::text { namespace { +namespace nvcomp = cudf::io::detail::nvcomp; + /** * @brief Transforms offset tuples of the form [compressed_begin, compressed_end, * decompressed_begin, decompressed_end] into span tuples of the form [compressed_device_span, @@ -73,7 +76,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader { { // Buffer needs to be padded. // Required by `inflate_kernel`. - device.resize(cudf::util::round_up_safe(host.size(), BUFFER_PADDING_MULTIPLE), stream); + device.resize(cudf::util::round_up_safe(host.size(), cudf::io::detail::BUFFER_PADDING_MULTIPLE), + stream); cudf::detail::cuda_memcpy_async( device_span{device}.subspan(0, host.size()), host, stream); } @@ -94,7 +98,7 @@ class bgzip_data_chunk_reader : public data_chunk_reader { rmm::device_uvector d_decompressed_offsets; rmm::device_uvector> d_compressed_spans; rmm::device_uvector> d_decompressed_spans; - rmm::device_uvector d_decompression_results; + rmm::device_uvector d_decompression_results; std::size_t compressed_size_with_headers{}; std::size_t max_decompressed_size{}; // this is usually equal to decompressed_size() @@ -152,16 +156,16 @@ class bgzip_data_chunk_reader : public data_chunk_reader { gpuinflate(d_compressed_spans, d_decompressed_spans, d_decompression_results, - gzip_header_included::NO, + cudf::io::detail::gzip_header_included::NO, stream); } else { - cudf::io::nvcomp::batched_decompress(cudf::io::nvcomp::compression_type::DEFLATE, - d_compressed_spans, - d_decompressed_spans, - d_decompression_results, - max_decompressed_size, - decompressed_size(), - stream); + nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE, + d_compressed_spans, + d_decompressed_spans, + d_decompression_results, + max_decompressed_size, + decompressed_size(), + stream); } } is_decompressed = true; diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 54262dc3b44..5bbe8b63c47 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -30,6 +30,9 @@ #include using cudf::device_span; +using cudf::io::detail::compression_result; +using cudf::io::detail::compression_status; +namespace nvcomp = cudf::io::detail::nvcomp; /** * @brief Base test fixture for decompression @@ -61,7 +64,7 @@ struct DecompressTest : public cudf::test::BaseFixture { inf_out[0] = dst; inf_out.host_to_device_async(stream); - cudf::detail::hostdevice_vector inf_stat(1, stream); + cudf::detail::hostdevice_vector inf_stat(1, stream); inf_stat[0] = {}; inf_stat.host_to_device_async(stream); @@ -69,7 +72,7 @@ struct DecompressTest : public cudf::test::BaseFixture { CUDF_CUDA_TRY(cudaMemcpyAsync( decompressed.data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); inf_stat.device_to_host_sync(stream); - ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS); + ASSERT_EQ(inf_stat[0].status, compression_status::SUCCESS); } }; @@ -79,13 +82,13 @@ struct DecompressTest : public cudf::test::BaseFixture { struct GzipDecompressTest : public DecompressTest { void dispatch(device_span> d_inf_in, device_span> d_inf_out, - device_span d_inf_stat) + device_span d_inf_stat) { - cudf::io::gpuinflate(d_inf_in, - d_inf_out, - d_inf_stat, - cudf::io::gzip_header_included::YES, - cudf::get_default_stream()); + cudf::io::detail::gpuinflate(d_inf_in, + d_inf_out, + d_inf_stat, + cudf::io::detail::gzip_header_included::YES, + 
cudf::get_default_stream()); } }; @@ -95,9 +98,9 @@ struct GzipDecompressTest : public DecompressTest { struct SnappyDecompressTest : public DecompressTest { void dispatch(device_span> d_inf_in, device_span> d_inf_out, - device_span d_inf_stat) + device_span d_inf_stat) { - cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::get_default_stream()); + cudf::io::detail::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::get_default_stream()); } }; @@ -107,17 +110,17 @@ struct SnappyDecompressTest : public DecompressTest { struct BrotliDecompressTest : public DecompressTest { void dispatch(device_span> d_inf_in, device_span> d_inf_out, - device_span d_inf_stat) + device_span d_inf_stat) { - rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1), + rmm::device_buffer d_scratch{cudf::io::detail::get_gpu_debrotli_scratch_size(1), cudf::get_default_stream()}; - cudf::io::gpu_debrotli(d_inf_in, - d_inf_out, - d_inf_stat, - d_scratch.data(), - d_scratch.size(), - cudf::get_default_stream()); + cudf::io::detail::gpu_debrotli(d_inf_in, + d_inf_out, + d_inf_stat, + d_scratch.data(), + d_scratch.size(), + cudf::get_default_stream()); } }; @@ -181,8 +184,8 @@ TEST_F(BrotliDecompressTest, HelloWorld) TEST_F(NvcompConfigTest, Compression) { - using cudf::io::nvcomp::compression_type; - auto const& comp_disabled = cudf::io::nvcomp::is_compression_disabled; + using nvcomp::compression_type; + auto const& comp_disabled = nvcomp::is_compression_disabled; EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {true, true})); // all integrations enabled required @@ -201,8 +204,8 @@ TEST_F(NvcompConfigTest, Compression) TEST_F(NvcompConfigTest, Decompression) { - using cudf::io::nvcomp::compression_type; - auto const& decomp_disabled = cudf::io::nvcomp::is_decompression_disabled; + using nvcomp::compression_type; + auto const& decomp_disabled = nvcomp::is_decompression_disabled; EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {true, true})); // all integrations enabled required diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index fce99187516..2209a30149d 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -40,6 +40,8 @@ #include #include +namespace nvcomp = cudf::io::detail::nvcomp; + template using column_wrapper = std::conditional_t, @@ -1135,7 +1137,7 @@ TEST_F(OrcReaderTest, SingleInputs) TEST_F(OrcReaderTest, zstdCompressionRegression) { - if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { + if (nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD)) { GTEST_SKIP() << "Newer nvCOMP version is required"; } @@ -1700,8 +1702,8 @@ TEST_F(OrcMetadataReaderTest, TestNested) TEST_F(OrcReaderTest, ZstdMaxCompressionRate) { - if (cudf::io::nvcomp::is_decompression_disabled(cudf::io::nvcomp::compression_type::ZSTD) or - cudf::io::nvcomp::is_compression_disabled(cudf::io::nvcomp::compression_type::ZSTD)) { + if (nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD) or + nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD)) { GTEST_SKIP() << "Newer nvCOMP version is required"; } diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index da4faabf189..ff6fba1c3e8 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= -set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx) +set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/cudf/cudf/_lib/__init__.pxd b/python/cudf/cudf/_lib/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 10f9d813ccc..11473d60698 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -1,9 +1,2 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -import numpy as np - from . import strings_udf - -MAX_COLUMN_SIZE = np.iinfo(np.int32).max -MAX_COLUMN_SIZE_STR = "INT32_MAX" -MAX_STRING_COLUMN_BYTES = np.iinfo(np.int32).max -MAX_STRING_COLUMN_BYTES_STR = "INT32_MAX" diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 245a5d03981..f7dcd89ea48 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -31,12 +31,12 @@ from rmm.pylibrmm.device_buffer cimport DeviceBuffer from cudf._lib.types cimport ( dtype_from_column_view, - dtype_to_data_type, dtype_to_pylibcudf_type, ) from cudf._lib.types import dtype_from_pylibcudf_column +from pylibcudf cimport DataType as plc_DataType cimport pylibcudf.libcudf.copying as cpp_copying cimport pylibcudf.libcudf.types as libcudf_types cimport pylibcudf.libcudf.unary as libcudf_unary @@ -361,7 +361,7 @@ cdef class Column: col = self data_dtype = col.dtype - cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype) + cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[mutable_column_view] children cdef void* data @@ -398,7 +398,7 @@ cdef class Column: self._data = None return mutable_column_view( - dtype, + dtype.c_obj, self.size, data, mask, @@ -424,7 +424,7 @@ cdef class Column: col = self data_dtype = col.dtype - cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype) + cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[column_view] children cdef void* data @@ -450,7 +450,7 @@ cdef class Column: cdef libcudf_types.size_type c_null_count = null_count return column_view( - dtype, + dtype.c_obj, self.size, data, mask, diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 3d3bdd730a8..40bd50acf16 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -10,24 +10,22 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -import pylibcudf +import pylibcudf as plc import cudf -from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES from cudf.core.dtypes import ListDtype, StructDtype +from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES +from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id from cudf.core.missing import NA, NaT -cimport pylibcudf.libcudf.types as libcudf_types # We currently need this cimport because some of the implementations here # access the c_obj of the scalar, and because we need to be able to call # pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until # DeviceScalar is phased out entirely from cuDF Cython (at which point # cudf.Scalar will be directly backed by pylibcudf.Scalar). 
-from pylibcudf cimport Scalar as plc_Scalar +from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar -from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id - def _replace_nested(obj, check, replacement): if isinstance(obj, list): @@ -62,12 +60,12 @@ def gather_metadata(dtypes): """ out = [] for name, dtype in dtypes.items(): - v = pylibcudf.interop.ColumnMetadata(name) + v = plc.interop.ColumnMetadata(name) if isinstance(dtype, cudf.StructDtype): v.children_meta = gather_metadata(dtype.fields) elif isinstance(dtype, cudf.ListDtype): # Offsets column is unnamed and has no children - v.children_meta.append(pylibcudf.interop.ColumnMetadata("")) + v.children_meta.append(plc.interop.ColumnMetadata("")) v.children_meta.extend( gather_metadata({"": dtype.element_type}) ) @@ -81,7 +79,7 @@ cdef class DeviceScalar: # that from_unique_ptr is implemented is probably dereferencing this in an # invalid state. See what the best way to fix that is. def __cinit__(self, *args, **kwargs): - self.c_value = pylibcudf.Scalar.__new__(pylibcudf.Scalar) + self.c_value = plc.Scalar.__new__(plc.Scalar) def __init__(self, value, dtype): """ @@ -127,20 +125,20 @@ cdef class DeviceScalar: pa_array = pa.array([pa.scalar(value, type=pa_type)]) pa_table = pa.Table.from_arrays([pa_array], names=[""]) - table = pylibcudf.interop.from_arrow(pa_table) + table = plc.interop.from_arrow(pa_table) column = table.columns()[0] if isinstance(dtype, cudf.core.dtypes.DecimalDtype): if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype): - column = pylibcudf.unary.cast( - column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL32, -dtype.scale) + column = plc.unary.cast( + column, plc.DataType(plc.TypeId.DECIMAL32, -dtype.scale) ) elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype): - column = pylibcudf.unary.cast( - column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL64, -dtype.scale) + column = plc.unary.cast( + column, plc.DataType(plc.TypeId.DECIMAL64, -dtype.scale) ) - self.c_value = pylibcudf.copying.get_element(column, 0) + self.c_value = plc.copying.get_element(column, 0) self._dtype = dtype def _to_host_scalar(self): @@ -150,7 +148,7 @@ cdef class DeviceScalar: null_type = NaT if is_datetime or is_timedelta else NA metadata = gather_metadata({"": self.dtype})[0] - ps = pylibcudf.interop.to_arrow(self.c_value, metadata) + ps = plc.interop.to_arrow(self.c_value, metadata) if not ps.is_valid: return null_type @@ -225,34 +223,33 @@ cdef class DeviceScalar: return s cdef void _set_dtype(self, dtype=None): - cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type() - + cdef plc_TypeID cdtype_id = self.c_value.type().id() if dtype is not None: self._dtype = dtype - elif cdtype.id() in { - libcudf_types.type_id.DECIMAL32, - libcudf_types.type_id.DECIMAL64, - libcudf_types.type_id.DECIMAL128, + elif cdtype_id in { + plc_TypeID.DECIMAL32, + plc_TypeID.DECIMAL64, + plc_TypeID.DECIMAL128, }: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" ) - elif cdtype.id() == libcudf_types.type_id.STRUCT: + elif cdtype_id == plc_TypeID.STRUCT: struct_table_view = (self.get_raw_ptr())[0].view() self._dtype = StructDtype({ str(i): dtype_from_column_view(struct_table_view.column(i)) for i in range(struct_table_view.num_columns()) }) - elif cdtype.id() == libcudf_types.type_id.LIST: + elif cdtype_id == plc_TypeID.LIST: if ( self.get_raw_ptr() - )[0].view().type().id() == libcudf_types.type_id.LIST: + 
)[0].view().type().id() == plc_TypeID.LIST: self._dtype = dtype_from_column_view( (self.get_raw_ptr())[0].view() ) else: self._dtype = ListDtype( - LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ ( (self.get_raw_ptr())[0] .view().type().id() @@ -260,8 +257,8 @@ cdef class DeviceScalar: ] ) else: - self._dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ - (cdtype.id()) + self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + (cdtype_id) ] diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd index c2b760490c1..18b1d26e4db 100644 --- a/python/cudf/cudf/_lib/types.pxd +++ b/python/cudf/cudf/_lib/types.pxd @@ -1,16 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. from libc.stdint cimport int32_t -from libcpp cimport bool -cimport pylibcudf.libcudf.types as libcudf_types from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view ctypedef int32_t underlying_type_t_type_id cdef dtype_from_column_view(column_view cv) -cdef libcudf_types.data_type dtype_to_data_type(dtype) except * cpdef dtype_to_pylibcudf_type(dtype) -cdef bool is_decimal_type_id(libcudf_types.type_id tid) except * diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index f169ea12b10..777bd070b32 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -1,7 +1,5 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from enum import IntEnum - import numpy as np import pandas as pd @@ -11,138 +9,46 @@ cimport pylibcudf.libcudf.types as libcudf_types from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -import pylibcudf +import pylibcudf as plc import cudf -class TypeId(IntEnum): - EMPTY = libcudf_types.type_id.EMPTY - INT8 = libcudf_types.type_id.INT8 - INT16 = libcudf_types.type_id.INT16 - INT32 = libcudf_types.type_id.INT32 - INT64 = libcudf_types.type_id.INT64 - UINT8 = libcudf_types.type_id.UINT8 - UINT16 = libcudf_types.type_id.UINT16 - UINT32 = libcudf_types.type_id.UINT32 - UINT64 = libcudf_types.type_id.UINT64 - FLOAT32 = libcudf_types.type_id.FLOAT32 - FLOAT64 = libcudf_types.type_id.FLOAT64 - BOOL8 = libcudf_types.type_id.BOOL8 - TIMESTAMP_DAYS = ( - libcudf_types.type_id.TIMESTAMP_DAYS - ) - TIMESTAMP_SECONDS = ( - libcudf_types.type_id.TIMESTAMP_SECONDS - ) - TIMESTAMP_MILLISECONDS = ( - ( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - ) - TIMESTAMP_MICROSECONDS = ( - ( - libcudf_types.type_id.TIMESTAMP_MICROSECONDS - ) - ) - TIMESTAMP_NANOSECONDS = ( - libcudf_types.type_id.TIMESTAMP_NANOSECONDS - ) - DURATION_SECONDS = ( - libcudf_types.type_id.DURATION_SECONDS - ) - DURATION_MILLISECONDS = ( - libcudf_types.type_id.DURATION_MILLISECONDS - ) - DURATION_MICROSECONDS = ( - libcudf_types.type_id.DURATION_MICROSECONDS - ) - DURATION_NANOSECONDS = ( - libcudf_types.type_id.DURATION_NANOSECONDS - ) - STRING = libcudf_types.type_id.STRING - DECIMAL32 = libcudf_types.type_id.DECIMAL32 - DECIMAL64 = libcudf_types.type_id.DECIMAL64 - DECIMAL128 = libcudf_types.type_id.DECIMAL128 - STRUCT = libcudf_types.type_id.STRUCT - - -SUPPORTED_NUMPY_TO_LIBCUDF_TYPES = { - np.dtype("int8"): TypeId.INT8, - np.dtype("int16"): TypeId.INT16, - np.dtype("int32"): TypeId.INT32, - np.dtype("int64"): TypeId.INT64, - np.dtype("uint8"): TypeId.UINT8, - np.dtype("uint16"): TypeId.UINT16, - np.dtype("uint32"): TypeId.UINT32, - np.dtype("uint64"): TypeId.UINT64, - np.dtype("float32"): 
TypeId.FLOAT32, - np.dtype("float64"): TypeId.FLOAT64, - np.dtype("datetime64[s]"): TypeId.TIMESTAMP_SECONDS, - np.dtype("datetime64[ms]"): TypeId.TIMESTAMP_MILLISECONDS, - np.dtype("datetime64[us]"): TypeId.TIMESTAMP_MICROSECONDS, - np.dtype("datetime64[ns]"): TypeId.TIMESTAMP_NANOSECONDS, - np.dtype("object"): TypeId.STRING, - np.dtype("bool"): TypeId.BOOL8, - np.dtype("timedelta64[s]"): TypeId.DURATION_SECONDS, - np.dtype("timedelta64[ms]"): TypeId.DURATION_MILLISECONDS, - np.dtype("timedelta64[us]"): TypeId.DURATION_MICROSECONDS, - np.dtype("timedelta64[ns]"): TypeId.DURATION_NANOSECONDS, -} - SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = { - k: pylibcudf.TypeId(v).value - for k, v in SUPPORTED_NUMPY_TO_LIBCUDF_TYPES.items() + np.dtype("int8"): plc.types.TypeId.INT8, + np.dtype("int16"): plc.types.TypeId.INT16, + np.dtype("int32"): plc.types.TypeId.INT32, + np.dtype("int64"): plc.types.TypeId.INT64, + np.dtype("uint8"): plc.types.TypeId.UINT8, + np.dtype("uint16"): plc.types.TypeId.UINT16, + np.dtype("uint32"): plc.types.TypeId.UINT32, + np.dtype("uint64"): plc.types.TypeId.UINT64, + np.dtype("float32"): plc.types.TypeId.FLOAT32, + np.dtype("float64"): plc.types.TypeId.FLOAT64, + np.dtype("datetime64[s]"): plc.types.TypeId.TIMESTAMP_SECONDS, + np.dtype("datetime64[ms]"): plc.types.TypeId.TIMESTAMP_MILLISECONDS, + np.dtype("datetime64[us]"): plc.types.TypeId.TIMESTAMP_MICROSECONDS, + np.dtype("datetime64[ns]"): plc.types.TypeId.TIMESTAMP_NANOSECONDS, + np.dtype("object"): plc.types.TypeId.STRING, + np.dtype("bool"): plc.types.TypeId.BOOL8, + np.dtype("timedelta64[s]"): plc.types.TypeId.DURATION_SECONDS, + np.dtype("timedelta64[ms]"): plc.types.TypeId.DURATION_MILLISECONDS, + np.dtype("timedelta64[us]"): plc.types.TypeId.DURATION_MICROSECONDS, + np.dtype("timedelta64[ns]"): plc.types.TypeId.DURATION_NANOSECONDS, } - -LIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { - # There's no equivalent to EMPTY in cudf. We translate EMPTY - # columns from libcudf to ``int8`` columns of all nulls in Python. - # ``int8`` is chosen because it uses the least amount of memory. - TypeId.EMPTY: np.dtype("int8"), - TypeId.INT8: np.dtype("int8"), - TypeId.INT16: np.dtype("int16"), - TypeId.INT32: np.dtype("int32"), - TypeId.INT64: np.dtype("int64"), - TypeId.UINT8: np.dtype("uint8"), - TypeId.UINT16: np.dtype("uint16"), - TypeId.UINT32: np.dtype("uint32"), - TypeId.UINT64: np.dtype("uint64"), - TypeId.FLOAT32: np.dtype("float32"), - TypeId.FLOAT64: np.dtype("float64"), - TypeId.BOOL8: np.dtype("bool"), - TypeId.TIMESTAMP_SECONDS: np.dtype("datetime64[s]"), - TypeId.TIMESTAMP_MILLISECONDS: np.dtype("datetime64[ms]"), - TypeId.TIMESTAMP_MICROSECONDS: np.dtype("datetime64[us]"), - TypeId.TIMESTAMP_NANOSECONDS: np.dtype("datetime64[ns]"), - TypeId.DURATION_SECONDS: np.dtype("timedelta64[s]"), - TypeId.DURATION_MILLISECONDS: np.dtype("timedelta64[ms]"), - TypeId.DURATION_MICROSECONDS: np.dtype("timedelta64[us]"), - TypeId.DURATION_NANOSECONDS: np.dtype("timedelta64[ns]"), - TypeId.STRING: np.dtype("object"), - TypeId.STRUCT: np.dtype("object"), -} - PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { - pylibcudf.TypeId(k).value: v - for k, v in LIBCUDF_TO_SUPPORTED_NUMPY_TYPES.items() + plc_type: np_type + for np_type, plc_type in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES.items() } +# There's no equivalent to EMPTY in cudf. We translate EMPTY +# columns from libcudf to ``int8`` columns of all nulls in Python. +# ``int8`` is chosen because it uses the least amount of memory. 
+PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY] = np.dtype("int8") +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.STRUCT] = np.dtype("object") +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.LIST] = np.dtype("object") -duration_unit_map = { - TypeId.DURATION_SECONDS: "s", - TypeId.DURATION_MILLISECONDS: "ms", - TypeId.DURATION_MICROSECONDS: "us", - TypeId.DURATION_NANOSECONDS: "ns" -} - -datetime_unit_map = { - TypeId.TIMESTAMP_SECONDS: "s", - TypeId.TIMESTAMP_MILLISECONDS: "ms", - TypeId.TIMESTAMP_MICROSECONDS: "us", - TypeId.TIMESTAMP_NANOSECONDS: "ns", -} -size_type_dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[pylibcudf.types.SIZE_TYPE_ID] +size_type_dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.SIZE_TYPE_ID] cdef dtype_from_lists_column_view(column_view cv): @@ -190,71 +96,40 @@ cdef dtype_from_column_view(column_view cv): scale=-cv.type().scale() ) else: - return LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ (tid) ] -cdef libcudf_types.data_type dtype_to_data_type(dtype) except *: - # Note: This function is to be phased out in favor of - # dtype_to_pylibcudf_type which will return a pylibcudf - # DataType object - cdef libcudf_types.type_id tid - if isinstance(dtype, cudf.ListDtype): - tid = libcudf_types.type_id.LIST - elif isinstance(dtype, cudf.StructDtype): - tid = libcudf_types.type_id.STRUCT - elif isinstance(dtype, cudf.Decimal128Dtype): - tid = libcudf_types.type_id.DECIMAL128 - elif isinstance(dtype, cudf.Decimal64Dtype): - tid = libcudf_types.type_id.DECIMAL64 - elif isinstance(dtype, cudf.Decimal32Dtype): - tid = libcudf_types.type_id.DECIMAL32 - else: - tid = ( - ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[np.dtype(dtype)])) - - if is_decimal_type_id(tid): - return libcudf_types.data_type(tid, -dtype.scale) - else: - return libcudf_types.data_type(tid) cpdef dtype_to_pylibcudf_type(dtype): if isinstance(dtype, cudf.ListDtype): - return pylibcudf.DataType(pylibcudf.TypeId.LIST) + return plc.DataType(plc.TypeId.LIST) elif isinstance(dtype, cudf.StructDtype): - return pylibcudf.DataType(pylibcudf.TypeId.STRUCT) + return plc.DataType(plc.TypeId.STRUCT) elif isinstance(dtype, cudf.Decimal128Dtype): - tid = pylibcudf.TypeId.DECIMAL128 - return pylibcudf.DataType(tid, -dtype.scale) + tid = plc.TypeId.DECIMAL128 + return plc.DataType(tid, -dtype.scale) elif isinstance(dtype, cudf.Decimal64Dtype): - tid = pylibcudf.TypeId.DECIMAL64 - return pylibcudf.DataType(tid, -dtype.scale) + tid = plc.TypeId.DECIMAL64 + return plc.DataType(tid, -dtype.scale) elif isinstance(dtype, cudf.Decimal32Dtype): - tid = pylibcudf.TypeId.DECIMAL32 - return pylibcudf.DataType(tid, -dtype.scale) - # libcudf types don't support localization so convert to the base type + tid = plc.TypeId.DECIMAL32 + return plc.DataType(tid, -dtype.scale) + # libcudf types don't support timezones so convert to the base type elif isinstance(dtype, pd.DatetimeTZDtype): dtype = np.dtype(f"(tid) - ] + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid] diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd deleted file mode 100644 index 900be721c9a..00000000000 --- a/python/cudf/cudf/_lib/utils.pxd +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -cpdef data_from_pylibcudf_table(tbl, column_names, index_names=*) -cpdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *) -cpdef columns_from_pylibcudf_table(tbl) -cpdef _data_from_columns(columns, column_names, index_names=*) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx deleted file mode 100644 index 975c9eb741c..00000000000 --- a/python/cudf/cudf/_lib/utils.pyx +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -import cudf - -from cudf._lib.column cimport Column - - -cpdef columns_from_pylibcudf_table(tbl): - """Convert a pylibcudf table into list of columns. - - Parameters - ---------- - tbl : pylibcudf.Table - The pylibcudf table whose columns will be extracted - - Returns - ------- - list[Column] - A list of columns. - """ - return [Column.from_pylibcudf(plc) for plc in tbl.columns()] - - -cpdef _data_from_columns(columns, column_names, index_names=None): - """Convert a list of columns into a dict with an index. - - This method is intended to provide the bridge between the columns returned - from calls to libcudf or pylibcudf APIs and the cuDF Python Frame objects, which - require named columns and a separate index. - - Since cuDF Python has an independent representation of a table as a - collection of columns, this function simply returns a dict of columns - suitable for conversion into data to be passed to cuDF constructors. - This method returns the columns of the table in the order they are - stored in libcudf, but calling code is responsible for partitioning and - labeling them as needed. - - Parameters - ---------- - columns : list[Column] - The columns to be extracted - column_names : iterable - The keys associated with the columns in the output data. - index_names : iterable, optional - If provided, an iterable of strings that will be used to label the - corresponding first set of columns into a (Multi)Index. If this - argument is omitted, all columns are assumed to be part of the output - table and no index is constructed. - """ - # First construct the index, if any - index = ( - # TODO: For performance, the _from_data methods of Frame types assume - # that the passed index object is already an Index because cudf.Index - # and cudf.as_index are expensive. As a result, this function is - # currently somewhat inconsistent in returning a dict of columns for - # the data while actually constructing the Index object here (instead - # of just returning a dict for that as well). As we clean up the - # Frame factories we may want to look for a less dissonant approach - # that does not impose performance penalties. 
- cudf.core.index._index_from_data( - { - name: columns[i] - for i, name in enumerate(index_names) - } - ) - if index_names is not None - else None - ) - n_index_columns = len(index_names) if index_names is not None else 0 - data = { - name: columns[i + n_index_columns] - for i, name in enumerate(column_names) - } - return data, index - - -cpdef data_from_pylibcudf_table(tbl, column_names, index_names=None): - return _data_from_columns( - columns_from_pylibcudf_table(tbl), - column_names, - index_names - ) - -cpdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None): - """ - Unpacks the TableWithMetadata from libcudf I/O - into a dict of columns and an Index (cuDF format) - """ - if column_names is None: - column_names = tbl_with_meta.column_names(include_children=False) - return _data_from_columns( - columns=[Column.from_pylibcudf(plc) for plc in tbl_with_meta.columns], - column_names=column_names, - index_names=index_names - ) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d9b54008e85..b10b8dfe207 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1192,10 +1192,10 @@ def _concat( codes = [o.codes for o in objs] newsize = sum(map(len, codes)) - if newsize > libcudf.MAX_COLUMN_SIZE: + if newsize > np.iinfo(libcudf.types.size_type_dtype).max: raise MemoryError( f"Result of concat cannot have " - f"size > {libcudf.MAX_COLUMN_SIZE_STR}" + f"size > {libcudf.types.size_type_dtype}_MAX" ) elif newsize == 0: codes_col = column.column_empty(0, head.codes.dtype) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 75b9070b53f..31efe267c96 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2537,10 +2537,10 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: ) newsize = sum(map(len, objs)) - if newsize > libcudf.MAX_COLUMN_SIZE: + if newsize > np.iinfo(libcudf.types.size_type_dtype).max: raise MemoryError( f"Result of concat cannot have " - f"size > {libcudf.MAX_COLUMN_SIZE_STR}" + f"size > {libcudf.types.size_type_dtype}_MAX" ) elif newsize == 0: return column_empty(0, head.dtype) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 971f0be77f8..8ed233ba737 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -56,7 +56,9 @@ def dtype(arbitrary): else: if np_dtype.kind in set("OU"): return np.dtype("object") - elif np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_LIBCUDF_TYPES: + elif ( + np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES + ): raise TypeError(f"Unsupported type {np_dtype}") return np_dtype diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 9aadbf8f47a..8f45c6f0115 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -861,7 +861,9 @@ def _quantile_table( column_order, null_precedence, ) - columns = libcudf.utils.columns_from_pylibcudf_table(plc_table) + columns = [ + ColumnBase.from_pylibcudf(col) for col in plc_table.columns() + ] return self._from_columns_like_self( columns, column_names=self._column_names, diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index bfe716f0afc..4bd5a1e7040 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -9,7 +9,7 @@ import cupy as cp import llvmlite.binding as ll import 
numpy as np -from cuda import cudart +from cuda.bindings import runtime from numba import cuda, typeof from numba.core.datamodel import default_manager, models from numba.core.errors import TypingError @@ -356,8 +356,8 @@ def set_malloc_heap_size(size=None): if size is None: size = _STRINGS_UDF_DEFAULT_HEAP_SIZE if size != _heap_size: - (ret,) = cudart.cudaDeviceSetLimit( - cudart.cudaLimit.cudaLimitMallocHeapSize, size + (ret,) = runtime.cudaDeviceSetLimit( + runtime.cudaLimit.cudaLimitMallocHeapSize, size ) if ret.value != 0: raise RuntimeError("Unable to set cudaMalloc heap size") diff --git a/python/cudf/cudf/io/avro.py b/python/cudf/cudf/io/avro.py index 4966cdb86e1..dcbdd4423fc 100644 --- a/python/cudf/cudf/io/avro.py +++ b/python/cudf/cudf/io/avro.py @@ -3,7 +3,7 @@ import pylibcudf as plc import cudf -from cudf._lib.utils import data_from_pylibcudf_io +from cudf._lib.column import Column from cudf.utils import ioutils @@ -46,5 +46,12 @@ def read_avro( options.set_columns(columns) plc_result = plc.io.avro.read_avro(options) - - return cudf.DataFrame._from_data(*data_from_pylibcudf_io(plc_result)) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + plc_result.column_names(include_children=False), + plc_result.columns, + strict=True, + ) + } + return cudf.DataFrame._from_data(data) diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index da9a66f3874..6d617cbf38e 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -15,8 +15,8 @@ import pylibcudf as plc import cudf +from cudf._lib.column import Column from cudf._lib.types import dtype_to_pylibcudf_type -from cudf._lib.utils import data_from_pylibcudf_io from cudf.api.types import is_hashable, is_scalar from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils @@ -251,9 +251,17 @@ def read_csv( if na_values is not None: options.set_na_values([str(val) for val in na_values]) - df = cudf.DataFrame._from_data( - *data_from_pylibcudf_io(plc.io.csv.read_csv(options)) - ) + table_w_meta = plc.io.csv.read_csv(options) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + table_w_meta.column_names(include_children=False), + table_w_meta.columns, + strict=True, + ) + } + + df = cudf.DataFrame._from_data(data) if isinstance(dtype, abc.Mapping): for k, v in dtype.items(): diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index 4be556e1d67..ff326e09315 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -15,7 +15,6 @@ import cudf from cudf._lib.column import Column from cudf._lib.types import dtype_to_pylibcudf_type -from cudf._lib.utils import _data_from_columns, data_from_pylibcudf_io from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type @@ -178,13 +177,11 @@ def read_json( ) ) ) - df = cudf.DataFrame._from_data( - *_data_from_columns( - columns=[Column.from_pylibcudf(col) for col in res_cols], - column_names=res_col_names, - index_names=None, - ) - ) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip(res_col_names, res_cols, strict=True) + } + df = cudf.DataFrame._from_data(data) ioutils._add_df_col_struct_names(df, res_child_names) return df else: @@ -207,10 +204,15 @@ def read_json( extra_parameters=kwargs, ) ) - - df = cudf.DataFrame._from_data( - *data_from_pylibcudf_io(table_w_meta) - ) + data = { + name: Column.from_pylibcudf(col) + for name, col in zip( + 
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 4be556e1d67..ff326e09315 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -15,7 +15,6 @@
 import cudf
 from cudf._lib.column import Column
 from cudf._lib.types import dtype_to_pylibcudf_type
-from cudf._lib.utils import _data_from_columns, data_from_pylibcudf_io
 from cudf.core.buffer import acquire_spill_lock
 from cudf.utils import ioutils
 from cudf.utils.dtypes import _maybe_convert_to_default_type
@@ -178,13 +177,11 @@
                 )
             )
         )
-        df = cudf.DataFrame._from_data(
-            *_data_from_columns(
-                columns=[Column.from_pylibcudf(col) for col in res_cols],
-                column_names=res_col_names,
-                index_names=None,
-            )
-        )
+        data = {
+            name: Column.from_pylibcudf(col)
+            for name, col in zip(res_col_names, res_cols, strict=True)
+        }
+        df = cudf.DataFrame._from_data(data)
         ioutils._add_df_col_struct_names(df, res_child_names)
         return df
     else:
@@ -207,10 +204,15 @@
                 extra_parameters=kwargs,
             )
         )
-
-        df = cudf.DataFrame._from_data(
-            *data_from_pylibcudf_io(table_w_meta)
-        )
+        data = {
+            name: Column.from_pylibcudf(col)
+            for name, col in zip(
+                table_w_meta.column_names(include_children=False),
+                table_w_meta.columns,
+                strict=True,
+            )
+        }
+        df = cudf.DataFrame._from_data(data)

         # Post-processing to add in struct column names
         ioutils._add_df_col_struct_names(df, table_w_meta.child_names)
diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py
index 5103137bc77..f3124552fd1 100644
--- a/python/cudf/cudf/io/orc.py
+++ b/python/cudf/cudf/io/orc.py
@@ -10,10 +10,11 @@
 import pylibcudf as plc

 import cudf
+from cudf._lib.column import Column
 from cudf._lib.types import dtype_to_pylibcudf_type
-from cudf._lib.utils import data_from_pylibcudf_io
 from cudf.api.types import is_list_like
 from cudf.core.buffer import acquire_spill_lock
+from cudf.core.index import _index_from_data
 from cudf.utils import ioutils

 try:
@@ -323,11 +324,35 @@
         actual_index_names = list(index_col_names.values())
         col_names = names[len(actual_index_names) :]

-    data, index = data_from_pylibcudf_io(
-        tbl_w_meta,
-        col_names if columns is None else names,
-        actual_index_names,
-    )
+    result_col_names = col_names if columns is None else names
+    if actual_index_names is None:
+        index = None
+        data = {
+            name: Column.from_pylibcudf(col)
+            for name, col in zip(
+                result_col_names, tbl_w_meta.columns, strict=True
+            )
+        }
+    else:
+        result_columns = [
+            Column.from_pylibcudf(col) for col in tbl_w_meta.columns
+        ]
+        index = _index_from_data(
+            dict(
+                zip(
+                    actual_index_names,
+                    result_columns[: len(actual_index_names)],
+                    strict=True,
+                )
+            )
+        )
+        data = dict(
+            zip(
+                result_col_names,
+                result_columns[len(actual_index_names) :],
+                strict=True,
+            )
+        )

     if is_range_index:
         index = range_idx
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index c13489630a3..feb6e12da8c 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -23,10 +23,6 @@

 import cudf
 from cudf._lib.column import Column
-from cudf._lib.utils import (
-    _data_from_columns,
-    data_from_pylibcudf_io,
-)
 from cudf.api.types import is_list_like
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column import as_column, column_empty
@@ -1238,16 +1234,11 @@
                     # Drop residual columns to save memory
                     tbl._columns[i] = None

-            df = cudf.DataFrame._from_data(
-                *_data_from_columns(
-                    columns=[
-                        Column.from_pylibcudf(plc)
-                        for plc in concatenated_columns
-                    ],
-                    column_names=column_names,
-                    index_names=None,
-                )
-            )
+            data = {
+                name: Column.from_pylibcudf(col)
+                for name, col in zip(column_names, concatenated_columns)
+            }
+            df = cudf.DataFrame._from_data(data)
             df = _process_metadata(
                 df,
                 column_names,
@@ -1287,8 +1278,16 @@
             options.set_filter(filters)

         tbl_w_meta = plc.io.parquet.read_parquet(options)
+        data = {
+            name: Column.from_pylibcudf(col)
+            for name, col in zip(
+                tbl_w_meta.column_names(include_children=False),
+                tbl_w_meta.columns,
+                strict=True,
+            )
+        }

-        df = cudf.DataFrame._from_data(*data_from_pylibcudf_io(tbl_w_meta))
+        df = cudf.DataFrame._from_data(data)

         df = _process_metadata(
             df,
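[Editorial aside — not part of the patch] The read_orc hunk above splits the decoded columns into index columns and data columns by position, which data_from_pylibcudf_io previously hid. A sketch of that split over plain lists; the real code builds a cudf Index via _index_from_data, which this stand-in skips:

def split_index_and_data(columns, column_names, index_names):
    # Index columns come first in the decoded table, data columns after.
    if index_names is None:
        return dict(zip(column_names, columns, strict=True)), None
    n_index = len(index_names)
    index = dict(zip(index_names, columns[:n_index], strict=True))
    data = dict(zip(column_names, columns[n_index:], strict=True))
    return data, index

data, index = split_index_and_data([0, 1, 2], ["a", "b"], ["idx"])
assert index == {"idx": 0} and data == {"a": 1, "b": 2}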
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 57bf08e6eec..ca8f9cac2d0 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -151,7 +151,7 @@ def cudf_dtype_from_pydata_dtype(dtype):
         return cudf.core.dtypes.Decimal64Dtype
     elif cudf.api.types.is_decimal128_dtype(dtype):
         return cudf.core.dtypes.Decimal128Dtype
-    elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_LIBCUDF_TYPES:
+    elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES:
         return dtype.type

     return infer_dtype_from_object(dtype)
diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py
index b5387ddeb5f..10ab3f6bb1e 100644
--- a/python/cudf/cudf/utils/gpu_utils.py
+++ b/python/cudf/cudf/utils/gpu_utils.py
@@ -15,7 +15,7 @@ def validate_setup():

     import warnings

-    from cuda.cudart import cudaDeviceAttr, cudaError_t
+    from cuda.bindings.runtime import cudaDeviceAttr, cudaError_t

     from rmm._cuda.gpu import (
         CUDARuntimeError,
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 21c18ef0174..2fdf6b34b8f 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -98,8 +98,6 @@ filterwarnings = [
   "error",
   "ignore:::.*xdist.*",
   "ignore:::.*pytest.*",
-  # https://github.com/rapidsai/build-planning/issues/116
-  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
   # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow()
   "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore",
   # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index 29fcd161444..a9d937435e9 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -60,8 +60,6 @@ addopts = "--tb=native --strict-config --strict-markers"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
   "error",
-  # https://github.com/rapidsai/build-planning/issues/116
-  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
 ]
 xfail_strict = true

diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index b781b13ec10..5904942aea2 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -66,8 +66,6 @@ addopts = "--tb=native --strict-config --strict-markers"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
   "error",
-  # https://github.com/rapidsai/build-planning/issues/116
-  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
 ]
 xfail_strict = true

diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index ed43ab83d53..7820157d89b 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -93,8 +93,6 @@ addopts = "--tb=native --strict-config --strict-markers"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
   "error",
-  # https://github.com/rapidsai/build-planning/issues/116
-  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
   "ignore:unclosed