
Merge branch 'branch-25.02' into host_udf_reduction
ttnghia committed Dec 28, 2024
2 parents f27c9fd + 45b40c5 commit 2deeb3b
Showing 34 changed files with 194 additions and 396 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -71,7 +71,7 @@ dependencies:
 - ptxcompiler
 - pyarrow>=14.0.0,<19.0.0a0
 - pydata-sphinx-theme!=0.14.2
-- pynvml>=11.4.1,<12.0.0a0
+- pynvml>=12.0.0,<13.0.0a0
 - pytest-benchmark
 - pytest-cases>=3.8.2
 - pytest-cov
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -69,7 +69,7 @@ dependencies:
 - pyarrow>=14.0.0,<19.0.0a0
 - pydata-sphinx-theme!=0.14.2
 - pynvjitlink>=0.0.0a0
-- pynvml>=11.4.1,<12.0.0a0
+- pynvml>=12.0.0,<13.0.0a0
 - pytest-benchmark
 - pytest-cases>=3.8.2
 - pytest-cov
2 changes: 1 addition & 1 deletion conda/recipes/dask-cudf/meta.yaml
@@ -43,7 +43,7 @@ requirements:
   run:
     - python
     - cudf ={{ version }}
-    - pynvml >=11.4.1,<12.0.0a0
+    - pynvml >=12.0.0,<13.0.0a0
     - rapids-dask-dependency ={{ minor_version }}
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
 
2 changes: 1 addition & 1 deletion dependencies.yaml
@@ -757,7 +757,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - pynvml>=11.4.1,<12.0.0a0
+          - pynvml>=12.0.0,<13.0.0a0
           - rapids-dask-dependency==25.2.*,>=0.0.0a0
   run_custreamz:
     common:
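The pin change above (repeated across both conda environments, the dask-cudf recipe, and this file) moves pynvml from the 11.x line to 12.x, where the package became a thin repackaging of nvidia-ml-py. The NVML entry points that GPU-memory discovery in the dask stack goes through keep the same names on both sides of that boundary; below is a minimal sketch of such a query, assuming an NVIDIA driver is available — illustrative, not code from this repo.

```python
# Minimal NVML device-memory query; these call names are identical in
# pynvml 11.x and 12.x, so the bump is a packaging change for callers.
import pynvml

pynvml.nvmlInit()
try:
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # first visible GPU
    mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
    print(f"total={mem.total} used={mem.used} free={mem.free}")
finally:
    pynvml.nvmlShutdown()
```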
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx)
+set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx)
 set(linked_libraries cudf::cudf)
 
 rapids_cython_create_modules(
Empty file removed python/cudf/cudf/_lib/__init__.pxd
7 changes: 0 additions & 7 deletions python/cudf/cudf/_lib/__init__.py
@@ -1,9 +1,2 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
-import numpy as np
-
 from . import strings_udf
-
-MAX_COLUMN_SIZE = np.iinfo(np.int32).max
-MAX_COLUMN_SIZE_STR = "INT32_MAX"
-MAX_STRING_COLUMN_BYTES = np.iinfo(np.int32).max
-MAX_STRING_COLUMN_BYTES_STR = "INT32_MAX"
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/column.pyx
@@ -31,12 +31,12 @@ from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 
 from cudf._lib.types cimport (
     dtype_from_column_view,
-    dtype_to_data_type,
+    dtype_to_pylibcudf_type,
 )
 
 from cudf._lib.types import dtype_from_pylibcudf_column
 
 from pylibcudf cimport DataType as plc_DataType
 cimport pylibcudf.libcudf.copying as cpp_copying
 cimport pylibcudf.libcudf.types as libcudf_types
 cimport pylibcudf.libcudf.unary as libcudf_unary
@@ -361,7 +361,7 @@ cdef class Column:
         col = self
         data_dtype = col.dtype
 
-        cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype)
+        cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
         cdef libcudf_types.size_type offset = self.offset
         cdef vector[mutable_column_view] children
         cdef void* data
@@ -398,7 +398,7 @@
             self._data = None
 
         return mutable_column_view(
-            dtype,
+            dtype.c_obj,
             self.size,
             data,
             mask,
@@ -424,7 +424,7 @@
         col = self
         data_dtype = col.dtype
 
-        cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype)
+        cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
         cdef libcudf_types.size_type offset = self.offset
         cdef vector[column_view] children
         cdef void* data
@@ -450,7 +450,7 @@
         cdef libcudf_types.size_type c_null_count = null_count
 
         return column_view(
-            dtype,
+            dtype.c_obj,
             self.size,
             data,
             mask,
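The four hunks above all make the same move: `Column` now computes its type as a Python-visible `pylibcudf.DataType` via `dtype_to_pylibcudf_type`, and only unwraps the underlying libcudf `data_type` through `.c_obj` at the `column_view`/`mutable_column_view` boundary. A hypothetical, much-reduced sketch of what such a dtype mapping involves follows (the real cudf helper also handles decimals, lists, structs, and datetime/timedelta units; `_NUMPY_TO_TYPE_ID` and `dtype_to_plc_type` are illustrative names, not the actual implementation):

```python
# Sketch: map a primitive NumPy dtype to a pylibcudf DataType.
import numpy as np
import pylibcudf as plc

_NUMPY_TO_TYPE_ID = {
    np.dtype("int32"): plc.TypeId.INT32,
    np.dtype("int64"): plc.TypeId.INT64,
    np.dtype("float64"): plc.TypeId.FLOAT64,
    np.dtype("bool"): plc.TypeId.BOOL8,
}

def dtype_to_plc_type(dtype) -> plc.DataType:
    """Return the pylibcudf DataType wrapping the matching libcudf type id."""
    return plc.DataType(_NUMPY_TO_TYPE_ID[np.dtype(dtype)])
```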
53 changes: 25 additions & 28 deletions python/cudf/cudf/_lib/scalar.pyx
@@ -10,24 +10,22 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-import pylibcudf
+import pylibcudf as plc
 
 import cudf
-from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES
 from cudf.core.dtypes import ListDtype, StructDtype
+from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
+from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 from cudf.core.missing import NA, NaT
 
-cimport pylibcudf.libcudf.types as libcudf_types
 # We currently need this cimport because some of the implementations here
 # access the c_obj of the scalar, and because we need to be able to call
 # pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until
 # DeviceScalar is phased out entirely from cuDF Cython (at which point
 # cudf.Scalar will be directly backed by pylibcudf.Scalar).
-from pylibcudf cimport Scalar as plc_Scalar
+from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID
 from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar
 
-from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
-
 
 def _replace_nested(obj, check, replacement):
     if isinstance(obj, list):
@@ -62,12 +60,12 @@ def gather_metadata(dtypes):
     """
     out = []
     for name, dtype in dtypes.items():
-        v = pylibcudf.interop.ColumnMetadata(name)
+        v = plc.interop.ColumnMetadata(name)
         if isinstance(dtype, cudf.StructDtype):
             v.children_meta = gather_metadata(dtype.fields)
         elif isinstance(dtype, cudf.ListDtype):
             # Offsets column is unnamed and has no children
-            v.children_meta.append(pylibcudf.interop.ColumnMetadata(""))
+            v.children_meta.append(plc.interop.ColumnMetadata(""))
             v.children_meta.extend(
                 gather_metadata({"": dtype.element_type})
             )
@@ -81,7 +79,7 @@ cdef class DeviceScalar:
     # that from_unique_ptr is implemented is probably dereferencing this in an
    # invalid state. See what the best way to fix that is.
     def __cinit__(self, *args, **kwargs):
-        self.c_value = pylibcudf.Scalar.__new__(pylibcudf.Scalar)
+        self.c_value = plc.Scalar.__new__(plc.Scalar)
 
     def __init__(self, value, dtype):
         """
@@ -127,20 +125,20 @@
             pa_array = pa.array([pa.scalar(value, type=pa_type)])
 
             pa_table = pa.Table.from_arrays([pa_array], names=[""])
-            table = pylibcudf.interop.from_arrow(pa_table)
+            table = plc.interop.from_arrow(pa_table)
 
             column = table.columns()[0]
             if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
                 if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
-                    column = pylibcudf.unary.cast(
-                        column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL32, -dtype.scale)
+                    column = plc.unary.cast(
+                        column, plc.DataType(plc.TypeId.DECIMAL32, -dtype.scale)
                     )
                 elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
-                    column = pylibcudf.unary.cast(
-                        column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL64, -dtype.scale)
+                    column = plc.unary.cast(
+                        column, plc.DataType(plc.TypeId.DECIMAL64, -dtype.scale)
                     )
 
-            self.c_value = pylibcudf.copying.get_element(column, 0)
+            self.c_value = plc.copying.get_element(column, 0)
             self._dtype = dtype
 
     def _to_host_scalar(self):
@@ -150,7 +148,7 @@
         null_type = NaT if is_datetime or is_timedelta else NA
 
         metadata = gather_metadata({"": self.dtype})[0]
-        ps = pylibcudf.interop.to_arrow(self.c_value, metadata)
+        ps = plc.interop.to_arrow(self.c_value, metadata)
         if not ps.is_valid:
             return null_type
 
@@ -225,43 +223,42 @@ cdef class DeviceScalar:
         return s
 
     cdef void _set_dtype(self, dtype=None):
-        cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type()
-
+        cdef plc_TypeID cdtype_id = self.c_value.type().id()
         if dtype is not None:
             self._dtype = dtype
-        elif cdtype.id() in {
-            libcudf_types.type_id.DECIMAL32,
-            libcudf_types.type_id.DECIMAL64,
-            libcudf_types.type_id.DECIMAL128,
+        elif cdtype_id in {
+            plc_TypeID.DECIMAL32,
+            plc_TypeID.DECIMAL64,
+            plc_TypeID.DECIMAL128,
         }:
             raise TypeError(
                 "Must pass a dtype when constructing from a fixed-point scalar"
             )
-        elif cdtype.id() == libcudf_types.type_id.STRUCT:
+        elif cdtype_id == plc_TypeID.STRUCT:
             struct_table_view = (<struct_scalar*>self.get_raw_ptr())[0].view()
             self._dtype = StructDtype({
                 str(i): dtype_from_column_view(struct_table_view.column(i))
                 for i in range(struct_table_view.num_columns())
             })
-        elif cdtype.id() == libcudf_types.type_id.LIST:
+        elif cdtype_id == plc_TypeID.LIST:
             if (
                 <list_scalar*>self.get_raw_ptr()
-            )[0].view().type().id() == libcudf_types.type_id.LIST:
+            )[0].view().type().id() == plc_TypeID.LIST:
                 self._dtype = dtype_from_column_view(
                     (<list_scalar*>self.get_raw_ptr())[0].view()
                 )
             else:
                 self._dtype = ListDtype(
-                    LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
+                    PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
                         <underlying_type_t_type_id>(
                             (<list_scalar*>self.get_raw_ptr())[0]
                             .view().type().id()
                         )
                     ]
                 )
         else:
-            self._dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
-                <underlying_type_t_type_id>(cdtype.id())
+            self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
+                <underlying_type_t_type_id>(cdtype_id)
             ]
 
 
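Taken together, the scalar.pyx hunks keep `DeviceScalar` as a shim over `pylibcudf.Scalar`, as the retained comment says: a host value travels through pyarrow into a one-row device column, `get_element` slices out row 0 as a device scalar, and `to_arrow` with a `ColumnMetadata` brings it back to the host. A hedged sketch of that round trip using the same pylibcudf calls the diff uses (dtype plumbing and the decimal re-cast omitted; the two function names are illustrative):

```python
# Sketch of the host -> device -> host scalar round trip from the diff.
import pyarrow as pa
import pylibcudf as plc

def host_value_to_plc_scalar(value) -> plc.Scalar:
    # One-row Arrow table -> device table; row 0 becomes a device scalar.
    pa_table = pa.Table.from_arrays([pa.array([value])], names=[""])
    column = plc.interop.from_arrow(pa_table).columns()[0]
    return plc.copying.get_element(column, 0)

def plc_scalar_to_host(s: plc.Scalar):
    # to_arrow requires column metadata even for an unnamed scalar.
    ps = plc.interop.to_arrow(s, plc.interop.ColumnMetadata(""))
    return ps.as_py() if ps.is_valid else None
```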
5 changes: 0 additions & 5 deletions python/cudf/cudf/_lib/types.pxd
@@ -1,16 +1,11 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t
-from libcpp cimport bool
 
-cimport pylibcudf.libcudf.types as libcudf_types
 from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 
 ctypedef int32_t underlying_type_t_type_id
 
 cdef dtype_from_column_view(column_view cv)
 
-cdef libcudf_types.data_type dtype_to_data_type(dtype) except *
 cpdef dtype_to_pylibcudf_type(dtype)
-cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *
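The header keeps `ctypedef int32_t underlying_type_t_type_id` because scalar.pyx still casts a `type_id` enum to its underlying integer before indexing `PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES`. A hypothetical Python-level analogue of that lookup (entries abbreviated; `TYPE_ID_TO_NUMPY` is an illustrative name, not the real cudf table):

```python
# Sketch: resolve a pylibcudf type id to the NumPy dtype it maps onto.
import numpy as np
import pylibcudf as plc

TYPE_ID_TO_NUMPY = {
    plc.TypeId.INT32: np.dtype("int32"),
    plc.TypeId.FLOAT64: np.dtype("float64"),
    plc.TypeId.BOOL8: np.dtype("bool"),
}

def numpy_dtype_of(plc_type: plc.DataType) -> np.dtype:
    return TYPE_ID_TO_NUMPY[plc_type.id()]
```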
