From 327f9f992a01218480241fb20a1ebf09644b76f1 Mon Sep 17 00:00:00 2001
From: Melih Elibol
Date: Thu, 24 Feb 2022 14:45:57 -0800
Subject: [PATCH] initial pass.

---
 nums/core/array/blockarray.py | 236 +++++--------------------
 nums/core/array/ops.py        | 321 ++++++++++++++++++++++++++++++++++
 nums/core/array/utils.py      |   6 +-
 3 files changed, 364 insertions(+), 199 deletions(-)
 create mode 100644 nums/core/array/ops.py

diff --git a/nums/core/array/blockarray.py b/nums/core/array/blockarray.py
index 88c52f46..fa5aadc8 100644
--- a/nums/core/array/blockarray.py
+++ b/nums/core/array/blockarray.py
@@ -23,6 +23,7 @@
 from nums.core.array.view import ArrayView
 from nums.core.grid.grid import ArrayGrid
 from nums.core.compute.compute_manager import ComputeManager
+from nums.core.array import ops

 # pylint: disable=too-many-lines

@@ -630,10 +631,6 @@ def to_block_array(obj, cm: ComputeManager, block_shape=None):
             block_shape = cm.get_block_shape(np_array.shape, np_array.dtype)
         return BlockArray.from_np(np_array, block_shape, False, cm)

-    def check_or_convert_other(self, other, compute_block_shape=False):
-        block_shape = None if compute_block_shape else self.block_shape
-        return BlockArray.to_block_array(other, self.cm, block_shape=block_shape)
-
     def ufunc(self, op_name):
         result = self.copy()
         for grid_entry in self.grid.get_entry_iterator():
@@ -769,7 +766,7 @@ def reduce_axis(self, op_name, axis, keepdims=False):
         return result

     #################
-    # Linear Algebra
+    # Tensor Algebra
     #################

     def _compute_tensordot_syskwargs(self, self_block: Block, other_block: Block):
@@ -779,71 +776,6 @@ def _compute_tensordot_syskwargs(self, self_block: Block, other_block: Block):
         else:
             return other_block.true_grid_entry(), other_block.true_grid_shape()

-    def tensordot(self, other, axes=2):
-        if isinstance(axes, int):
-            pass
-        elif array_utils.is_array_like(axes):
-            raise NotImplementedError("Non-integer axes is currently not supported.")
-        else:
-            raise TypeError(f"Unexpected axes type '{type(axes).__name__}'")
-
-        other = self.check_or_convert_other(other, compute_block_shape=True)
-
-        if array_utils.np_tensordot_param_test(
-            self.shape, self.ndim, other.shape, other.ndim, axes
-        ):
-            raise ValueError("shape-mismatch for sum")
-
-        this_axes = self.grid.grid_shape[:-axes]
-        this_sum_axes = self.grid.grid_shape[-axes:]
-        other_axes = other.grid.grid_shape[axes:]
-        other_sum_axes = other.grid.grid_shape[:axes]
-        assert this_sum_axes == other_sum_axes
-        result_shape = tuple(self.shape[:-axes] + other.shape[axes:])
-        result_block_shape = tuple(self.block_shape[:-axes] + other.block_shape[axes:])
-        result_grid = ArrayGrid(
-            shape=result_shape,
-            block_shape=result_block_shape,
-            dtype=array_utils.get_bop_output_type(
-                "tensordot", self.dtype, other.dtype
-            ).__name__,
-        )
-        assert result_grid.grid_shape == tuple(this_axes + other_axes)
-        result = BlockArray(result_grid, self.cm)
-        this_dims = list(itertools.product(*map(range, this_axes)))
-        other_dims = list(itertools.product(*map(range, other_axes)))
-        sum_dims = list(itertools.product(*map(range, this_sum_axes)))
-        for i in this_dims:
-            for j in other_dims:
-                grid_entry = tuple(i + j)
-                result_block: Block = result.blocks[grid_entry]
-                sum_oids = []
-                for k in sum_dims:
-                    self_block: Block = self.blocks[tuple(i + k)]
-                    other_block: Block = other.blocks[tuple(k + j)]
-                    dot_grid_args = self._compute_tensordot_syskwargs(
-                        self_block, other_block
-                    )
-                    dotted_oid = self.cm.bop(
-                        "tensordot",
-                        self_block.oid,
-                        other_block.oid,
-                        self_block.transposed,
-                        other_block.transposed,
-                        axes=axes,
-                        syskwargs={
-                            "grid_entry": dot_grid_args[0],
-                            "grid_shape": dot_grid_args[1],
-                        },
-                    )
-                    sum_oids.append(
-                        (dotted_oid, dot_grid_args[0], dot_grid_args[1], False)
-                    )
-                result_block.oid = self._tree_reduce(
-                    "sum", sum_oids, result_block.grid_entry, result_block.grid_shape
-                )
-        return result
-
     def __matmul__(self, other):
         if len(self.shape) > 2:
             # TODO (bcp): NumPy's implementation does a stacked matmul, which is not supported yet.
             raise NotImplementedError(
                 "Matrix multiply for tensors of rank > 2 not supported yet."
             )
         else:
-            return self.tensordot(other, 1)
+            return ops.tensordot(self.cm, self, other, axes=1)

     def __rmatmul__(self, other):
-        other = self.check_or_convert_other(other)
-        return other @ self
+        return ops.tensordot(self.cm, other, self, axes=1)

     __imatmul__ = __matmul__

@@ -863,52 +794,6 @@ def __rmatmul__(self, other):
     #################
     # Arithmetic
     #################

-    def _fast_element_wise(self, op_name, other):
-        """
-        Implements fast scheduling for basic element-wise operations.
-        """
-        dtype = array_utils.get_bop_output_type(op_name, self.dtype, other.dtype)
-        # Schedule the op first.
-        blocks = np.empty(shape=self.grid.grid_shape, dtype=Block)
-        for grid_entry in self.grid.get_entry_iterator():
-            self_block: Block = self.blocks[grid_entry]
-            other_block: Block = other.blocks[grid_entry]
-            blocks[grid_entry] = block = Block(
-                grid_entry=grid_entry,
-                grid_shape=self_block.grid_shape,
-                rect=self_block.rect,
-                shape=self_block.shape,
-                dtype=dtype,
-                transposed=False,
-                cm=self.cm,
-            )
-            block.oid = self.cm.bop(
-                op_name,
-                self_block.oid,
-                other_block.oid,
-                self_block.transposed,
-                other_block.transposed,
-                axes={},
-                syskwargs={
-                    "grid_entry": grid_entry,
-                    "grid_shape": self.grid.grid_shape,
-                },
-            )
-        return BlockArray(
-            ArrayGrid(self.shape, self.block_shape, dtype.__name__),
-            self.cm,
-            blocks=blocks,
-        )
-
-    def __elementwise__(self, op_name, other):
-        other = self.check_or_convert_other(other)
-        if self.shape == other.shape and self.block_shape == other.block_shape:
-            return self._fast_element_wise(op_name, other)
-        blocks_op = self.blocks.__getattribute__("__%s__" % op_name)
-        return BlockArray.from_blocks(
-            blocks_op(other.blocks), result_shape=None, cm=self.cm
-        )
-
     def __neg__(self):
         return self.ufunc("negative")

     def __pos__(self):
         return self.ufunc("positive")

     def __abs__(self):
         return self.ufunc("abs")

     def __mod__(self, other):
-        return self.__elementwise__("mod", other)
+        return ops.elementwise("mod", self, other, self.cm)

     def __rmod__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("mod", self)
+        return ops.elementwise("mod", other, self, self.cm)

     __imod__ = __mod__

     def __add__(self, other):
-        return self.__elementwise__("add", other)
+        return ops.elementwise("add", self, other, self.cm)

     def __radd__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("add", self)
+        return ops.elementwise("add", other, self, self.cm)

     __iadd__ = __add__

     def __sub__(self, other):
-        return self.__elementwise__("sub", other)
+        return ops.elementwise("sub", self, other, self.cm)

     def __rsub__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("sub", self)
+        return ops.elementwise("sub", other, self, self.cm)

     __isub__ = __sub__

     def __mul__(self, other):
-        return self.__elementwise__("mul", other)
+        return ops.elementwise("mul", self, other, self.cm)

     def __rmul__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("mul", self)
+        return ops.elementwise("mul", other, self, self.cm)

     __imul__ = __mul__

     def __truediv__(self, other):
-        return self.__elementwise__("truediv", other)
+        return ops.elementwise("truediv", self, other, self.cm)

     def __rtruediv__(self, other):
-        other = self.check_or_convert_other(other)
-        return other / self
+        return ops.elementwise("truediv", other, self, self.cm)

     __itruediv__ = __truediv__

     def __floordiv__(self, other):
-        return self.__elementwise__("floor_divide", other)
+        return ops.elementwise("floor_divide", self, other, self.cm)

     def __rfloordiv__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("floor_divide", self)
+        return ops.elementwise("floor_divide", other, self, self.cm)

     __ifloordiv__ = __floordiv__

     def __pow__(self, other):
-        return self.__elementwise__("pow", other)
+        return ops.elementwise("pow", self, other, self.cm)

     def __rpow__(self, other):
-        other = self.check_or_convert_other(other)
-        return other ** self
+        return ops.elementwise("pow", other, self, self.cm)

     __ipow__ = __pow__

@@ -985,70 +863,41 @@ def __rpow__(self, other):
     #################
     # Inequalities
     #################

-    def __inequality__(self, op, other):
-        other = self.check_or_convert_other(other)
-        assert (
-            other.shape == () or other.shape == self.shape
-        ), "Currently supports comparison with scalars only."
-        shape = array_utils.broadcast(self.shape, other.shape).shape
-        block_shape = array_utils.broadcast_block_shape(
-            self.shape, other.shape, self.block_shape
-        )
-        dtype = bool.__name__
-        grid = ArrayGrid(shape, block_shape, dtype)
-        result = BlockArray(grid, self.cm)
-        for grid_entry in result.grid.get_entry_iterator():
-            if other.shape == ():
-                other_block: Block = other.blocks.item()
-            else:
-                other_block: Block = other.blocks[grid_entry]
-            result.blocks[grid_entry] = self.blocks[grid_entry].bop(
-                op, other_block, args={}
-            )
-
-        return result
-
     def __ge__(self, other):
-        return self.__inequality__("ge", other)
+        return ops.inequality("ge", self, other, self.cm)

     def __rge__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__inequality__("ge", self)
+        return ops.inequality("ge", other, self, self.cm)

     def __gt__(self, other):
-        return self.__inequality__("gt", other)
+        return ops.inequality("gt", self, other, self.cm)

     def __rgt__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__inequality__("gt", self)
+        return ops.inequality("gt", other, self, self.cm)

     def __le__(self, other):
-        return self.__inequality__("le", other)
+        return ops.inequality("le", self, other, self.cm)

     def __rle__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__inequality__("le", self)
+        return ops.inequality("le", other, self, self.cm)

     def __lt__(self, other):
-        return self.__inequality__("lt", other)
+        return ops.inequality("lt", self, other, self.cm)

     def __rlt__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__inequality__("lt", self)
+        return ops.inequality("lt", other, self, self.cm)

     def __eq__(self, other):
-        return self.__inequality__("eq", other)
+        return ops.inequality("eq", self, other, self.cm)

     def __req__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__inequality__("eq", self)
+        return ops.inequality("eq", other, self, self.cm)

     def __ne__(self, other):
-        return self.__inequality__("ne", other)
+        return ops.inequality("ne", self, other, self.cm)

     def __rne__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__inequality__("ne", self)
+        return ops.inequality("ne", other, self, self.cm)

     ##################
     # Boolean
     ##################

@@ -1066,47 +915,42 @@ def __invert__(self):
         return self.ufunc("invert")

     def __or__(self, other):
-        return self.__elementwise__("bitwise_or", other)
+        return ops.elementwise("bitwise_or", self, other, self.cm)

     def __ror__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("bitwise_or", self)
+        return ops.elementwise("bitwise_or", other, self, self.cm)

     __ior__ = __or__

     def __and__(self, other):
-        return self.__elementwise__("bitwise_and", other)
+        return ops.elementwise("bitwise_and", self, other, self.cm)

     def __rand__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("bitwise_and", self)
+        return ops.elementwise("bitwise_and", other, self, self.cm)

     __iand__ = __and__

     def __xor__(self, other):
-        return self.__elementwise__("bitwise_xor", other)
+        return ops.elementwise("bitwise_xor", self, other, self.cm)

     def __rxor__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("bitwise_xor", self)
+        return ops.elementwise("bitwise_xor", other, self, self.cm)

     __ixor__ = __xor__

     def __lshift__(self, other):
-        return self.__elementwise__("left_shift", other)
+        return ops.elementwise("left_shift", self, other, self.cm)

     def __rlshift__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("left_shift", self)
+        return ops.elementwise("left_shift", other, self, self.cm)

     __ilshift__ = __lshift__

     def __rshift__(self, other):
-        return self.__elementwise__("right_shift", other)
+        return ops.elementwise("right_shift", self, other, self.cm)

     def __rrshift__(self, other):
-        other = self.check_or_convert_other(other)
-        return other.__elementwise__("right_shift", self)
+        return ops.elementwise("right_shift", other, self, self.cm)

     __irshift__ = __rshift__

diff --git a/nums/core/array/ops.py b/nums/core/array/ops.py
new file mode 100644
index 00000000..dc6b85c7
--- /dev/null
+++ b/nums/core/array/ops.py
@@ -0,0 +1,321 @@
import itertools

import numpy as np

from nums.core.array import utils as array_utils
from nums.core.array.base import Block
from nums.core.grid.grid import ArrayGrid
from nums.core.compute.compute_manager import ComputeManager


def atleast_nd_array(obj):
    # Normalize Python scalars, lists, and tuples to ndarrays so that shape,
    # ndim, and size are available downstream. ndarrays and BlockArrays pass
    # through unchanged.
    if isinstance(obj, (bool, int, float, complex, list, tuple)):
        return np.asarray(obj)
    return obj


def tree_reduce(
    cm: ComputeManager, op_name, blocks_or_oids, result_grid_entry, result_grid_shape
):
    """
    Basic tree reduce implementation.
    Schedules op on same node as left operand.
    :param op_name: The reduction op.
    :param blocks_or_oids: A list of type Block or a list of tuples.
    Tuples must be of the form
    (oid, grid_entry, grid_shape, transposed)
    :param result_grid_entry: The grid entry of the result block. This will be used
    to compute the final reduction step.
    :param result_grid_shape: The grid shape of the result block. This will be used
    to compute the final reduction step.
    :return: The oid of the result.
    """
    oid_list = blocks_or_oids
    if isinstance(blocks_or_oids[0], Block):
        oid_list = [
            (b.oid, b.grid_entry, b.grid_shape, b.transposed) for b in blocks_or_oids
        ]
    if len(oid_list) == 1:
        return oid_list[0][0]
    q = oid_list
    while len(q) > 1:
        a_oid, a_ge, a_gs, a_T = q.pop(0)
        b_oid, _, _, b_T = q.pop(0)
        ge, gs = (
            (result_grid_entry, result_grid_shape) if len(q) == 0 else (a_ge, a_gs)
        )
        c_oid = cm.bop_reduce(
            op_name,
            a_oid,
            b_oid,
            a_T,
            b_T,
            syskwargs={
                "grid_entry": ge,
                "grid_shape": gs,
            },
        )
        q.append((c_oid, ge, gs, False))
    r_oid, r_ge, r_gs, _ = q.pop(0)
    assert r_ge == result_grid_entry
    assert r_gs == result_grid_shape
    return r_oid
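
# Illustrative usage sketch (comment only; `cm` and `partial_blocks` are
# hypothetical names, not defined by this patch): reduce the partial products
# of one output block to a single object, finalizing the last step at grid
# entry (0, 0) of a 1x1 result grid.
#
#   oid = tree_reduce(
#       cm,               # a ComputeManager instance
#       "sum",            # name of the reduction op
#       partial_blocks,   # list of Block, or (oid, grid_entry, grid_shape, transposed) tuples
#       (0, 0),           # result grid entry
#       (1, 1),           # result grid shape
#   )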
+ """ + oid_list = blocks_or_oids + if isinstance(blocks_or_oids[0], Block): + oid_list = [ + (b.oid, b.grid_entry, b.grid_shape, b.transposed) + for b in blocks_or_oids + ] + if len(oid_list) == 1: + return oid_list[0][0] + q = oid_list + while len(q) > 1: + a_oid, a_ge, a_gs, a_T = q.pop(0) + b_oid, _, _, b_T = q.pop(0) + ge, gs = ( + (result_grid_entry, result_grid_shape) if len(q) == 0 else (a_ge, a_gs) + ) + c_oid = cm.bop_reduce( + op_name, + a_oid, + b_oid, + a_T, + b_T, + syskwargs={ + "grid_entry": ge, + "grid_shape": gs, + }, + ) + q.append((c_oid, ge, gs, False)) + r_oid, r_ge, r_gs, _ = q.pop(0) + assert r_ge == result_grid_entry + assert r_gs == result_grid_shape + return r_oid + + +def get_compatible_tensordot_operands(cm, left, right, axes: int): + from nums.core.array.blockarray import BlockArray + + def right_from_left_ba(left, right): + # Use this when left.size > right.size. + if not isinstance(left, BlockArray): + # Make left a block array if it's not already. + left = BlockArray.to_block_array(left, cm) + + if isinstance(right, BlockArray): + if left.block_shape[-axes:] == right.block_shape[:axes]: + return left, right + else: + compatible_right_block_shape = tuple(left.block_shape[-axes:] + right.block_shape[axes:]) + return left, right.reshape(block_shape=compatible_right_block_shape) + + assert isinstance(right, np.ndarray) + # Other is the right operand of a tensordot over k axes. + # Set the first k dims of the right operand's block shape + # equal to the last k dims of the left operand's block shape. + # Set the dim of the rest of the axes to the corresponding dim of the right operand's shape. + # This last step is a conservative solution to the partitioning choices we have for the + # remaining dims. + compatible_right_block_shape = tuple(left.block_shape[-axes:] + right.shape[axes:]) + return left, BlockArray.to_block_array(right, cm, + block_shape=compatible_right_block_shape) + + def left_from_right_ba(left, right): + # Use this when left.size <= right.size. + if not isinstance(right, BlockArray): + # Make right a block array if it's not already. + right = BlockArray.to_block_array(right, cm) + + if isinstance(left, BlockArray): + if left.block_shape[-axes:] == right.block_shape[:axes]: + return left, right + else: + compatible_left_block_shape = tuple(left.block_shape[:-axes] + right.block_shape[:axes]) + return left.reshape(block_shape=compatible_left_block_shape), right + + assert isinstance(left, np.ndarray) + compatible_left_block_shape = tuple(left.shape[:-axes] + right.block_shape[:axes]) + return BlockArray.to_block_array(left, cm, block_shape=compatible_left_block_shape), \ + right + + left = atleast_nd_array(left) + right = atleast_nd_array(right) + # Make sure we have enough dims for tensordot. + assert axes <= len(left.shape) + assert axes <= len(right.shape) + + if array_utils.np_tensordot_param_test( + left.shape, left.ndim, right.shape, right.ndim, axes + ): + raise ValueError("shape-mismatch for tensordot.") + + # Four possibilities: + # 1. Left is blockarray, right is ndarray. + # 2. Left is ndarray, right is blockarray. + # 3. Both left and right are ndarray. + # 4. Both left and right are blockarray. + if left.size > right.size: + # Covers all 4 cases when we prefer to repartition right array based on left array. + return right_from_left_ba(left, right) + else: + # Covers all 4 cases when we prefer to repartition left array based on right array. 


def _compute_tensordot_syskwargs(self_block: Block, other_block: Block):
    # Schedule on larger block.
    if np.product(self_block.shape) >= np.product(other_block.shape):
        return self_block.true_grid_entry(), self_block.true_grid_shape()
    else:
        return other_block.true_grid_entry(), other_block.true_grid_shape()


def tensordot(cm, left, right, axes=2):
    # Imported here to avoid a circular import with blockarray.py.
    from nums.core.array.blockarray import BlockArray

    if isinstance(axes, int):
        pass
    elif array_utils.is_array_like(axes):
        raise NotImplementedError("Non-integer axes is currently not supported.")
    else:
        raise TypeError(f"Unexpected axes type '{type(axes).__name__}'")

    left, right = get_compatible_tensordot_operands(cm, left, right, axes=axes)

    this_axes = left.grid.grid_shape[:-axes]
    this_sum_axes = left.grid.grid_shape[-axes:]
    other_axes = right.grid.grid_shape[axes:]
    other_sum_axes = right.grid.grid_shape[:axes]
    assert this_sum_axes == other_sum_axes
    result_shape = tuple(left.shape[:-axes] + right.shape[axes:])
    result_block_shape = tuple(left.block_shape[:-axes] + right.block_shape[axes:])
    result_grid = ArrayGrid(
        shape=result_shape,
        block_shape=result_block_shape,
        dtype=array_utils.get_bop_output_type(
            "tensordot", left.dtype, right.dtype
        ).__name__,
    )
    assert result_grid.grid_shape == tuple(this_axes + other_axes)
    result = BlockArray(result_grid, cm)
    this_dims = list(itertools.product(*map(range, this_axes)))
    other_dims = list(itertools.product(*map(range, other_axes)))
    sum_dims = list(itertools.product(*map(range, this_sum_axes)))
    for i in this_dims:
        for j in other_dims:
            grid_entry = tuple(i + j)
            result_block: Block = result.blocks[grid_entry]
            sum_oids = []
            for k in sum_dims:
                self_block: Block = left.blocks[tuple(i + k)]
                other_block: Block = right.blocks[tuple(k + j)]
                dot_grid_args = _compute_tensordot_syskwargs(self_block, other_block)
                dotted_oid = cm.bop(
                    "tensordot",
                    self_block.oid,
                    other_block.oid,
                    self_block.transposed,
                    other_block.transposed,
                    axes=axes,
                    syskwargs={
                        "grid_entry": dot_grid_args[0],
                        "grid_shape": dot_grid_args[1],
                    },
                )
                sum_oids.append(
                    (dotted_oid, dot_grid_args[0], dot_grid_args[1], False)
                )
            result_block.oid = tree_reduce(
                cm, "sum", sum_oids, result_block.grid_entry, result_block.grid_shape
            )
    return result
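
# Usage sketch (hedged; `x` and `y` are assumed to be rank-2 BlockArrays with
# matching inner dims): with the blockarray.py changes above, the matmul
# dunder now routes here, so these two forms are expected to be equivalent.
#
#   z1 = tensordot(cm, x, y, axes=1)
#   z2 = x @ y  # BlockArray.__matmul__ -> ops.tensordot(cm, x, y, axes=1)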


def get_compatible_elementwise_operands(left, right, cm):
    # Imported here to avoid a circular import with blockarray.py.
    from nums.core.array.blockarray import BlockArray

    def truncated_block_shape(block_shape_a, shape_b):
        # Creates block shape for b based on b's available dims.
        # Example: If a has shape NxMxK and b has shape K,
        # set b.block_shape = a.block_shape[-1:].
        assert len(block_shape_a) >= len(shape_b)
        block_shape_b = []
        for i in range(-len(shape_b), 0):
            block_shape_b.append(block_shape_a[i])
        return tuple(block_shape_b)

    def broadcast_a_to_b(a, b):
        # Use this when a has at least as many dims as b.
        if not isinstance(a, BlockArray):
            # Make a a block array if it's not already.
            a = BlockArray.to_block_array(a, cm)

        if len(a.shape) == len(b.shape):
            b_block_shape = a.block_shape
        else:
            b_block_shape = truncated_block_shape(a.block_shape, b.shape)

        if isinstance(b, BlockArray):
            if a.block_shape == b.block_shape:
                return a, b
            else:
                return a, b.reshape(block_shape=b_block_shape)
        else:
            return a, BlockArray.to_block_array(b, a.cm, block_shape=b_block_shape)

    left = atleast_nd_array(left)
    right = atleast_nd_array(right)
    assert array_utils.can_broadcast_shapes(left.shape, right.shape)

    # Handle cases where ndims don't match by repartitioning
    # the smaller operand to agree with the larger one.
    if left.ndim > right.ndim:
        left, right = broadcast_a_to_b(left, right)
    else:
        right, left = broadcast_a_to_b(right, left)
    return left, right


def fast_element_wise(op_name, left, right):
    """
    Implements fast scheduling for basic element-wise operations.
    """
    from nums.core.array.blockarray import BlockArray

    dtype = array_utils.get_bop_output_type(op_name, left.dtype, right.dtype)
    # Schedule the op first.
    blocks = np.empty(shape=left.grid.grid_shape, dtype=Block)
    for grid_entry in left.grid.get_entry_iterator():
        self_block: Block = left.blocks[grid_entry]
        other_block: Block = right.blocks[grid_entry]
        blocks[grid_entry] = block = Block(
            grid_entry=grid_entry,
            grid_shape=self_block.grid_shape,
            rect=self_block.rect,
            shape=self_block.shape,
            dtype=dtype,
            transposed=False,
            cm=left.cm,
        )
        block.oid = left.cm.bop(
            op_name,
            self_block.oid,
            other_block.oid,
            self_block.transposed,
            other_block.transposed,
            axes={},
            syskwargs={
                "grid_entry": grid_entry,
                "grid_shape": left.grid.grid_shape,
            },
        )
    return BlockArray(
        ArrayGrid(left.shape, left.block_shape, dtype.__name__),
        left.cm,
        blocks=blocks,
    )


def elementwise(op_name, left, right, cm):
    from nums.core.array.blockarray import BlockArray

    left, right = get_compatible_elementwise_operands(left, right, cm)
    if left.shape == right.shape and left.block_shape == right.block_shape:
        return fast_element_wise(op_name, left, right)
    blocks_op = left.blocks.__getattribute__("__%s__" % op_name)
    return BlockArray.from_blocks(
        blocks_op(right.blocks), result_shape=None, cm=left.cm
    )


def inequality(op, left, right, cm):
    from nums.core.array.blockarray import BlockArray

    left, right = get_compatible_elementwise_operands(left, right, cm)
    assert (
        right.shape == () or right.shape == left.shape
    ), "Currently supports comparison with scalars only."
    shape = array_utils.broadcast(left.shape, right.shape).shape
    block_shape = array_utils.broadcast_block_shape(
        left.shape, right.shape, left.block_shape
    )
    dtype = bool.__name__
    grid = ArrayGrid(shape, block_shape, dtype)
    result = BlockArray(grid, cm)
    for grid_entry in result.grid.get_entry_iterator():
        if right.shape == ():
            other_block: Block = right.blocks.item()
        else:
            other_block: Block = right.blocks[grid_entry]
        result.blocks[grid_entry] = left.blocks[grid_entry].bop(
            op, other_block, args={}
        )

    return result
diff --git a/nums/core/array/utils.py b/nums/core/array/utils.py
index b1b22798..07a5fb3b 100644
--- a/nums/core/array/utils.py
+++ b/nums/core/array/utils.py
@@ -123,9 +123,9 @@ def broadcast(a_shape, b_shape):


 def broadcast_block_shape(a_shape, b_shape, a_block_shape):
-    # Starting from last block shape dim and
-    # map each shape dim to block shape dim as already defined,
-    # and for the rest of dims, set block shape to 1.
+    # Start from the last block shape dim and
+    # map each shape dim to a block shape dim as already defined.
+    # For the remaining dims, set the block shape to 1.
     result_shape = broadcast(a_shape, b_shape).shape
     result_block_shape = []
     a_block_shape_r = list(reversed(a_block_shape))
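
Usage note (an illustrative sketch, not part of the patch): with these
changes, the BlockArray dunders delegate to nums.core.array.ops, and scalar
operands are normalized in ops rather than in check_or_convert_other. For a
BlockArray `x` and its ComputeManager `cm` (both assumed to be in scope), the
following two expressions should agree:

    import numpy as np
    from nums.core.array import ops

    y1 = x + 1                             # BlockArray.__add__ -> ops.elementwise
    y2 = ops.elementwise("add", x, 1, cm)  # direct call, scalar rhs converted internally
    assert np.all((y1 == y2).get())        # .get() fetches the materialized ndarray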