From e770b5680e19e7544050b6b410d8f027cec8e02a Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Dec 2023 16:46:50 +0100 Subject: [PATCH] Cleanup and tests --- mpi4py_fft/__init__.py | 1 + mpi4py_fft/distarray.py | 161 +++++++++--------------------------- mpi4py_fft/distarrayCuPy.py | 87 +++++++++++++++++++ tests/test_darrayCuPy.py | 156 ++++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+), 121 deletions(-) create mode 100644 mpi4py_fft/distarrayCuPy.py create mode 100644 tests/test_darrayCuPy.py diff --git a/mpi4py_fft/__init__.py b/mpi4py_fft/__init__.py index 7450fae..e2ee0b4 100644 --- a/mpi4py_fft/__init__.py +++ b/mpi4py_fft/__init__.py @@ -20,6 +20,7 @@ __author__ = 'Lisandro Dalcin and Mikael Mortensen' from .distarray import DistArray, newDistArray, Function +from .distarrayCuPy import DistArrayCuPy from .mpifft import PFFT from . import fftw from .fftw import fftlib diff --git a/mpi4py_fft/distarray.py b/mpi4py_fft/distarray.py index e2275f1..c20e5a6 100644 --- a/mpi4py_fft/distarray.py +++ b/mpi4py_fft/distarray.py @@ -5,12 +5,6 @@ from .pencil import Pencil, Subcomm from .io import HDF5File, NCFile, FileBase -try: - import cupy as cp -except ImportError: - # TODO: move cupy stuff to seperate file as to avoid confusion! - import numpy as cp - comm = MPI.COMM_WORLD @@ -20,6 +14,24 @@ class DistArrayBase: Attributes: - xp: Numerical library, a.k.a. numpy or cupy as class attribute + + Note + ---- + Tensors of rank higher than 0 are not distributed in the first ``rank`` + indices. For example, + + >>> from mpi4py_fft import DistArray + >>> a = DistArray((3, 8, 8, 8), rank=1) + >>> print(a.pencil.shape) + (8, 8, 8) + + The array ``a`` cannot be distributed in the first axis of length 3 since + rank is 1 and this first index represent the vector component. The ``pencil`` + attribute of ``a`` thus only considers the last three axes. + + Also note that the ``alignment`` keyword does not take rank into + consideration. Setting alignment=2 for the array above means that the last + axis will be aligned, also when rank>0. """ xp = None @@ -70,11 +82,6 @@ def dimensions(self): """Return dimensions of array not including rank""" return len(self._p0.shape) - @property - def v(self): - """Return local ``self`` array as an ``ndarray`` object""" - return self.__array__() - @staticmethod def getSubcomm(subcomm, global_shape, rank, alignment): if isinstance(subcomm, Subcomm): @@ -104,14 +111,13 @@ def getPencil(cls, subcomm, rank, global_shape, alignment): assert sizes[alignment] == 1 else: # Decide that alignment is the last axis with size 1 - alignment = cls.xp.flatnonzero(cls.xp.array(sizes) == 1)[-1] + alignment = np.flatnonzero(np.array(sizes) == 1)[-1] p0 = Pencil(subcomm, global_shape[rank:], axis=alignment) + subshape = p0.subshape if rank > 0: subshape = global_shape[:rank] + subshape - else: - subshape = p0.subshape return p0, subshape @@ -180,16 +186,17 @@ def get(self, gslice): # MPI and only on rank 0. import h5py + # TODO: can we use h5py to communicate the data without copying to cpu first when using cupy? f = h5py.File("tmp.h5", "w", driver="mpio", comm=comm) s = self.local_slice() - sp = self.xp.nonzero([isinstance(x, slice) for x in gslice])[0] - sf = tuple(self.xp.take(s, sp)) + sp = np.nonzero([isinstance(x, slice) for x in gslice])[0] + sf = tuple(np.take(s, sp)) f.require_dataset( - "data", shape=tuple(self.xp.take(self.global_shape, sp)), dtype=self.dtype + "data", shape=tuple(np.take(self.global_shape, sp)), dtype=self.dtype ) gslice = list(gslice) # We are required to check if the indices in si are on this processor - si = self.xp.nonzero( + si = np.nonzero( [isinstance(x, int) and not z == slice(None) for x, z in zip(gslice, s)] )[0] on_this_proc = True @@ -199,7 +206,8 @@ def get(self, gslice): else: on_this_proc = False if on_this_proc: - f["data"][sf] = self[tuple(gslice)] + data = self.asnumpy + f["data"][sf] = data[tuple(gslice)] f.close() c = None if comm.Get_rank() == 0: @@ -280,7 +288,7 @@ def redistribute(self, axis=None, out=None): return self if out is not None: - assert isinstance(out, DistArray) + assert issubclass(type(out), DistArrayBase) assert self.global_shape == out.global_shape axis = out.alignment if self.commsizes == out.commsizes: @@ -448,24 +456,6 @@ class DistArray(DistArrayBase, np.ndarray): For more information, see `numpy.ndarray `_ - Note - ---- - Tensors of rank higher than 0 are not distributed in the first ``rank`` - indices. For example, - - >>> from mpi4py_fft import DistArray - >>> a = DistArray((3, 8, 8, 8), rank=1) - >>> print(a.pencil.shape) - (8, 8, 8) - - The array ``a`` cannot be distributed in the first axis of length 3 since - rank is 1 and this first index represent the vector component. The ``pencil`` - attribute of ``a`` thus only considers the last three axes. - - Also note that the ``alignment`` keyword does not take rank into - consideration. Setting alignment=2 for the array above means that the last - axis will be aligned, also when rank>0. - """ xp = np @@ -501,89 +491,14 @@ def __new__( obj._rank = rank return obj + @property + def v(self): + """Return local ``self`` array as an ``ndarray`` object""" + return self.__array__() -class DistArrayCuPy(DistArrayBase, cp.ndarray): - """Distributed CuPy array - - This Numpy array is part of a larger global array. Information about the - distribution is contained in the attributes. - - Parameters - ---------- - global_shape : sequence of ints - Shape of non-distributed global array - subcomm : None, :class:`.Subcomm` object or sequence of ints, optional - Describes how to distribute the array - val : Number or None, optional - Initialize array with this number if buffer is not given - dtype : np.dtype, optional - Type of array - buffer : Numpy array, optional - Array of correct shape. The buffer owns the memory that is used for - this array. - alignment : None or int, optional - Make sure array is aligned in this direction. Note that alignment does - not take rank into consideration. - rank : int, optional - Rank of tensor (number of free indices, a scalar is zero, vector one, - matrix two) - - - For more information, see `numpy.ndarray `_ - - Note - ---- - Tensors of rank higher than 0 are not distributed in the first ``rank`` - indices. For example, - - >>> from mpi4py_fft import DistArray - >>> a = DistArray((3, 8, 8, 8), rank=1) - >>> print(a.pencil.shape) - (8, 8, 8) - - The array ``a`` cannot be distributed in the first axis of length 3 since - rank is 1 and this first index represent the vector component. The ``pencil`` - attribute of ``a`` thus only considers the last three axes. - - Also note that the ``alignment`` keyword does not take rank into - consideration. Setting alignment=2 for the array above means that the last - axis will be aligned, also when rank>0. - - """ - - # TODO: docs - xp = cp - - def __new__( - cls, - global_shape, - subcomm=None, - val=None, - dtype=float, - memptr=None, - strides=None, - alignment=None, - rank=0, - ): - if len(global_shape[rank:]) < 2: # 1D case - obj = cls.xp.ndarray.__new__( - cls, global_shape, dtype=dtype, memptr=memptr, strides=strides - ) - if memptr is None and isinstance(val, Number): - obj.fill(val) - obj._rank = rank - obj._p0 = None - return obj - - subcomm = cls.getSubcomm(subcomm, global_shape, rank, alignment) - p0, subshape = cls.getPencil(subcomm, rank, global_shape, alignment) - - obj = cls.xp.ndarray.__new__(cls, subshape, dtype=dtype, memptr=memptr) - if memptr is None and isinstance(val, Number): - obj.fill(val) - obj._p0 = p0 - obj._rank = rank - return obj + @property + def asnumpy(self): + return self def newDistArray(pfft, forward_output=True, val=0, rank=0, view=False, useCuPy=False): @@ -630,7 +545,11 @@ def newDistArray(pfft, forward_output=True, val=0, rank=0, view=False, useCuPy=F dtype = pfft.forward.input_array.dtype global_shape = (len(global_shape),) * rank + global_shape - darraycls = DistArrayCuPy if useCuPy else DistArray + if useCuPy: + from mpi4py_fft.distarrayCuPy import DistArrayCuPy as darraycls + else: + darraycls = DistArray + z = darraycls( global_shape, subcomm=p0.subcomm, diff --git a/mpi4py_fft/distarrayCuPy.py b/mpi4py_fft/distarrayCuPy.py new file mode 100644 index 0000000..d3ead89 --- /dev/null +++ b/mpi4py_fft/distarrayCuPy.py @@ -0,0 +1,87 @@ +from mpi4py_fft.distarray import DistArrayBase +import cupy as cp +from mpi4py import MPI +from numbers import Number + +comm = MPI.COMM_WORLD + + +class DistArrayCuPy(DistArrayBase, cp.ndarray): + """Distributed CuPy array + + This CuPy array is part of a larger global array. Information about the + distribution is contained in the attributes. + + Parameters + ---------- + global_shape : sequence of ints + Shape of non-distributed global array + subcomm : None, :class:`.Subcomm` object or sequence of ints, optional + Describes how to distribute the array + val : Number or None, optional + Initialize array with this number if buffer is not given + dtype : cp.dtype, optional + Type of array + memptr : cupy.cuda.MemoryPointer, optional + Pointer to the array content head. + alignment : None or int, optional + Make sure array is aligned in this direction. Note that alignment does + not take rank into consideration. + rank : int, optional + Rank of tensor (number of free indices, a scalar is zero, vector one, + matrix two) + + + For more information, see `cupy.ndarray `_ + + """ + + xp = cp + + def __new__( + cls, + global_shape, + subcomm=None, + val=None, + dtype=float, + memptr=None, + strides=None, + alignment=None, + rank=0, + ): + if len(global_shape[rank:]) < 2: # 1D case + obj = cls.xp.ndarray.__new__( + cls, global_shape, dtype=dtype, memptr=memptr, strides=strides + ) + if memptr is None and isinstance(val, Number): + obj.fill(val) + obj._rank = rank + obj._p0 = None + return obj + + subcomm = cls.getSubcomm(subcomm, global_shape, rank, alignment) + p0, subshape = cls.getPencil(subcomm, rank, global_shape, alignment) + + obj = cls.xp.ndarray.__new__(cls, subshape, dtype=dtype, memptr=memptr) + if memptr is None and isinstance(val, Number): + obj.fill(val) + obj._p0 = p0 + obj._rank = rank + return obj + + def get(self, *args, **kwargs): + """Untangle inheritance conflicts""" + if any(isinstance(me, tuple) for me in args): + return DistArrayBase.get(self, *args, **kwargs) + else: + return cp.ndarray.get(self, *args, **kwargs) + + @property + def asnumpy(self): + """Copy the array to CPU""" + return self.get() + + @property + def v(self): + """Return local ``self`` array as an ``ndarray`` object""" + return cp.ndarray.__getitem__(self, slice(0, -1, 1)) diff --git a/tests/test_darrayCuPy.py b/tests/test_darrayCuPy.py new file mode 100644 index 0000000..0fd8d08 --- /dev/null +++ b/tests/test_darrayCuPy.py @@ -0,0 +1,156 @@ +import cupy as cp +from mpi4py import MPI +from mpi4py_fft import DistArrayCuPy, newDistArray, PFFT +from mpi4py_fft.pencil import Subcomm + +comm = MPI.COMM_WORLD + + +def test_1Darray(): + N = (8,) + z = DistArrayCuPy(N, val=2) + assert z[0] == 2 + assert z.shape == N + + +def test_2Darray(): + N = (8, 8) + for subcomm in ((0, 1), (1, 0), None, Subcomm(comm, (0, 1))): + for rank in (0, 1, 2): + M = (2,) * rank + N + alignment = None + if subcomm is None and rank == 1: + alignment = 1 + a = DistArrayCuPy(M, subcomm=subcomm, val=1, rank=rank, alignment=alignment) + assert a.rank == rank + assert a.global_shape == M + _ = a.substart + c = a.subcomm + z = a.commsizes + _ = a.pencil + assert cp.prod(cp.array(z)) == comm.Get_size() + if rank > 0: + a0 = a[0] + assert isinstance(a0, DistArrayCuPy) + assert a0.rank == rank - 1 + aa = a.v + assert isinstance(aa, cp.ndarray) + try: + k = a.get((0,) * rank + (0, slice(None))) + if comm.Get_rank() == 0: + assert len(k) == N[1] + assert cp.sum(k) == N[1] + k = a.get((0,) * rank + (slice(None), 0)) + if comm.Get_rank() == 0: + assert len(k) == N[0] + assert cp.sum(k) == N[0] + except ModuleNotFoundError: + pass + _ = a.local_slice() + newaxis = (a.alignment + 1) % 2 + _ = a.get_pencil_and_transfer(newaxis) + a[:] = MPI.COMM_WORLD.Get_rank() + b = a.redistribute(newaxis) + a = b.redistribute(out=a) + a = b.redistribute(a.alignment, out=a) + s0 = MPI.COMM_WORLD.reduce(cp.linalg.norm(a) ** 2) + s1 = MPI.COMM_WORLD.reduce(cp.linalg.norm(b) ** 2) + if MPI.COMM_WORLD.Get_rank() == 0: + assert abs(s0 - s1) < 1e-1 + c = a.redistribute(a.alignment) + assert c is a + + +def test_3Darray(): + N = (8, 8, 8) + for subcomm in ( + (0, 0, 1), + (0, 1, 0), + (1, 0, 0), + (0, 1, 1), + (1, 0, 1), + (1, 1, 0), + None, + Subcomm(comm, (0, 0, 1)), + ): + for rank in (0, 1, 2): + M = (3,) * rank + N + alignment = None + if subcomm is None and rank == 1: + alignment = 2 + a = DistArrayCuPy(M, subcomm=subcomm, val=1, rank=rank, alignment=alignment) + assert a.rank == rank + assert a.global_shape == M + _ = a.substart + _ = a.subcomm + z = a.commsizes + _ = a.pencil + assert cp.prod(cp.array(z)) == comm.Get_size() + if rank > 0: + a0 = a[0] + assert isinstance(a0, DistArrayCuPy) + assert a0.rank == rank - 1 + if rank == 2: + a0 = a[0, 1] + assert isinstance(a0, DistArrayCuPy) + assert a0.rank == 0 + aa = a.v + assert isinstance(aa, cp.ndarray) + try: + k = a.get((0,) * rank + (0, 0, slice(None))) + if comm.Get_rank() == 0: + assert len(k) == N[2] + assert cp.sum(k) == N[2] + k = a.get((0,) * rank + (slice(None), 0, 0)) + if comm.Get_rank() == 0: + assert len(k) == N[0] + assert cp.sum(k) == N[0] + except ModuleNotFoundError: + pass + _ = a.local_slice() + newaxis = (a.alignment + 1) % 3 + _ = a.get_pencil_and_transfer(newaxis) + a[:] = MPI.COMM_WORLD.Get_rank() + b = a.redistribute(newaxis) + a = b.redistribute(out=a) + s0 = MPI.COMM_WORLD.reduce(cp.linalg.norm(a) ** 2) + s1 = MPI.COMM_WORLD.reduce(cp.linalg.norm(b) ** 2) + if MPI.COMM_WORLD.Get_rank() == 0: + assert abs(s0 - s1) < 1e-1 + + +def test_newDistArray(): + N = (8, 8, 8) + pfft = PFFT(MPI.COMM_WORLD, N) + for forward_output in (True, False): + for view in (True, False): + for rank in (0, 1, 2): + a = newDistArray( + pfft, + forward_output=forward_output, + rank=rank, + view=view, + useCuPy=True, + ) + if view is False: + assert isinstance(a, DistArrayCuPy) + assert a.rank == rank + if rank == 0: + qfft = PFFT(MPI.COMM_WORLD, darray=a) + elif rank == 1: + qfft = PFFT(MPI.COMM_WORLD, darray=a[0]) + else: + qfft = PFFT(MPI.COMM_WORLD, darray=a[0, 0]) + qfft.destroy() + + else: + assert isinstance(a, cp.ndarray) + assert a.base.rank == rank + pfft.destroy() + + +if __name__ == "__main__": + test_1Darray() + test_2Darray() + test_3Darray() + test_newDistArray()