From e770b5680e19e7544050b6b410d8f027cec8e02a Mon Sep 17 00:00:00 2001
From: Thomas <t.baumann@fz-juelich.de>
Date: Fri, 8 Dec 2023 16:46:50 +0100
Subject: [PATCH] Cleanup and tests

---
 mpi4py_fft/__init__.py      |   1 +
 mpi4py_fft/distarray.py     | 161 +++++++++---------------------------
 mpi4py_fft/distarrayCuPy.py |  87 +++++++++++++++++++
 tests/test_darrayCuPy.py    | 156 ++++++++++++++++++++++++++++++++++
 4 files changed, 284 insertions(+), 121 deletions(-)
 create mode 100644 mpi4py_fft/distarrayCuPy.py
 create mode 100644 tests/test_darrayCuPy.py

diff --git a/mpi4py_fft/__init__.py b/mpi4py_fft/__init__.py
index 7450fae..e2ee0b4 100644
--- a/mpi4py_fft/__init__.py
+++ b/mpi4py_fft/__init__.py
@@ -20,6 +20,7 @@
 __author__ = 'Lisandro Dalcin and Mikael Mortensen'
 
 from .distarray import DistArray, newDistArray, Function
+from .distarrayCuPy import DistArrayCuPy
 from .mpifft import PFFT
 from . import fftw
 from .fftw import fftlib
diff --git a/mpi4py_fft/distarray.py b/mpi4py_fft/distarray.py
index e2275f1..c20e5a6 100644
--- a/mpi4py_fft/distarray.py
+++ b/mpi4py_fft/distarray.py
@@ -5,12 +5,6 @@
 from .pencil import Pencil, Subcomm
 from .io import HDF5File, NCFile, FileBase
 
-try:
-    import cupy as cp
-except ImportError:
-    # TODO: move cupy stuff to seperate file as to avoid confusion!
-    import numpy as cp
-
 comm = MPI.COMM_WORLD
 
 
@@ -20,6 +14,24 @@ class DistArrayBase:
 
     Attributes:
         - xp: Numerical library, a.k.a. numpy or cupy as class attribute
+
+    Note
+    ----
+    Tensors of rank higher than 0 are not distributed in the first ``rank``
+    indices. For example,
+
+    >>> from mpi4py_fft import DistArray
+    >>> a = DistArray((3, 8, 8, 8), rank=1)
+    >>> print(a.pencil.shape)
+    (8, 8, 8)
+
+    The array ``a`` cannot be distributed in the first axis of length 3 since
+    rank is 1 and this first index represent the vector component. The ``pencil``
+    attribute of ``a`` thus only considers the last three axes.
+
+    Also note that the ``alignment`` keyword does not take rank into
+    consideration. Setting alignment=2 for the array above means that the last
+    axis will be aligned, also when rank>0.
     """
 
     xp = None
@@ -70,11 +82,6 @@ def dimensions(self):
         """Return dimensions of array not including rank"""
         return len(self._p0.shape)
 
-    @property
-    def v(self):
-        """Return local ``self`` array as an ``ndarray`` object"""
-        return self.__array__()
-
     @staticmethod
     def getSubcomm(subcomm, global_shape, rank, alignment):
         if isinstance(subcomm, Subcomm):
@@ -104,14 +111,13 @@ def getPencil(cls, subcomm, rank, global_shape, alignment):
             assert sizes[alignment] == 1
         else:
             # Decide that alignment is the last axis with size 1
-            alignment = cls.xp.flatnonzero(cls.xp.array(sizes) == 1)[-1]
+            alignment = np.flatnonzero(np.array(sizes) == 1)[-1]
 
         p0 = Pencil(subcomm, global_shape[rank:], axis=alignment)
 
+        subshape = p0.subshape
         if rank > 0:
             subshape = global_shape[:rank] + subshape
-        else:
-            subshape = p0.subshape
 
         return p0, subshape
 
@@ -180,16 +186,17 @@ def get(self, gslice):
         # MPI and only on rank 0.
         import h5py
 
+        # TODO: can we use h5py to communicate the data without copying to cpu first when using cupy?
         f = h5py.File("tmp.h5", "w", driver="mpio", comm=comm)
         s = self.local_slice()
-        sp = self.xp.nonzero([isinstance(x, slice) for x in gslice])[0]
-        sf = tuple(self.xp.take(s, sp))
+        sp = np.nonzero([isinstance(x, slice) for x in gslice])[0]
+        sf = tuple(np.take(s, sp))
         f.require_dataset(
-            "data", shape=tuple(self.xp.take(self.global_shape, sp)), dtype=self.dtype
+            "data", shape=tuple(np.take(self.global_shape, sp)), dtype=self.dtype
         )
         gslice = list(gslice)
         # We are required to check if the indices in si are on this processor
-        si = self.xp.nonzero(
+        si = np.nonzero(
             [isinstance(x, int) and not z == slice(None) for x, z in zip(gslice, s)]
         )[0]
         on_this_proc = True
@@ -199,7 +206,8 @@ def get(self, gslice):
             else:
                 on_this_proc = False
         if on_this_proc:
-            f["data"][sf] = self[tuple(gslice)]
+            data = self.asnumpy
+            f["data"][sf] = data[tuple(gslice)]
         f.close()
         c = None
         if comm.Get_rank() == 0:
@@ -280,7 +288,7 @@ def redistribute(self, axis=None, out=None):
                 return self
 
         if out is not None:
-            assert isinstance(out, DistArray)
+            assert issubclass(type(out), DistArrayBase)
             assert self.global_shape == out.global_shape
             axis = out.alignment
             if self.commsizes == out.commsizes:
@@ -448,24 +456,6 @@ class DistArray(DistArrayBase, np.ndarray):
 
     For more information, see `numpy.ndarray <https://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html>`_
 
-    Note
-    ----
-    Tensors of rank higher than 0 are not distributed in the first ``rank``
-    indices. For example,
-
-    >>> from mpi4py_fft import DistArray
-    >>> a = DistArray((3, 8, 8, 8), rank=1)
-    >>> print(a.pencil.shape)
-    (8, 8, 8)
-
-    The array ``a`` cannot be distributed in the first axis of length 3 since
-    rank is 1 and this first index represent the vector component. The ``pencil``
-    attribute of ``a`` thus only considers the last three axes.
-
-    Also note that the ``alignment`` keyword does not take rank into
-    consideration. Setting alignment=2 for the array above means that the last
-    axis will be aligned, also when rank>0.
-
     """
 
     xp = np
@@ -501,89 +491,14 @@ def __new__(
         obj._rank = rank
         return obj
 
+    @property
+    def v(self):
+        """Return local ``self`` array as an ``ndarray`` object"""
+        return self.__array__()
 
-class DistArrayCuPy(DistArrayBase, cp.ndarray):
-    """Distributed CuPy array
-
-    This Numpy array is part of a larger global array. Information about the
-    distribution is contained in the attributes.
-
-    Parameters
-    ----------
-    global_shape : sequence of ints
-        Shape of non-distributed global array
-    subcomm : None, :class:`.Subcomm` object or sequence of ints, optional
-        Describes how to distribute the array
-    val : Number or None, optional
-        Initialize array with this number if buffer is not given
-    dtype : np.dtype, optional
-        Type of array
-    buffer : Numpy array, optional
-        Array of correct shape. The buffer owns the memory that is used for
-        this array.
-    alignment : None or int, optional
-        Make sure array is aligned in this direction. Note that alignment does
-        not take rank into consideration.
-    rank : int, optional
-        Rank of tensor (number of free indices, a scalar is zero, vector one,
-        matrix two)
-
-
-    For more information, see `numpy.ndarray <https://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html>`_
-
-    Note
-    ----
-    Tensors of rank higher than 0 are not distributed in the first ``rank``
-    indices. For example,
-
-    >>> from mpi4py_fft import DistArray
-    >>> a = DistArray((3, 8, 8, 8), rank=1)
-    >>> print(a.pencil.shape)
-    (8, 8, 8)
-
-    The array ``a`` cannot be distributed in the first axis of length 3 since
-    rank is 1 and this first index represent the vector component. The ``pencil``
-    attribute of ``a`` thus only considers the last three axes.
-
-    Also note that the ``alignment`` keyword does not take rank into
-    consideration. Setting alignment=2 for the array above means that the last
-    axis will be aligned, also when rank>0.
-
-    """
-
-    # TODO: docs
-    xp = cp
-
-    def __new__(
-        cls,
-        global_shape,
-        subcomm=None,
-        val=None,
-        dtype=float,
-        memptr=None,
-        strides=None,
-        alignment=None,
-        rank=0,
-    ):
-        if len(global_shape[rank:]) < 2:  # 1D case
-            obj = cls.xp.ndarray.__new__(
-                cls, global_shape, dtype=dtype, memptr=memptr, strides=strides
-            )
-            if memptr is None and isinstance(val, Number):
-                obj.fill(val)
-            obj._rank = rank
-            obj._p0 = None
-            return obj
-
-        subcomm = cls.getSubcomm(subcomm, global_shape, rank, alignment)
-        p0, subshape = cls.getPencil(subcomm, rank, global_shape, alignment)
-
-        obj = cls.xp.ndarray.__new__(cls, subshape, dtype=dtype, memptr=memptr)
-        if memptr is None and isinstance(val, Number):
-            obj.fill(val)
-        obj._p0 = p0
-        obj._rank = rank
-        return obj
+    @property
+    def asnumpy(self):
+        return self
 
 
 def newDistArray(pfft, forward_output=True, val=0, rank=0, view=False, useCuPy=False):
@@ -630,7 +545,11 @@ def newDistArray(pfft, forward_output=True, val=0, rank=0, view=False, useCuPy=F
         dtype = pfft.forward.input_array.dtype
     global_shape = (len(global_shape),) * rank + global_shape
 
-    darraycls = DistArrayCuPy if useCuPy else DistArray
+    if useCuPy:
+        from mpi4py_fft.distarrayCuPy import DistArrayCuPy as darraycls
+    else:
+        darraycls = DistArray
+
     z = darraycls(
         global_shape,
         subcomm=p0.subcomm,
diff --git a/mpi4py_fft/distarrayCuPy.py b/mpi4py_fft/distarrayCuPy.py
new file mode 100644
index 0000000..d3ead89
--- /dev/null
+++ b/mpi4py_fft/distarrayCuPy.py
@@ -0,0 +1,87 @@
+from mpi4py_fft.distarray import DistArrayBase
+import cupy as cp
+from mpi4py import MPI
+from numbers import Number
+
+comm = MPI.COMM_WORLD
+
+
+class DistArrayCuPy(DistArrayBase, cp.ndarray):
+    """Distributed CuPy array
+
+    This CuPy array is part of a larger global array. Information about the
+    distribution is contained in the attributes.
+
+    Parameters
+    ----------
+    global_shape : sequence of ints
+        Shape of non-distributed global array
+    subcomm : None, :class:`.Subcomm` object or sequence of ints, optional
+        Describes how to distribute the array
+    val : Number or None, optional
+        Initialize array with this number if buffer is not given
+    dtype : cp.dtype, optional
+        Type of array
+    memptr : cupy.cuda.MemoryPointer, optional
+        Pointer to the array content head.
+    alignment : None or int, optional
+        Make sure array is aligned in this direction. Note that alignment does
+        not take rank into consideration.
+    rank : int, optional
+        Rank of tensor (number of free indices, a scalar is zero, vector one,
+        matrix two)
+
+
+    For more information, see `cupy.ndarray <https://docs.cupy.dev/en/stable/reference/generated/cupy.ndarray.html#cupy.ndarray>`_
+
+    """
+
+    xp = cp
+
+    def __new__(
+        cls,
+        global_shape,
+        subcomm=None,
+        val=None,
+        dtype=float,
+        memptr=None,
+        strides=None,
+        alignment=None,
+        rank=0,
+    ):
+        if len(global_shape[rank:]) < 2:  # 1D case
+            obj = cls.xp.ndarray.__new__(
+                cls, global_shape, dtype=dtype, memptr=memptr, strides=strides
+            )
+            if memptr is None and isinstance(val, Number):
+                obj.fill(val)
+            obj._rank = rank
+            obj._p0 = None
+            return obj
+
+        subcomm = cls.getSubcomm(subcomm, global_shape, rank, alignment)
+        p0, subshape = cls.getPencil(subcomm, rank, global_shape, alignment)
+
+        obj = cls.xp.ndarray.__new__(cls, subshape, dtype=dtype, memptr=memptr)
+        if memptr is None and isinstance(val, Number):
+            obj.fill(val)
+        obj._p0 = p0
+        obj._rank = rank
+        return obj
+
+    def get(self, *args, **kwargs):
+        """Untangle inheritance conflicts"""
+        if any(isinstance(me, tuple) for me in args):
+            return DistArrayBase.get(self, *args, **kwargs)
+        else:
+            return cp.ndarray.get(self, *args, **kwargs)
+
+    @property
+    def asnumpy(self):
+        """Copy the array to CPU"""
+        return self.get()
+
+    @property
+    def v(self):
+        """Return local ``self`` array as an ``ndarray`` object"""
+        return cp.ndarray.__getitem__(self, slice(0, -1, 1))
diff --git a/tests/test_darrayCuPy.py b/tests/test_darrayCuPy.py
new file mode 100644
index 0000000..0fd8d08
--- /dev/null
+++ b/tests/test_darrayCuPy.py
@@ -0,0 +1,156 @@
+import cupy as cp
+from mpi4py import MPI
+from mpi4py_fft import DistArrayCuPy, newDistArray, PFFT
+from mpi4py_fft.pencil import Subcomm
+
+comm = MPI.COMM_WORLD
+
+
+def test_1Darray():
+    N = (8,)
+    z = DistArrayCuPy(N, val=2)
+    assert z[0] == 2
+    assert z.shape == N
+
+
+def test_2Darray():
+    N = (8, 8)
+    for subcomm in ((0, 1), (1, 0), None, Subcomm(comm, (0, 1))):
+        for rank in (0, 1, 2):
+            M = (2,) * rank + N
+            alignment = None
+            if subcomm is None and rank == 1:
+                alignment = 1
+            a = DistArrayCuPy(M, subcomm=subcomm, val=1, rank=rank, alignment=alignment)
+            assert a.rank == rank
+            assert a.global_shape == M
+            _ = a.substart
+            c = a.subcomm
+            z = a.commsizes
+            _ = a.pencil
+            assert cp.prod(cp.array(z)) == comm.Get_size()
+            if rank > 0:
+                a0 = a[0]
+                assert isinstance(a0, DistArrayCuPy)
+                assert a0.rank == rank - 1
+            aa = a.v
+            assert isinstance(aa, cp.ndarray)
+            try:
+                k = a.get((0,) * rank + (0, slice(None)))
+                if comm.Get_rank() == 0:
+                    assert len(k) == N[1]
+                    assert cp.sum(k) == N[1]
+                k = a.get((0,) * rank + (slice(None), 0))
+                if comm.Get_rank() == 0:
+                    assert len(k) == N[0]
+                    assert cp.sum(k) == N[0]
+            except ModuleNotFoundError:
+                pass
+            _ = a.local_slice()
+            newaxis = (a.alignment + 1) % 2
+            _ = a.get_pencil_and_transfer(newaxis)
+            a[:] = MPI.COMM_WORLD.Get_rank()
+            b = a.redistribute(newaxis)
+            a = b.redistribute(out=a)
+            a = b.redistribute(a.alignment, out=a)
+            s0 = MPI.COMM_WORLD.reduce(cp.linalg.norm(a) ** 2)
+            s1 = MPI.COMM_WORLD.reduce(cp.linalg.norm(b) ** 2)
+            if MPI.COMM_WORLD.Get_rank() == 0:
+                assert abs(s0 - s1) < 1e-1
+            c = a.redistribute(a.alignment)
+            assert c is a
+
+
+def test_3Darray():
+    N = (8, 8, 8)
+    for subcomm in (
+        (0, 0, 1),
+        (0, 1, 0),
+        (1, 0, 0),
+        (0, 1, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        None,
+        Subcomm(comm, (0, 0, 1)),
+    ):
+        for rank in (0, 1, 2):
+            M = (3,) * rank + N
+            alignment = None
+            if subcomm is None and rank == 1:
+                alignment = 2
+            a = DistArrayCuPy(M, subcomm=subcomm, val=1, rank=rank, alignment=alignment)
+            assert a.rank == rank
+            assert a.global_shape == M
+            _ = a.substart
+            _ = a.subcomm
+            z = a.commsizes
+            _ = a.pencil
+            assert cp.prod(cp.array(z)) == comm.Get_size()
+            if rank > 0:
+                a0 = a[0]
+                assert isinstance(a0, DistArrayCuPy)
+                assert a0.rank == rank - 1
+            if rank == 2:
+                a0 = a[0, 1]
+                assert isinstance(a0, DistArrayCuPy)
+                assert a0.rank == 0
+            aa = a.v
+            assert isinstance(aa, cp.ndarray)
+            try:
+                k = a.get((0,) * rank + (0, 0, slice(None)))
+                if comm.Get_rank() == 0:
+                    assert len(k) == N[2]
+                    assert cp.sum(k) == N[2]
+                k = a.get((0,) * rank + (slice(None), 0, 0))
+                if comm.Get_rank() == 0:
+                    assert len(k) == N[0]
+                    assert cp.sum(k) == N[0]
+            except ModuleNotFoundError:
+                pass
+            _ = a.local_slice()
+            newaxis = (a.alignment + 1) % 3
+            _ = a.get_pencil_and_transfer(newaxis)
+            a[:] = MPI.COMM_WORLD.Get_rank()
+            b = a.redistribute(newaxis)
+            a = b.redistribute(out=a)
+            s0 = MPI.COMM_WORLD.reduce(cp.linalg.norm(a) ** 2)
+            s1 = MPI.COMM_WORLD.reduce(cp.linalg.norm(b) ** 2)
+            if MPI.COMM_WORLD.Get_rank() == 0:
+                assert abs(s0 - s1) < 1e-1
+
+
+def test_newDistArray():
+    N = (8, 8, 8)
+    pfft = PFFT(MPI.COMM_WORLD, N)
+    for forward_output in (True, False):
+        for view in (True, False):
+            for rank in (0, 1, 2):
+                a = newDistArray(
+                    pfft,
+                    forward_output=forward_output,
+                    rank=rank,
+                    view=view,
+                    useCuPy=True,
+                )
+                if view is False:
+                    assert isinstance(a, DistArrayCuPy)
+                    assert a.rank == rank
+                    if rank == 0:
+                        qfft = PFFT(MPI.COMM_WORLD, darray=a)
+                    elif rank == 1:
+                        qfft = PFFT(MPI.COMM_WORLD, darray=a[0])
+                    else:
+                        qfft = PFFT(MPI.COMM_WORLD, darray=a[0, 0])
+                    qfft.destroy()
+
+                else:
+                    assert isinstance(a, cp.ndarray)
+                    assert a.base.rank == rank
+    pfft.destroy()
+
+
+if __name__ == "__main__":
+    test_1Darray()
+    test_2Darray()
+    test_3Darray()
+    test_newDistArray()