Commit
add coo matmul
yangguohao committed Dec 18, 2023
1 parent 6b66ab5 commit b09446e
Showing 3 changed files with 113 additions and 75 deletions.
29 changes: 29 additions & 0 deletions paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu
@@ -183,6 +183,25 @@ void MatmulCsrCsrGradKernel(const Context& dev_ctx,
#endif
}

template <typename T, typename Context>
void MatmulCooCooGradKernel(const Context& dev_ctx,
                            const SparseCooTensor& x,
                            const SparseCooTensor& y,
                            const SparseCooTensor& dout,
                            SparseCooTensor* dx,
                            SparseCooTensor* dy) {
  // 'cusparseSpGEMM' only supports CSR for now, so go COO -> CSR -> COO.
  SparseCsrTensor x_csr = CooToCsr<T, Context>(dev_ctx, x);
  SparseCsrTensor y_csr = CooToCsr<T, Context>(dev_ctx, y);
  SparseCsrTensor dout_csr = CooToCsr<T, Context>(dev_ctx, dout);
  SparseCsrTensor dx_csr, dy_csr;
  dx_csr.set_dims(dx->dims());
  dy_csr.set_dims(dy->dims());
  MatmulCsrCsrGradKernel<T>(dev_ctx, x_csr, y_csr, dout_csr, &dx_csr, &dy_csr);
  CsrToCooKernel<T>(dev_ctx, dx_csr, dx);
  CsrToCooKernel<T>(dev_ctx, dy_csr, dy);
}

template <typename T, typename Context>
void MaskedMatmulCsrGradKernel(const Context& dev_ctx,
const DenseTensor& x,
@@ -271,3 +290,13 @@ PD_REGISTER_KERNEL(matmul_csr_csr_grad,
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
  kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_CSR);
}

PD_REGISTER_KERNEL(matmul_coo_coo_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::MatmulCooCooGradKernel,
                   float,
                   double) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
  kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
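Taken together with the forward kernel in the next file, this grad kernel completes autograd for products of two COO tensors: gradients are computed by the existing CSR kernel and converted back. Below is a minimal usage sketch, not part of this commit; it assumes a CUDA >= 11.0 build and uses only APIs that appear in the new test at the bottom of this diff. The as_int32_coo helper is hypothetical, written because the test notes the kernels "only support 32-bit index".

import paddle

def as_int32_coo(dense, sparse_dim):
    # Hypothetical helper: rebuild the COO tensor with int32 indices,
    # mirroring the index casts in the new test.
    coo = dense.to_sparse_coo(sparse_dim)
    return paddle.sparse.sparse_coo_tensor(
        paddle.cast(coo.indices(), "int32"), coo.values(), coo.shape
    )

x = as_int32_coo(paddle.rand([16, 12]), 2)
y = as_int32_coo(paddle.rand([12, 10]), 2)
x.stop_gradient = False
y.stop_gradient = False

out = paddle.sparse.matmul(x, y)  # forward: MatmulCooCooKernel
out.backward()                    # backward: MatmulCooCooGradKernel
print(x.grad.to_dense().shape)    # [16, 12]
print(y.grad.to_dense().shape)    # [12, 10]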
45 changes: 25 additions & 20 deletions paddle/phi/kernels/sparse/gpu/matmul_kernel.cu
@@ -28,6 +28,7 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/math_function_impl.h"
#include "paddle/phi/kernels/funcs/sparse/sparse_blas.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"

namespace phi {
namespace sparse {
Expand Down Expand Up @@ -240,7 +241,6 @@ void MatmulKernelImpl(const Context& dev_ctx,
"The shape of Input(x) and Input(y) is not suitable for matmul "
"opetation, x_dim[-1] must be eaqual to y_dim[-2]."));

// InferMeta of DenseTensor 'out'
std::vector<int64_t> out_dim_vec(ydim_vec);
out_dim_vec[y_ndims - 2] = xdim_vec[x_ndims - 2];
out_dim_vec[y_ndims - 1] = ydim_vec[y_ndims - 1];
@@ -251,9 +251,9 @@
sparse_blas.SPMM(
false, false, static_cast<T>(1), x, y, static_cast<T>(0), out);
#else
PADDLE_THROW(
phi::errors::Unimplemented("forward of 'sparse.matmul' use cusparseSpMM, "
"which is supported from CUDA 11.0"));
PADDLE_THROW(phi::errors::Unimplemented(
    "forward of 'sparse.matmul' uses cusparseSpGEMM, "
    "which is supported from CUDA 11.0"));
#endif
}

@@ -265,13 +265,18 @@ void MatmulCsrCsrKernel(const Context& dev_ctx,
MatmulKernelImpl<T>(dev_ctx, x, y, out);
}

// template <typename T, typename Context>
// void MatmulCooCooKernel(const Context& dev_ctx,
// const SparseCooTensor& x,
// const SparseCooTensor& y,
// SparseCooTensor* out) {
// MatmulKernelImpl<T>(dev_ctx, x, y, out);
// }
template <typename T, typename Context>
void MatmulCooCooKernel(const Context& dev_ctx,
                        const SparseCooTensor& x,
                        const SparseCooTensor& y,
                        SparseCooTensor* out) {
  // 'cusparseSpGEMM' only supports CSR for now, so go COO -> CSR -> COO.
  SparseCsrTensor x_csr = CooToCsr<T, Context>(dev_ctx, x);
  SparseCsrTensor y_csr = CooToCsr<T, Context>(dev_ctx, y);
  SparseCsrTensor out_csr;
  out_csr.set_dims(out->dims());
  MatmulKernelImpl<T>(dev_ctx, x_csr, y_csr, &out_csr);
  CsrToCooKernel<T>(dev_ctx, out_csr, out);
}

} // namespace sparse
} // namespace phi
@@ -311,12 +316,12 @@ PD_REGISTER_KERNEL(matmul_csr_csr,
  kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_CSR);
}

// PD_REGISTER_KERNEL(matmul_coo_coo,
// GPU,
// ALL_LAYOUT,
// phi::sparse::MatmulCooCooKernel,
// float,
// double) {
// kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
// kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
// }
PD_REGISTER_KERNEL(matmul_coo_coo,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::MatmulCooCooKernel,
                   float,
                   double) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
  kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
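Because the forward path reuses MatmulKernelImpl through the same CSR round-trip, the output obeys the shape rule shown above (out takes y's dims with dim[-2] replaced by x's dim[-2]), and CsrToCooKernel converts the result back to COO layout. A quick sketch under the same assumptions as the example above (CUDA >= 11.0 build; the int32-index cast shown in the test is omitted here for brevity but may be required):

import paddle

x = paddle.rand([16, 12]).to_sparse_coo(2)
y = paddle.rand([12, 10]).to_sparse_coo(2)
out = paddle.sparse.matmul(x, y)

# out_dim_vec[-2] = x_dim[-2], out_dim_vec[-1] = y_dim[-1]
assert out.shape == [16, 10]
assert out.is_sparse_coo()  # result is converted back from CSR to COO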
114 changes: 59 additions & 55 deletions test/legacy_test/test_sparse_matmul_op.py
@@ -226,65 +226,69 @@ def check_result(self, x_shape, y_shape):
    def test_matmul_2d(self):
        self.check_result([16, 12], [12, 10])

    @unittest.skipIf(
        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11000,
        "only support cuda>=11.0",
    )
    def test_matmul_3d(self):
        self.check_result([2, 16, 12], [2, 12, 10])


#
# class TestMatmulCOO(unittest.TestCase):
# # x: coo sparse, y: coo sparse, out: coo sparse
# def check_result(self, x_shape, y_shape):
# mask = paddle.randint(0, 2, x_shape)
# origin_x = paddle.rand(x_shape) * mask
# origin_y = paddle.rand(y_shape)
#
# dense_x = origin_x.detach()
# dense_x.stop_gradient = False
# dense_y = origin_y.detach()
# dense_y.stop_gradient = False
# dense_out = paddle.matmul(dense_x, dense_y)
#
# sp_x = origin_x.detach().to_sparse_coo(len(x_shape))
#
# # only support 32-bit index.
# sp_x_indices = paddle.cast(sp_x.indices(), "int32")
# sp_x = paddle.sparse.sparse_coo_tensor(
# sp_x_indices, sp_x.values(), sp_x.shape
# )
#
# sp_y = origin_y.detach().to_sparse_coo(len(y_shape))
# # only support 32-bit index.
# sp_y_indices = paddle.cast(sp_y.indices(), "int32")
# sp_y = paddle.sparse.sparse_coo_tensor(
# sp_y_indices, sp_y.values(), sp_y.shape
# )
#
# sp_x.stop_gradient = False
# sp_y.stop_gradient = False
#
# sp_out = paddle.sparse.matmul(sp_x, sp_y)
# np.testing.assert_allclose(
# sp_out.to_dense().numpy(), dense_out.numpy(), rtol=1e-05
# )
#
# if get_cuda_version() >= 11000:
# dense_out.backward()
# sp_out.backward()
# np.testing.assert_allclose(
# sp_x.grad.to_dense().numpy(),
# dense_x.grad.numpy(),
# rtol=1e-05,
# )
# np.testing.assert_allclose(
# sp_y.grad.to_dense().numpy(), dense_y.grad.numpy(), rtol=1e-05
# )
#
# @unittest.skipIf(
# not paddle.is_compiled_with_cuda() or get_cuda_version() < 11000,
# "only support cuda>=11.0",
# )
# def test_matmul_2d(self):
# self.check_result([16, 12], [12, 10])
class TestMatmulCOO(unittest.TestCase):
    # x: coo sparse, y: coo sparse, out: coo sparse
    def check_result(self, x_shape, y_shape):
        mask = paddle.randint(0, 2, x_shape)
        origin_x = paddle.rand(x_shape) * mask
        origin_y = paddle.rand(y_shape)

        dense_x = origin_x.detach()
        dense_x.stop_gradient = False
        dense_y = origin_y.detach()
        dense_y.stop_gradient = False
        dense_out = paddle.matmul(dense_x, dense_y)

        sp_x = origin_x.detach().to_sparse_coo(len(x_shape))

        # only support 32-bit index.
        sp_x_indices = paddle.cast(sp_x.indices(), "int32")
        sp_x = paddle.sparse.sparse_coo_tensor(
            sp_x_indices, sp_x.values(), sp_x.shape
        )

        sp_y = origin_y.detach().to_sparse_coo(len(y_shape))
        # only support 32-bit index.
        sp_y_indices = paddle.cast(sp_y.indices(), "int32")
        sp_y = paddle.sparse.sparse_coo_tensor(
            sp_y_indices, sp_y.values(), sp_y.shape
        )

        sp_x.stop_gradient = False
        sp_y.stop_gradient = False

        sp_out = paddle.sparse.matmul(sp_x, sp_y)
        np.testing.assert_allclose(
            sp_out.to_dense().numpy(), dense_out.numpy(), rtol=1e-05
        )

        if get_cuda_version() >= 11000:
            dense_out.backward()
            sp_out.backward()
            np.testing.assert_allclose(
                sp_x.grad.to_dense().numpy(),
                dense_x.grad.numpy(),
                rtol=1e-05,
            )
            np.testing.assert_allclose(
                sp_y.grad.to_dense().numpy(), dense_y.grad.numpy(), rtol=1e-05
            )

    @unittest.skipIf(
        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11000,
        "only support cuda>=11.0",
    )
    def test_matmul_2d(self):
        self.check_result([16, 12], [12, 10])


if __name__ == "__main__":
    unittest.main()
