Skip to content

Commit

Permalink
Implement noinit allocation for dense and distributed2d
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanlucf22 committed Aug 23, 2023
1 parent 906e286 commit 7bfdada
Show file tree
Hide file tree
Showing 11 changed files with 175 additions and 17 deletions.
11 changes: 6 additions & 5 deletions src/C-interface/bml_allocate.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,17 @@ bml_noinit_rectangular_matrix(
matrix_dimension.N_rows);
#ifdef BML_USE_MPI
if (distrib_mode == distributed)
return bml_zero_matrix_distributed2d(matrix_type, matrix_precision,
matrix_dimension.N_rows,
matrix_dimension.N_nz_max);
return bml_noinit_matrix_distributed2d(matrix_type, matrix_precision,
matrix_dimension.N_rows,
matrix_dimension.N_nz_max);
else
#endif
switch (matrix_type)
{
case dense:
return bml_zero_matrix_dense(matrix_precision,
matrix_dimension, distrib_mode);
return bml_noinit_matrix_dense(matrix_precision,
matrix_dimension,
distrib_mode);
break;
case ellpack:
return bml_noinit_matrix_ellpack(matrix_precision,
Expand Down
43 changes: 43 additions & 0 deletions src/C-interface/dense/bml_allocate_dense.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,49 @@ bml_clear_dense(
}
}

/** Allocate a dense matrix without initializing its elements.
 *
 * This is a thin dispatcher: it forwards to the precision-specific
 * allocator selected by \p matrix_precision. The complex variants are
 * only reachable when BML_COMPLEX is defined.
 *
 * \ingroup allocate_group
 *
 * \param matrix_precision The precision of the matrix.
 * \param matrix_dimension The matrix size.
 * \param distrib_mode The distribution mode.
 * \return The matrix returned by the precision-specific allocator. An
 * unrecognized precision is reported through LOG_ERROR and, if
 * LOG_ERROR returns, NULL is returned.
 */
bml_matrix_dense_t *
bml_noinit_matrix_dense(
    bml_matrix_precision_t matrix_precision,
    bml_matrix_dimension_t matrix_dimension,
    bml_distribution_mode_t distrib_mode)
{
    /* Stays NULL unless one of the typed allocators is selected. */
    bml_matrix_dense_t *result = NULL;

    switch (matrix_precision)
    {
        case single_real:
            result =
                bml_noinit_matrix_dense_single_real(matrix_dimension,
                                                    distrib_mode);
            break;
        case double_real:
            result =
                bml_noinit_matrix_dense_double_real(matrix_dimension,
                                                    distrib_mode);
            break;
#ifdef BML_COMPLEX
        case single_complex:
            result =
                bml_noinit_matrix_dense_single_complex(matrix_dimension,
                                                       distrib_mode);
            break;
        case double_complex:
            result =
                bml_noinit_matrix_dense_double_complex(matrix_dimension,
                                                       distrib_mode);
            break;
#endif
        default:
            LOG_ERROR("unknown precision (%d)\n", matrix_precision);
            break;
    }

    return result;
}

/** Allocate the zero matrix.
*
* Note that the matrix \f$ a \f$ will be newly allocated. If it is
Expand Down
25 changes: 25 additions & 0 deletions src/C-interface/dense/bml_allocate_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,31 @@ void bml_clear_dense_single_complex(
void bml_clear_dense_double_complex(
bml_matrix_dense_t * A);

/* Allocate a dense matrix without initializing its elements. Dispatches on
 * matrix_precision to the bml_noinit_matrix_dense_<precision> variants. */
bml_matrix_dense_t *bml_noinit_matrix_dense(
bml_matrix_precision_t matrix_precision,
bml_matrix_dimension_t matrix_dimension,
bml_distribution_mode_t distrib_mode);

/* Precision-specific allocator that bml_noinit_matrix_dense() calls for the
 * single_real precision. */
bml_matrix_dense_t *bml_noinit_matrix_dense_single_real(
bml_matrix_dimension_t matrix_dimension,
bml_distribution_mode_t distrib_mode);

/* Precision-specific allocator that bml_noinit_matrix_dense() calls for the
 * double_real precision. */
bml_matrix_dense_t *bml_noinit_matrix_dense_double_real(
bml_matrix_dimension_t matrix_dimension,
bml_distribution_mode_t distrib_mode);

/* Precision-specific allocator that bml_noinit_matrix_dense() calls for the
 * single_complex precision; the dispatcher only uses it when BML_COMPLEX is
 * defined. */
bml_matrix_dense_t
* bml_noinit_matrix_dense_single_complex(bml_matrix_dimension_t
matrix_dimension,
bml_distribution_mode_t
distrib_mode);

/* Precision-specific allocator that bml_noinit_matrix_dense() calls for the
 * double_complex precision; the dispatcher only uses it when BML_COMPLEX is
 * defined. */
bml_matrix_dense_t
* bml_noinit_matrix_dense_double_complex(bml_matrix_dimension_t
matrix_dimension,
bml_distribution_mode_t
distrib_mode);

bml_matrix_dense_t *bml_zero_matrix_dense(
bml_matrix_precision_t matrix_precision,
bml_matrix_dimension_t matrix_dimension,
Expand Down
53 changes: 53 additions & 0 deletions src/C-interface/dense/bml_allocate_dense_typed.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,59 @@ void TYPED_FUNC(
#endif
}

/** Allocate a dense matrix with uninitialized values.
 *
 * The matrix is square: only matrix_dimension.N_rows is used, both for
 * the stored size N and for the element buffer. The precision is not a
 * parameter; it is fixed by the typed instantiation of this function
 * (MATRIX_PRECISION / REAL_T).
 *
 * \ingroup allocate_group
 *
 * \param matrix_dimension The matrix size (only N_rows is read).
 * \param distrib_mode The distribution mode.
 * \return The matrix. Its elements are not initialized.
 */
bml_matrix_dense_t *TYPED_FUNC(
    bml_noinit_matrix_dense) (
    bml_matrix_dimension_t matrix_dimension,
    bml_distribution_mode_t distrib_mode)
{
    bml_matrix_dense_t *A =
        bml_noinit_allocate_memory(sizeof(bml_matrix_dense_t));
    /* The struct itself is uninitialized too, so every field that is
     * read later has to be assigned explicitly here. */
    A->matrix_type = dense;
    A->matrix_precision = MATRIX_PRECISION;
    A->N = matrix_dimension.N_rows;
    A->distribution_mode = distrib_mode;
#ifdef BML_USE_MAGMA
    /* Leading dimension comes from magma_roundup(N_rows, 32). */
    A->ld = magma_roundup(matrix_dimension.N_rows, 32);
    int device;
    magma_getdevice(&device);
    bml_queue_create(device);
    magma_int_t ret = MAGMA(malloc) ((MAGMA_T **) & A->matrix,
                                     A->ld * matrix_dimension.N_rows);
    /* NOTE(review): with NDEBUG defined this check disappears and an
     * allocation failure would go unnoticed -- confirm this is intended. */
    assert(ret == MAGMA_SUCCESS);
#else
    A->ld = matrix_dimension.N_rows;
    A->matrix =
        bml_noinit_allocate_memory(sizeof(REAL_T) * matrix_dimension.N_rows *
                                   matrix_dimension.N_rows);
#ifdef MKL_GPU
    int sizea = A->ld * A->ld;

    REAL_T *A_matrix = (REAL_T *) A->matrix;
    // create device storage for the matrix; map(alloc:) does not copy
    // the (uninitialized) host data
#pragma omp target enter data map(alloc:A_matrix[0:sizea])
#endif // end of MKL_GPU

#endif
    A->domain =
        bml_default_domain(matrix_dimension.N_rows, matrix_dimension.N_rows,
                           distrib_mode);
    A->domain2 =
        bml_default_domain(matrix_dimension.N_rows, matrix_dimension.N_rows,
                           distrib_mode);
    return A;
}

/** Allocate the zero matrix.
*
* Note that the matrix \f$ a \f$ will be newly allocated. If it is
Expand Down
4 changes: 2 additions & 2 deletions src/C-interface/dense/bml_copy_dense_typed.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ bml_matrix_dense_t *TYPED_FUNC(
{
bml_matrix_dimension_t matrix_dimension = { A->N, A->N, A->N };
bml_matrix_dense_t *B =
TYPED_FUNC(bml_zero_matrix_dense) (matrix_dimension,
A->distribution_mode);
TYPED_FUNC(bml_noinit_matrix_dense) (matrix_dimension,
A->distribution_mode);
#ifdef BML_USE_MAGMA
MAGMA(copymatrix) (A->N, A->N, A->matrix, A->ld,
B->matrix, B->ld, bml_queue());
Expand Down
2 changes: 1 addition & 1 deletion src/C-interface/dense/bml_transpose_dense_typed.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bml_matrix_dense_t *TYPED_FUNC(

bml_matrix_dimension_t matrix_dimension = { A->N, A->N, A->N };
bml_matrix_dense_t *B =
TYPED_FUNC(bml_zero_matrix_dense) (matrix_dimension,
TYPED_FUNC(bml_noinit_matrix_dense) (matrix_dimension,
A->distribution_mode);
REAL_T *A_matrix = A->matrix;
REAL_T *B_matrix = B->matrix;
Expand Down
30 changes: 30 additions & 0 deletions src/C-interface/distributed2d/bml_allocate_distributed2d.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,36 @@ bml_clear_distributed2d(
bml_clear(A->matrix);
}

/** Allocate a distributed2d matrix without initializing its values.
 *
 * The distributed2d descriptor is set up through
 * bml_setup_distributed2d() and the local block is then allocated,
 * uninitialized, as a sequential matrix of the requested type.
 *
 * \ingroup allocate_group
 *
 * \param matrix_type The matrix type used for the local block.
 * \param matrix_precision The precision of the matrix.
 * \param N The matrix size; must be positive.
 * \param M Stored as A->M; must be positive. The local block receives
 * M divided by bml_sqrtint(ntasks).
 * \return The matrix.
 */
bml_matrix_distributed2d_t *
bml_noinit_matrix_distributed2d(
    bml_matrix_type_t matrix_type,
    bml_matrix_precision_t matrix_precision,
    int N,
    int M)
{
    assert(N > 0);
    assert(M > 0);

    bml_matrix_distributed2d_t *A = bml_noinit_allocate_memory(sizeof *A);

    /* The setup call must come first: A->ntasks and A->n are read below. */
    bml_setup_distributed2d(N, A);
    A->M = M;
    A->matrix_precision = matrix_precision;

    /* Share of M handed to the local block (integer division). */
    const int tasks_per_dim = bml_sqrtint(A->ntasks);
    const int local_M = M / tasks_per_dim;

    A->matrix = bml_noinit_matrix(matrix_type, matrix_precision,
                                  A->n, local_M, sequential);
    return A;
}

/** Allocate the zero matrix.
*
* Note that the matrix \f$ a \f$ will be newly allocated. If it is
Expand Down
6 changes: 6 additions & 0 deletions src/C-interface/distributed2d/bml_allocate_distributed2d.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ void bml_clear_distributed2d_single_complex(
void bml_clear_distributed2d_double_complex(
bml_matrix_distributed2d_t * A);

/* Allocate a distributed2d matrix whose local block is left uninitialized.
 * matrix_type and matrix_precision select the local block's type and
 * precision; the definition asserts that N and M are positive. */
bml_matrix_distributed2d_t *bml_noinit_matrix_distributed2d(
bml_matrix_type_t matrix_type,
bml_matrix_precision_t matrix_precision,
int N,
int M);

bml_matrix_distributed2d_t *bml_zero_matrix_distributed2d(
bml_matrix_type_t matrix_type,
bml_matrix_precision_t matrix_precision,
Expand Down
6 changes: 3 additions & 3 deletions src/C-interface/distributed2d/bml_copy_distributed2d.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ bml_copy_distributed2d_new(
assert(A->M > 0);

bml_matrix_distributed2d_t *B =
bml_zero_matrix_distributed2d(bml_get_type(A->matrix),
bml_get_precision(A->matrix), A->N,
A->M);
bml_noinit_matrix_distributed2d(bml_get_type(A->matrix),
bml_get_precision(A->matrix), A->N,
A->M);

// copy local block
bml_copy(A->matrix, B->matrix);
Expand Down
6 changes: 3 additions & 3 deletions src/C-interface/distributed2d/bml_scale_distributed2d.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ bml_scale_distributed2d_new(
assert(A->M > 0);

bml_matrix_distributed2d_t *B =
bml_zero_matrix_distributed2d(bml_get_type(A->matrix),
bml_get_precision(A->matrix), A->N,
A->M);
bml_noinit_matrix_distributed2d(bml_get_type(A->matrix),
bml_get_precision(A->matrix), A->N,
A->M);

bml_scale(scale_factor, A->matrix, B->matrix);

Expand Down
6 changes: 3 additions & 3 deletions src/C-interface/distributed2d/bml_threshold_distributed2d.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ bml_matrix_distributed2d_t
assert(A->M > 0);

bml_matrix_distributed2d_t *B =
bml_zero_matrix_distributed2d(bml_get_type(A->matrix),
bml_get_precision(A->matrix), A->N,
A->M);
bml_noinit_matrix_distributed2d(bml_get_type(A->matrix),
bml_get_precision(A->matrix), A->N,
A->M);
// copy local block
bml_copy(A->matrix, B->matrix);

Expand Down

0 comments on commit 7bfdada

Please sign in to comment.