From 92ddaa99b52687ef09452aaa7cd224746906abd2 Mon Sep 17 00:00:00 2001 From: Daniel Osei-Kuffuor Date: Wed, 23 Aug 2023 12:03:08 -0700 Subject: [PATCH] Some cleanup and minor modifications. * Cleaned up old transpose code that was commented out. * Updated header files to include TYPED helper functions. --- src/C-interface/csr/bml_transpose_csr_typed.c | 152 +----------------- .../ellblock/bml_transpose_ellblock.h | 24 +++ .../ellblock/bml_transpose_ellblock_typed.c | 91 +---------- .../ellpack/bml_transpose_ellpack.h | 48 ++++++ .../ellpack/bml_transpose_ellpack_typed.c | 32 +++- 5 files changed, 98 insertions(+), 249 deletions(-) diff --git a/src/C-interface/csr/bml_transpose_csr_typed.c b/src/C-interface/csr/bml_transpose_csr_typed.c index 8e5766ec2..b2f55f7e7 100644 --- a/src/C-interface/csr/bml_transpose_csr_typed.c +++ b/src/C-interface/csr/bml_transpose_csr_typed.c @@ -77,156 +77,7 @@ bml_matrix_csr_t *TYPED_FUNC( return B; } -#if 0 -/** swap row entries in position ipos and jpos. - * - * column indexes and non-zero entries are swapped - * - * \ingroup transpose_group - * - * \param A The matrix. - */ -void TYPED_FUNC( - csr_swap_row_entries) ( - csr_sparse_row_t * row, - const int ipos, - const int jpos) -{ - if (ipos == jpos) - return; - - REAL_T *vals = (REAL_T *) row->vals_; - int *cols = row->cols_; - REAL_T tmp = vals[ipos]; - int itmp = cols[ipos]; - /* swap */ - vals[ipos] = vals[jpos]; - vals[jpos] = tmp; - cols[ipos] = cols[jpos]; - cols[jpos] = itmp; -} - -/** Transpose a matrix in place. - * - * \ingroup transpose_group - * - * \param A The matrix to be transposeed - * \return the transposed A - */ -void TYPED_FUNC( - bml_transpose_csr) ( - bml_matrix_csr_t * A) -{ - int N = A->N_; - int nz_t[N]; - memset(nz_t, 0, sizeof(int) * N); -//#pragma omp parallel for shared(N, M, A_value, A_index, A_nnz) - // symmetric and nonsymmetric contributions from upper triangular part - for (int i = 0; i < N; i++) - { - int *icols = A->data_[i]->cols_; - REAL_T *ivals = (REAL_T *) A->data_[i]->vals_; - const int innz = A->data_[i]->NNZ_; - - int ipos = innz - 1; - while (ipos >= nz_t[i]) - { - const int j = icols[ipos]; - - if (j > i) - { - int *jcols = A->data_[j]->cols_; - REAL_T *jvals = (REAL_T *) A->data_[j]->vals_; - const int jnnz = A->data_[j]->NNZ_; - const int jstart = nz_t[j]; - int found = 0; - /* search for symmetric position */ - for (int jpos = jstart; jpos < jnnz; jpos++) - { - const int k = jcols[jpos]; - if (k == i) - { - /* symmetric position found so just swap entries */ - REAL_T tmp = ivals[ipos]; - ivals[ipos] = jvals[jpos]; - jvals[jpos] = tmp; - /* swap position in row i */ - TYPED_FUNC(csr_swap_row_entries) (A->data_[i], ipos, - nz_t[i]); - /* swap position in row j */ - TYPED_FUNC(csr_swap_row_entries) (A->data_[j], jpos, - nz_t[j]); - /* update nonzero count */ - nz_t[i]++; - nz_t[j]++; - found = 1; - break; - } - } - if (!found) - { - /* nonsymmetric entry. Insert entry and swap position */ - TYPED_FUNC(csr_set_row_element_new) (A->data_[j], i, - &ivals[ipos]); - /* swap position in row j */ - const int nzpos = csr_row_NNZ(A->data_[j]) - 1; - TYPED_FUNC(csr_swap_row_entries) (A->data_[j], nzpos, - nz_t[j]); - /* update nonzero count */ - nz_t[j]++; - /* update ipos */ - ipos--; - } - } - else - { - if (j == i) - { - /* swap position in row i */ - TYPED_FUNC(csr_swap_row_entries) (A->data_[i], ipos, - nz_t[i]); - /* update nonzero count */ - nz_t[i]++; - } - else - { - ipos--; - } - } - } - } - // nonsymmetric contribution from lower triangular part - for (int i = 0; i < N; i++) - { - int *icols = A->data_[i]->cols_; - REAL_T *ivals = (REAL_T *) A->data_[i]->vals_; - const int innz = A->data_[i]->NNZ_; - for (int ipos = nz_t[i]; ipos < innz; ipos++) - { - const int j = icols[ipos]; - if (j < i) - { - // insert entry in row j - TYPED_FUNC(csr_set_row_element_new) (A->data_[j], i, - &ivals[ipos]); - /* swap position in row j */ - const int nzpos = csr_row_NNZ(A->data_[j]) - 1; - TYPED_FUNC(csr_swap_row_entries) (A->data_[j], nzpos, - nz_t[j]); - /* update nonzero count */ - nz_t[j]++; - } - } - } - /* update nonzero row counts */ -#pragma omp parallel for shared(N) - for (int i = 0; i < N; i++) - { - csr_row_NNZ(A->data_[i]) = nz_t[i]; - } -} -#else /** swap row entries in position ipos and jpos. * * column indexes and non-zero entries are swapped @@ -352,5 +203,4 @@ void TYPED_FUNC( } } } -} -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/src/C-interface/ellblock/bml_transpose_ellblock.h b/src/C-interface/ellblock/bml_transpose_ellblock.h index 474468e1e..de92b2745 100644 --- a/src/C-interface/ellblock/bml_transpose_ellblock.h +++ b/src/C-interface/ellblock/bml_transpose_ellblock.h @@ -3,6 +3,30 @@ #include "bml_types_ellblock.h" +void ellblock_swap_block_row_entries_single_real ( + bml_matrix_ellblock_t * A, + const int block_row, + const int iposb, + const int jposb); + +void ellblock_swap_block_row_entries_double_real ( + bml_matrix_ellblock_t * A, + const int block_row, + const int iposb, + const int jposb); + +void ellblock_swap_block_row_entries_single_complex ( + bml_matrix_ellblock_t * A, + const int block_row, + const int iposb, + const int jposb); + +void ellblock_swap_block_row_entries_double_complex ( + bml_matrix_ellblock_t * A, + const int block_row, + const int iposb, + const int jposb); + bml_matrix_ellblock_t *bml_transpose_new_ellblock( bml_matrix_ellblock_t * A); diff --git a/src/C-interface/ellblock/bml_transpose_ellblock_typed.c b/src/C-interface/ellblock/bml_transpose_ellblock_typed.c index 60711f2f9..e4ea7acec 100644 --- a/src/C-interface/ellblock/bml_transpose_ellblock_typed.c +++ b/src/C-interface/ellblock/bml_transpose_ellblock_typed.c @@ -105,7 +105,6 @@ void TYPED_FUNC( A_indexb[ROWMAJOR(block_row, jposb, NB, MB)] = itmp; } -#if 1 /** Transpose a matrix in place. * * \ingroup transpose_group @@ -268,92 +267,4 @@ void TYPED_FUNC( } } } -} -#else -/** Transpose a matrix in place. - * - * \ingroup transpose_group - * - * \param A The matrix to be transposeed - * \return the transposed A - */ -void TYPED_FUNC( - bml_transpose_ellblock) ( - bml_matrix_ellblock_t * A) -{ - int NB = A->NB; - int MB = A->MB; - - REAL_T **A_ptr_value = (REAL_T **) A->ptr_value; - int *A_indexb = A->indexb; - int *A_nnzb = A->nnzb; - int *bsize = A->bsize; - - for (int ib = 0; ib < NB; ib++) - { - for (int jp = A_nnzb[ib] - 1; jp >= 0; jp--) - { - int indl = ROWMAJOR(ib, jp, NB, MB); - int jb = A_indexb[indl]; - printf("ib %d: jp = %d\n", ib, jb); - if (jb >= ib) - { - int exchangeDone = 0; - for (int kp = 0; kp < A_nnzb[jb]; kp++) - { - int indr = ROWMAJOR(jb, kp, NB, MB); - if (A_indexb[indr] == ib) - { - REAL_T *A_value_l = A_ptr_value[indl]; - REAL_T *A_value_r = A_ptr_value[indr]; - if (ib == jb) - { - for (int ii = 0; ii < bsize[ib]; ii++) - for (int jj = 0; jj < ii; jj++) - { - int il = ROWMAJOR(ii, jj, bsize[ib], - bsize[jb]); - int ir = ROWMAJOR(jj, ii, bsize[jb], - bsize[ib]); - double tmp = A_value_r[il]; - A_value_l[il] = A_value_r[ir]; - A_value_l[ir] = tmp; - } - } - else - { - for (int ii = 0; ii < bsize[ib]; ii++) - for (int jj = 0; jj < bsize[jb]; jj++) - { - int il = ROWMAJOR(ii, jj, bsize[ib], - bsize[jb]); - int ir = ROWMAJOR(jj, ii, bsize[jb], - bsize[ib]); - double tmp = A_value_l[il]; - A_value_l[il] = A_value_r[ir]; - A_value_r[ir] = tmp; - } - } - exchangeDone = 1; - break; - } - } - assert(exchangeDone); - // If no match add to end of row -// if (!exchangeDone) -// { -// int jind = A_nnzb[ind]; -// { -// A_index[ROWMAJOR(ind, jind, N, M)] = i; -// A_value[ROWMAJOR(ind, jind, N, M)] = -// A_value[ROWMAJOR(i, j, N, M)]; -// A_nnz[ind]++; -// A_nnz[i]--; -// } -// } - } - } - } - -} -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/src/C-interface/ellpack/bml_transpose_ellpack.h b/src/C-interface/ellpack/bml_transpose_ellpack.h index 91fe370b9..413d06e58 100644 --- a/src/C-interface/ellpack/bml_transpose_ellpack.h +++ b/src/C-interface/ellpack/bml_transpose_ellpack.h @@ -3,6 +3,54 @@ #include "bml_types_ellpack.h" +void ellpack_swap_row_entries_single_real ( + bml_matrix_ellpack_t * A, + const int row, + const int ipos, + const int jpos); + +void ellpack_swap_row_entries_double_real ( + bml_matrix_ellpack_t * A, + const int row, + const int ipos, + const int jpos); + +void ellpack_swap_row_entries_single_complex ( + bml_matrix_ellpack_t * A, + const int row, + const int ipos, + const int jpos); + +void ellpack_swap_row_entries_double_complex ( + bml_matrix_ellpack_t * A, + const int row, + const int ipos, + const int jpos); + +void ellpack_transpose_left_looking_single_real ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_left_looking_double_real ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_left_looking_single_complex ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_left_looking_double_complex ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_right_looking_single_real ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_right_looking_double_real ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_right_looking_single_complex ( + bml_matrix_ellpack_t * A); + +void ellpack_transpose_right_looking_double_complex ( + bml_matrix_ellpack_t * A); + bml_matrix_ellpack_t *bml_transpose_new_ellpack( bml_matrix_ellpack_t * A); diff --git a/src/C-interface/ellpack/bml_transpose_ellpack_typed.c b/src/C-interface/ellpack/bml_transpose_ellpack_typed.c index a665b505e..1bc313c43 100644 --- a/src/C-interface/ellpack/bml_transpose_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_transpose_ellpack_typed.c @@ -171,16 +171,15 @@ void TYPED_FUNC( A_index[ROWMAJOR(row, jpos, N, M)] = itmp; } -#if 1 /** Transpose a matrix in place. - * Sequential backward-looking algorithm - good for non-structurally symmetric systems + * Sequential left-looking algorithm - good for structurally non-symmetric systems * \ingroup transpose_group * - * \param A The matrix to be transposeed + * \param A The matrix to be transposed * \return the transposed A */ void TYPED_FUNC( - bml_transpose_ellpack) ( + ellpack_transpose_left_looking) ( bml_matrix_ellpack_t * A) { int N = A->N; @@ -292,16 +291,16 @@ void TYPED_FUNC( #endif #endif } -#else + /** Transpose a matrix in place. - * Sequential forward-looking algorithm - good for structurally symmetric systems + * Sequential right-looking algorithm - good for structurally symmetric systems * \ingroup transpose_group * * \param A The matrix to be transposeed * \return the transposed A */ void TYPED_FUNC( - bml_transpose_ellpack) ( + ellpack_transpose_right_looking) ( bml_matrix_ellpack_t * A) { int N = A->N; @@ -406,7 +405,24 @@ void TYPED_FUNC( #endif #endif } -#endif + +/** Transpose a matrix in place. + * \ingroup transpose_group + * + * \param A The matrix to be transposed + * \return the transposed A + */ +void TYPED_FUNC( + bml_transpose_ellpack) ( + bml_matrix_ellpack_t * A) +{ + /* Call in-place transpose function. + * consider: ellpack_transpose_left_looking(A) - for structurally non-symmetric matrices + * consider: ellpack_transpose_right_looking(A) - for structurally symmetric matrices + * Either algorithm is sufficient for arbitrary sparse matrices + */ + TYPED_FUNC(ellpack_transpose_left_looking) (A); +} #if defined(BML_USE_CUSPARSE) /** cuSPARSE matrix transpose