Skip to content

Commit

Permalink
use atomic operations as in the corresponding getrf
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-frbg authored Mar 28, 2024
1 parent 9af2a9d commit 2dda40d
Showing 1 changed file with 37 additions and 8 deletions.
45 changes: 37 additions & 8 deletions lapack/potrf/potrf_parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,14 @@ typedef struct {
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
} job_t;

/*
 * Accessors for BLASLONG slots that are read and written by more than one
 * thread (used below on job[].working[][] entries).
 *
 *   atomic_load_long(p)      - read the BLASLONG that p points to
 *   atomic_store_long(p, v)  - write v to the BLASLONG that p points to
 *
 * With HAVE_C11 these map to the compiler's __atomic builtins using
 * __ATOMIC_RELAXED: the access itself is atomic, but it imposes no ordering
 * on surrounding loads/stores, so any required ordering must come from
 * explicit barriers at the call site.
 *
 * Without HAVE_C11 they fall back to a plain access through a
 * volatile-qualified pointer.  That only keeps the compiler from caching or
 * eliding the access; it is not an atomic operation in the C11 sense.
 *
 * Both fallback expansions are fully parenthesized so they behave as a
 * single expression wherever they are used (e.g. next to another operator).
 */
#ifdef HAVE_C11
#define atomic_load_long(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#define atomic_store_long(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#else
#define atomic_load_long(p) ((BLASLONG)(*(volatile BLASLONG *)(p)))
#define atomic_store_long(p, v) ((*(volatile BLASLONG *)(p)) = (v))
#endif


#ifndef KERNEL_OPERATION
#ifndef COMPLEX
Expand Down Expand Up @@ -233,14 +241,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}

#ifndef LOWER
MB;
for (i = 0; i <= mypos; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
atomic_store_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside], (BLASLONG)buffer[bufferside]);
// job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
#else
MB
for (i = mypos; i < args -> nthreads; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
atomic_store_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside], (BLASLONG)buffer[bufferside]);
// job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
#endif

WMB;
// WMB;
}

min_i = m_to - m_from;
Expand Down Expand Up @@ -271,14 +283,21 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {

/* thread has to wait */
if (current != mypos) while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
if (current != mypos)
do {
jw = atomic_load_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside]);
} while (jw == 0);
MB;

//while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};

KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), k, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, lda, m_from, xxx);

if (m_from + min_i >= m_to) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
atomic_store_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside], job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0);
// job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
}
}
Expand Down Expand Up @@ -323,7 +342,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
c, lda, is, xxx);

if (is + min_i >= m_to) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
atomic_store_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside], job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0);
// job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
}
}
Expand All @@ -337,9 +357,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,

for (i = 0; i < args -> nthreads; i++) {
if (i != mypos) {
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
for (xxx = 0; xxx < DIVIDE_RATE; xxx++)
#if 1
{
do {
jw = atomic_load_long(&job[mypos].working[i][CACHE_LINE_SIZE * xxx]);
} while (jw);
MB;
}
#else
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {YIELDING;};
}
#endif
// }
}
}

Expand Down

0 comments on commit 2dda40d

Please sign in to comment.