Skip to content

Commit

Permalink
Merge branch 'develop' into arm64_cmake_small_matrix_opt
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-frbg authored Oct 3, 2024
2 parents 7087b0a + 68eefe6 commit b4495a8
Show file tree
Hide file tree
Showing 26 changed files with 602 additions and 146 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/loongarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ jobs:
- target: LOONGSON2K1000
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: LA64_GENERIC
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
- target: LA464
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
- target: LA264
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
- target: DYNAMIC_ARCH
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/loongarch64_clang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ jobs:
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
- target: LOONGSON2K1000
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: LA64_GENERIC
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
- target: LA464
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
- target: LA264
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
- target: DYNAMIC_ARCH
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

Expand Down
4 changes: 4 additions & 0 deletions Makefile.install
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ endif
ifeq ($(INTERFACE64),1)
USE_64BITINT=1
endif
ifeq ($(USE_OPENMP),1)
FOMP_OPT:= -fopenmp
endif

PREFIX ?= /opt/OpenBLAS

Expand Down Expand Up @@ -178,6 +181,7 @@ endif
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)"
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
@echo 'version='$(VERSION) >> "$(PKGFILE)"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
Expand Down
6 changes: 3 additions & 3 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ endif
endif

ifeq ($(ARCH), loongarch64)
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
DYNAMIC_CORE = LA64_GENERIC LA264 LA464
endif

ifeq ($(ARCH), riscv64)
Expand Down Expand Up @@ -1720,8 +1720,8 @@ LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
endif
ifeq ($(F_COMPILER),FLANGNEW)
LAPACK_FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
override FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
LAPACK_FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 -mtune=% -mabi=% ,$(FFLAGS))
override FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 -mtune=% -mabi=% ,$(FFLAGS))
endif

LAPACK_CFLAGS = $(CFLAGS)
Expand Down
10 changes: 9 additions & 1 deletion TargetList.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,17 @@ x280
RISCV64_ZVL256B

11.LOONGARCH64:
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names,
// and it is recommended to use the more standardized naming conventions
// LA64_GENERIC/LA264/LA464. You can still specify TARGET as
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 during compilation or runtime,
// and they will be internally relocated to LA64_GENERIC/LA264/LA464.
LOONGSONGENERIC
LOONGSON3R5
LOONGSON2K1000
LOONGSON3R5
LA64_GENERIC
LA264
LA464

12. Elbrus E2000:
E2K
Expand Down
8 changes: 4 additions & 4 deletions cblas.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,13 +407,13 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);

void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
float *c, OPENBLAS_CONST blasint cldc);
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
double *c, OPENBLAS_CONST blasint cldc);
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
float *c, OPENBLAS_CONST blasint cldc);
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
double *c, OPENBLAS_CONST blasint cldc);

void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
Expand Down
4 changes: 4 additions & 0 deletions cmake/arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ if (DYNAMIC_ARCH)
endif ()
endif ()

if (LOONGARCH64)
set(DYNAMIC_CORE LOONGSONGENERIC LOONGSON2K1000 LOONGSON3R5)
endif ()

if (EXISTS ${PROJECT_SOURCE_DIR}/config_kernel.h)
message (FATAL_ERROR "Your build directory contains a file config_kernel.h, probably from a previous compilation with make. This will conflict with the cmake compilation and cause strange compiler errors - please remove the file before trying again")
endif ()
Expand Down
26 changes: 15 additions & 11 deletions cmake/fc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -61,21 +61,25 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F
endif ()
if (LOONGARCH64)
if (BINARY64)
CHECK_C_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
if(COMPILER_SUPPORT_LP64D_ABI)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
else()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
endif ()
if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang.*")
CHECK_C_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
if(COMPILER_SUPPORT_LP64D_ABI)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
else()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
endif ()
endif ()
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
endif ()
else ()
CHECK_C_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
if(COMPILER_SUPPORT_ILP32D_ABI)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
else()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang.*")
CHECK_C_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
if(COMPILER_SUPPORT_ILP32D_ABI)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
else()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
endif ()
endif ()
endif ()
endif ()
Expand Down
4 changes: 2 additions & 2 deletions cmake/openblas.pc.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OpenBLAS_VERSION@
URL: https://github.com/OpenMathLib/OpenBLAS
Libs: @OpenMP_C_FLAGS@ -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
Cflags: -I${includedir}
Libs: -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
Cflags: -I${includedir} @OpenMP_C_FLAGS@
48 changes: 48 additions & 0 deletions cmake/prebuild.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,54 @@ endif ()
"#define DTB_DEFAULT_ENTRIES 128\n"
"#define DTB_SIZE 4096\n"
"#define L2_ASSOCIATIVE 4\n")
elseif ("${TCORE}" STREQUAL "LOONGSONGENERIC")
file(APPEND ${TARGET_CONF_TEMP}
"#define DTB_DEFAULT_ENTRIES 64\n")
set(SGEMM_UNROLL_M 2)
set(SGEMM_UNROLL_N 8)
set(DGEMM_UNROLL_M 2)
set(DGEMM_UNROLL_N 8)
set(CGEMM_UNROLL_M 1)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 1)
set(ZGEMM_UNROLL_N 4)
set(CGEMM3M_UNROLL_M 2)
set(CGEMM3M_UNROLL_N 8)
set(ZGEMM3M_UNROLL_M 2)
set(ZGEMM3M_UNROLL_N 8)
elseif ("${TCORE}" STREQUAL "LOONGSON2K1000")
file(APPEND ${TARGET_CONF_TEMP}
"#define DTB_DEFAULT_ENTRIES 64\n")
set(HAVE_LSX 1)
set(SGEMM_UNROLL_M 2)
set(SGEMM_UNROLL_N 8)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(CGEMM3M_UNROLL_M 2)
set(CGEMM3M_UNROLL_N 8)
set(ZGEMM3M_UNROLL_M 8)
set(ZGEMM3M_UNROLL_N 4)
elseif ("${TCORE}" STREQUAL "LOONGSON3R5")
file(APPEND ${TARGET_CONF_TEMP}
"#define DTB_DEFAULT_ENTRIES 64\n")
set(HAVE_LASX 1)
set(HAVE_LSX 1)
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 8)
set(DGEMM_UNROLL_M 16)
set(DGEMM_UNROLL_N 6)
set(CGEMM_UNROLL_M 16)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 4)
set(CGEMM3M_UNROLL_M 16)
set(CGEMM3M_UNROLL_N 8)
set(ZGEMM3M_UNROLL_M 16)
set(ZGEMM3M_UNROLL_N 6)
endif()
set(SBGEMM_UNROLL_M 8)
set(SBGEMM_UNROLL_N 4)
Expand Down
4 changes: 2 additions & 2 deletions cmake/system.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ if (NEED_PIC)
endif()
endif ()

if (X86_64 OR ${CORE} STREQUAL POWER10 OR ARM64)
if (X86_64 OR ${CORE} STREQUAL POWER10 OR ARM64 OR LOONGARCH64)
set(SMALL_MATRIX_OPT TRUE)
endif ()
if (ARM64)
Expand All @@ -403,7 +403,7 @@ if (SMALL_MATRIX_OPT)
endif ()

if (DYNAMIC_ARCH)
if (X86 OR X86_64 OR ARM64 OR POWER OR RISCV64)
if (X86 OR X86_64 OR ARM64 OR POWER OR RISCV64 OR LOONGARCH64)
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
if (DYNAMIC_OLDER)
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
Expand Down
2 changes: 2 additions & 0 deletions cmake/system_check.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ elseif(ARM)
set(ARCH "arm")
elseif(ARM64)
set(ARCH "arm64")
elseif(LOONGARCH64)
set(ARCH "loongarch64")
else()
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
endif ()
Expand Down
4 changes: 4 additions & 0 deletions common_loongarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,13 @@ REALNAME: ;\
#define GNUSTACK
#endif /* defined(__linux__) && defined(__ELF__) */

#ifdef __clang__
#define EPILOGUE .end
#else
#define EPILOGUE \
.end REALNAME ;\
GNUSTACK
#endif

#define PROFCODE

Expand Down
Loading

0 comments on commit b4495a8

Please sign in to comment.