From aaf65210ccba0c53408c242a2e0f5ad5d798d532 Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Fri, 19 Jan 2024 19:04:21 +0000 Subject: [PATCH] Add dynamic support for Arm(R) Neoverse(TM) V2 processor Whilst I figure out how best to map the L2 parameters without duplicating all of `ARMV8SVE`, let's just map this to `NEOVERSEV1`. --- driver/others/dynamic_arm64.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 803e0b5ebf..6b21028d12 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -1,6 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ -/* Copyright 2023 The OpenBLAS Project */ +/* Copyright 2023-2024 The OpenBLAS Project */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -143,12 +143,13 @@ extern gotoblas_t gotoblas_ARMV8SVE; #endif extern gotoblas_t gotoblas_THUNDERX3T110; #endif +#define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEV1 extern void openblas_warning(int verbose, const char * msg); #define FALLBACK_VERBOSE 1 #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. 
OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" -#define NUM_CORETYPES 16 +#define NUM_CORETYPES 17 /* * In case asm/hwcap.h is outdated on the build system, make sure @@ -178,6 +179,7 @@ static char *corename[] = { "emag8180", "neoversen1", "neoversev1", + "neoversev2", "neoversen2", "thunderx3t110", "cortexa55", @@ -198,10 +200,11 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10]; if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11]; - if (gotoblas == &gotoblas_NEOVERSEN2) return corename[12]; - if (gotoblas == &gotoblas_THUNDERX3T110) return corename[13]; - if (gotoblas == &gotoblas_CORTEXA55) return corename[14]; - if (gotoblas == &gotoblas_ARMV8SVE) return corename[15]; + if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12]; + if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13]; + if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14]; + if (gotoblas == &gotoblas_CORTEXA55) return corename[15]; + if (gotoblas == &gotoblas_ARMV8SVE) return corename[16]; return corename[NUM_CORETYPES]; } @@ -233,10 +236,11 @@ static gotoblas_t *force_coretype(char *coretype) { case 9: return (&gotoblas_EMAG8180); case 10: return (&gotoblas_NEOVERSEN1); case 11: return (&gotoblas_NEOVERSEV1); - case 12: return (&gotoblas_NEOVERSEN2); - case 13: return (&gotoblas_THUNDERX3T110); - case 14: return (&gotoblas_CORTEXA55); - case 15: return (&gotoblas_ARMV8SVE); + case 12: return (&gotoblas_NEOVERSEV2); + case 13: return (&gotoblas_NEOVERSEN2); + case 14: return (&gotoblas_THUNDERX3T110); + case 15: return (&gotoblas_CORTEXA55); + case 16: return (&gotoblas_ARMV8SVE); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -312,6 +316,13 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_NEOVERSEN1; }else return &gotoblas_NEOVERSEV1; + case 0xd4f: + if (!(getauxval(AT_HWCAP) & 
HWCAP_SVE)) { + openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK); + return &gotoblas_NEOVERSEN1; + } else { + return &gotoblas_NEOVERSEV2; + } #endif case 0xd05: // Cortex A55 return &gotoblas_CORTEXA55;