Skip to content

Commit

Permalink
Merge pull request #643 from ThePortlandGroup/nv_stage
Browse files Browse the repository at this point in the history
Pull 2018-12-19T17-19 Recent NVIDIA Changes
  • Loading branch information
sscalpone authored Dec 21, 2018
2 parents 87c7238 + f5a5ac4 commit 6a5cc79
Show file tree
Hide file tree
Showing 10 changed files with 2,436 additions and 87 deletions.
18 changes: 9 additions & 9 deletions runtime/libpgmath/lib/common/dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
#include "mth_tbldefs.h"

#if defined(TARGET_LINUX_X8664) || defined(TARGET_OSX_X8664) || defined(TARGET_WIN_X8664)
#include "cpuid8664.h"
#include "x86id.h"
#endif

/*
Expand Down Expand Up @@ -1026,25 +1026,25 @@ __math_dispatch()

} else { /* Get processor architecture using CPUID information */
#if defined(TARGET_LINUX_X8664) || defined(TARGET_OSX_X8664) || defined(TARGET_WIN_X8664)
if (CPUIDX8664(is_avx512vl)() == 1) {
if (X86IDFN(is_avx512vl)() == 1) {
__math_target = arch_avx512;
} else if (CPUIDX8664(is_avx512f)() == 1) {
} else if (X86IDFN(is_avx512f)() == 1) {
__math_target = arch_avx512knl;
} else if (CPUIDX8664(is_avx2)() == 1) {
} else if (X86IDFN(is_avx2)() == 1) {
__math_target = arch_avx2;
} else if (CPUIDX8664(is_avx)() == 1) {
if (CPUIDX8664(is_intel)() == 1) {
} else if (X86IDFN(is_avx)() == 1) {
if (X86IDFN(is_intel)() == 1) {
__math_target = arch_avx;
}
if (CPUIDX8664(is_amd)() == 1) {
if (CPUIDX8664(is_fma4)() == 1) {
if (X86IDFN(is_amd)() == 1) {
if (X86IDFN(is_fma4)() == 1) {
__math_target = arch_avxfma4;
} else {
__math_target = arch_sse4;
}
}
} else {
if ((CPUIDX8664(is_sse4a)() == 1) || (CPUIDX8664(is_sse41)() == 1)) {
if ((X86IDFN(is_sse4a)() == 1) || (X86IDFN(is_sse41)() == 1)) {
__math_target = arch_sse4;
} else {
__math_target = arch_em64t;
Expand Down
7 changes: 7 additions & 0 deletions runtime/libpgmath/lib/x86_64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,16 @@ set(SRCS
dsqrt.c
fabs.c
sqrt.c
pgcpuid.c
${ASM_SRCS})
libmath_add_object_library("${SRCS}" "${FLAGS}" "${DEFINITIONS}" "")

# Decorate entry points and global objects in x86id with an internal prefix.
set(SRCS
x86id.c)
list(APPEND DEFINITIONS_FOR_LIBPGC ${DEFINITIONS} FOR_LIBPGC)
libmath_add_object_library("${SRCS}" "${FLAGS}" "${DEFINITIONS_FOR_LIBPGC}" "for_libpgc")

# isoc99
set(SRCS
alog.c
Expand Down
26 changes: 26 additions & 0 deletions runtime/libpgmath/lib/x86_64/cpuid8664.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ static int CPUIDX8664(is_amd)();
static int CPUIDX8664(is_fma4)();
static int CPUIDX8664(is_sse4a)();
static int CPUIDX8664(is_sse41)();
static int CPUIDX8664(is_f16c)();

/*
* Check that this is a Genuine Intel processor
Expand Down Expand Up @@ -296,6 +297,30 @@ CPUIDX8664(is_avx512vl)(void)
return (ebx & bit_AVX512VL) != 0;
}/* is_avx512vl */

/*
 * Report whether the F16C feature bit is set in %ecx of CPUID leaf 1.
 *
 * The feature bit is only consulted when, in this order:
 *   - is_intel() or is_amd() returns non-zero,
 *   - is_avx() returns non-zero,
 *   - __get_cpuid(1, ...) succeeds.
 * Otherwise 0 is returned.
 */
static int
CPUIDX8664(is_f16c)(void)
{
  uint32_t eax, ebx, ecx, edx;

  if ((CPUIDX8664(is_intel)() != 0) || (CPUIDX8664(is_amd)() != 0)) {
    if ((CPUIDX8664(is_avx)() != 0) &&
        (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0)) {
      return (ecx & bit_F16C) ? 1 : 0;
    }
  }

  return 0;
}/* is_f16c */

#ifdef UNIT_TEST
int
main()
Expand All @@ -309,6 +334,7 @@ main()
printf("is_avx2()=%d\n", CPUIDX8664(is_avx2)());
printf("is_avx512f()=%d\n", CPUIDX8664(is_avx512f)());
printf("is_avx512vl()=%d\n", CPUIDX8664(is_avx512vl)());
printf("is_f16c()=%d\n", CPUIDX8664(is_f16c)());
}
#endif
#endif // #ifndef CPUIDX8664_H
135 changes: 135 additions & 0 deletions runtime/libpgmath/lib/x86_64/pgcpuid.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

#include <stdint.h>
#include "pgcpuid.h"

/*
* Note:
* 1) these functions cannot call any other function
* 2) these functions can only use GPR (not floating point)
*
*/

/** @brief returns false/true if CPUID supports function f.
 *  __pgi_cpuid_getmax(uint32_t f)
 *  @param f (I1) function (value of %eax) to validate
 *
 *  CPUID is executed with %eax = (f & 0x80000000), i.e. bit 31 of f selects
 *  which base function is queried, and the value CPUID returns in %eax is
 *  compared against f.
 *
 *  Returns false(0): f is greater than the value returned in %eax
 *          true(1):  f is less than or equal to the value returned in %eax
 *
 */

int
__pgi_cpuid_getmax(uint32_t f)
{
  uint32_t maxcpueax;
  uint32_t fin = f & 0x80000000;

  /*
   * Use __asm__ rather than asm: the plain keyword is a GNU extension that
   * is not recognized when compiling in a strict ISO mode (e.g. -std=c11).
   * %ebx, %ecx and %edx are written by CPUID but not needed here, so they
   * are listed as clobbers.
   */
  __asm__("\tcpuid"
          : "=a"(maxcpueax)
          : "0"(fin)
          : "ebx", "ecx", "edx"
  );
  return f <= maxcpueax;
}

/** @brief returns results of executing CPUID with function f and
 *  sub function c.
 *  __pgi_cpuid_ecx(uint32_t f, uint32_t *r, uint32_t c)
 *  @param f (I1) function (value of %eax) to execute CPUID with
 *  @param r (I2) pointer to buffer of four uint32_t to store eax, ebx, ecx, edx
 *  @param c (I3) value of %ecx to execute CPUID with
 *
 *  Returns false(0): if __pgi_cpuid_getmax(f) reports f as not supported;
 *                    r is not written
 *          true(1):  CPUID successfully executed with f+c and:
 *                    r[0]=%eax, r[1]=%ebx, r[2]=%ecx, r[3]=%edx
 *
 */

int
__pgi_cpuid_ecx(uint32_t f, uint32_t *r, uint32_t c)
{
  if (__pgi_cpuid_getmax(f) == 0) {
    return 0;
  }

  /*
   * Use __asm__ rather than asm: the plain keyword is a GNU extension that
   * is not recognized when compiling in a strict ISO mode (e.g. -std=c11).
   * All four registers CPUID writes are outputs, so no clobbers are needed.
   */
  __asm__("\tcpuid"
          : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3])
          : "0"(f), "2"(c)
  );
  return 1;
}


/** @brief executes CPUID with function f and sub function (%ecx) 0.
 *  __pgi_cpuid(uint32_t f, uint32_t *r)
 *  @param f (I1) function to execute CPUID with
 *  @param r (I2) pointer to buffer to store eax, ebx, ecx, edx
 *
 *  Forwards to __pgi_cpuid_ecx(f, r, 0) and returns its result:
 *  false(0): if f not supported
 *  true(1):  CPUID successfully executed with f and:
 *            r[0]=%eax, r[1]=%ebx, r[2]=%ecx, r[3]=%edx
 *
 */

int
__pgi_cpuid(uint32_t f, uint32_t *r)
{
  const uint32_t subfunc = 0;

  return __pgi_cpuid_ecx(f, r, subfunc);
}

/** @brief executes CPUID with function f and sub function (%ecx) 0.
 *  __pgcpuid(uint32_t f, uint32_t *r)
 *  @param f (I1) function to execute CPUID with
 *  @param r (I2) pointer to buffer to store eax, ebx, ecx, edx
 *
 *  Same body as __pgi_cpuid: forwards to __pgi_cpuid_ecx(f, r, 0) and
 *  returns its result:
 *  false(0): if f not supported
 *  true(1):  CPUID successfully executed with f and:
 *            r[0]=%eax, r[1]=%ebx, r[2]=%ecx, r[3]=%edx
 *
 */

int
__pgcpuid(uint32_t f, uint32_t *r)
{
  const uint32_t subfunc = 0;

  return __pgi_cpuid_ecx(f, r, subfunc);
}

/** @brief read extended control register.
 *  __pgi_getbv(uint32_t f, uint64_t *r)
 *  @param f (I1) extended control register number to read (passed in %ecx)
 *  @param r (I2) pointer to buffer to store xcr[f]
 *
 *  No check is made here that the instruction is usable; that is left to
 *  the caller.
 *
 *  Returns true(1) with:
 *          r[31: 0]=%eax
 *          r[63:32]=%edx
 *
 */
int
__pgi_getbv(uint32_t f, uint64_t *r)
{
  uint32_t lo;
  uint32_t hi;

  /*
   * Use __asm__ rather than asm: the plain keyword is a GNU extension that
   * is not recognized when compiling in a strict ISO mode (e.g. -std=c11).
   */
  __asm__(
#if defined(__WIN64)
      /* Raw encoding of xgetbv. */
      "\t.byte\t0x0f, 0x01, 0xd0"
#else
      "\txgetbv"
#endif
      : "=a"(lo), "=d"(hi)
      : "c"(f)
  );

  /*
   * Assemble the result with integer arithmetic instead of storing through
   * a uint32_t pointer aliased onto *r, which would violate the C
   * effective-type (strict aliasing) rules.
   */
  *r = ((uint64_t)hi << 32) | lo;
  return 1;
}
Loading

0 comments on commit 6a5cc79

Please sign in to comment.