From 810da941cb7fb3e9005b380d3640aaef9aaa92d3 Mon Sep 17 00:00:00 2001 From: regro-cf-autotick-bot <36490558+regro-cf-autotick-bot@users.noreply.github.com> Date: Thu, 12 Sep 2024 04:08:29 +0000 Subject: [PATCH 01/11] updated v0.4.32 --- recipe/meta.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index facc0bad..808e3c4d 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,5 +1,5 @@ -{% set version = "0.4.31" %} -{% set build = 1 %} +{% set version = "0.4.32" %} +{% set build = 0 %} {% if cuda_compiler_version != "None" %} {% set build = build + 200 %} @@ -13,7 +13,7 @@ package: source: # only pull sources after upstream PyPI release... url: https://github.com/google/jax/archive/jaxlib-v{{ version }}.tar.gz - sha256: 022ea1347f9b21cbea31410b3d650d976ea4452a48ea7317a5f91c238031bf94 + sha256: 3fe36d596e4d640443c0a5c533845c74fbc4341e024d9bb1cd75cb49f5f419c2 patches: - patches/0001-Allow-for-custom-CUDA-build.patch - patches/0002-Consolidated-build-fixes-for-XLA.patch From d1f821e4178b1d73ece2d4dda639a29109257ca6 Mon Sep 17 00:00:00 2001 From: regro-cf-autotick-bot <36490558+regro-cf-autotick-bot@users.noreply.github.com> Date: Thu, 12 Sep 2024 04:09:37 +0000 Subject: [PATCH 02/11] MNT: Re-rendered with conda-build 24.7.1, conda-smithy 3.39.1, and conda-forge-pinning 2024.09.11.15.30.13 --- .ci_support/migrations/cuda120.yaml | 103 ---------------------------- build-locally.py | 5 +- 2 files changed, 4 insertions(+), 104 deletions(-) delete mode 100644 .ci_support/migrations/cuda120.yaml diff --git a/.ci_support/migrations/cuda120.yaml b/.ci_support/migrations/cuda120.yaml deleted file mode 100644 index 2fd68804..00000000 --- a/.ci_support/migrations/cuda120.yaml +++ /dev/null @@ -1,103 +0,0 @@ -migrator_ts: 1682985063 -__migrator: - kind: - version - migration_number: - 3 - build_number: - 1 - paused: false - override_cbc_keys: - - cuda_compiler_stub - operation: key_add - check_solvable: false - primary_key: cuda_compiler_version - ordering: - cxx_compiler_version: - - 9 - - 8 - - 7 - c_compiler_version: - - 9 - - 8 - - 7 - fortran_compiler_version: - - 9 - - 8 - - 7 - docker_image: - - quay.io/condaforge/linux-anvil-comp7 # [os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-aarch64 # [os.environ.get("BUILD_PLATFORM") == "linux-aarch64"] - - quay.io/condaforge/linux-anvil-ppc64le # [os.environ.get("BUILD_PLATFORM") == "linux-ppc64le"] - - quay.io/condaforge/linux-anvil-armv7l # [os.environ.get("BUILD_PLATFORM") == "linux-armv7l"] - - quay.io/condaforge/linux-anvil-cuda:9.2 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:10.0 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:10.1 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:10.2 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:11.0 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:11.1 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - # case: CUDA 11.2 - - quay.io/condaforge/linux-anvil-cuda:11.2 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - # case: native compilation (build == target) - - quay.io/condaforge/linux-anvil-ppc64le-cuda:11.2 # [ppc64le and os.environ.get("BUILD_PLATFORM") == "linux-ppc64le"] - - quay.io/condaforge/linux-anvil-aarch64-cuda:11.2 # [aarch64 and os.environ.get("BUILD_PLATFORM") == "linux-aarch64"] - # case: cross-compilation (build != target) - - quay.io/condaforge/linux-anvil-cuda:11.2 # [ppc64le and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:11.2 # [aarch64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - # case: CUDA 11.8 - - quay.io/condaforge/linux-anvil-cuda:11.8 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - # case: native compilation (build == target) - - quay.io/condaforge/linux-anvil-ppc64le-cuda:11.8 # [ppc64le and os.environ.get("BUILD_PLATFORM") == "linux-ppc64le"] - - quay.io/condaforge/linux-anvil-aarch64-cuda:11.8 # [aarch64 and os.environ.get("BUILD_PLATFORM") == "linux-aarch64"] - # case: cross-compilation (build != target) - - quay.io/condaforge/linux-anvil-cuda:11.8 # [ppc64le and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cuda:11.8 # [aarch64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - # case: non-CUDA builds - - quay.io/condaforge/linux-anvil-cos7-x86_64 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] - cuda_compiler_version: - - None - - 10.2 # [(linux64 or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 11.0 # [(linux64 or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 11.1 # [(linux64 or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 11.2 # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 11.8 # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 12.0 # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - commit_message: | - Rebuild for CUDA 12 w/arch + Windows support - - The transition to CUDA 12 SDK includes new packages for all CUDA libraries and - build tools. Notably, the cudatoolkit package no longer exists, and packages - should depend directly on the specific CUDA libraries (libcublas, libcusolver, - etc) as needed. For an in-depth overview of the changes and to report problems - [see this issue]( https://github.com/conda-forge/conda-forge.github.io/issues/1963 ). - Please feel free to raise any issues encountered there. Thank you! :pray: - -cuda_compiler: # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - cuda-nvcc # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - -cuda_compiler_version: # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 12.0 # [(linux or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - -c_compiler_version: # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 12 # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - -cxx_compiler_version: # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 12 # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - -fortran_compiler_version: # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - 12 # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - -cdt_name: # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - cos7 # [linux and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - -docker_image: # [os.environ.get("BUILD_PLATFORM", "").startswith("linux-") and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - - quay.io/condaforge/linux-anvil-cos7-x86_64 # [linux64 and os.environ.get("BUILD_PLATFORM") == "linux-64" and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] - # case: native compilation (build == target) - - quay.io/condaforge/linux-anvil-ppc64le # [ppc64le and os.environ.get("BUILD_PLATFORM") == "linux-ppc64le"] - - quay.io/condaforge/linux-anvil-aarch64 # [aarch64 and os.environ.get("BUILD_PLATFORM") == "linux-aarch64"] - # case: cross-compilation (build != target) - - quay.io/condaforge/linux-anvil-cos7-x86_64 # [ppc64le and os.environ.get("BUILD_PLATFORM") == "linux-64"] - - quay.io/condaforge/linux-anvil-cos7-x86_64 # [aarch64 and os.environ.get("BUILD_PLATFORM") == "linux-64"] diff --git a/build-locally.py b/build-locally.py index d78427b5..8ac9b846 100755 --- a/build-locally.py +++ b/build-locally.py @@ -1,8 +1,11 @@ -#!/usr/bin/env python3 +#!/bin/sh +"""exec' "python3" "$0" "$@" #""" # fmt: off # fmt: on # # This file has been generated by conda-smithy in order to build the recipe # locally. # +# The line above this comment is a bash / sh / zsh guard +# to stop people from running it with the wrong interpreter import glob import os import platform From fb30d194fcf1f850447920e2443e655ec15105fd Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 12 Sep 2024 12:49:13 +0200 Subject: [PATCH 03/11] Update patches --- ...-Add-x64_windows-as-a-wheel-platform.patch | 24 +++++ .../0001-Allow-for-custom-CUDA-build.patch | 26 ++--- ...002-Consolidated-build-fixes-for-XLA.patch | 101 +++++++++++++----- 3 files changed, 113 insertions(+), 38 deletions(-) create mode 100644 recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch diff --git a/recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch b/recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch new file mode 100644 index 00000000..d0a4f5e8 --- /dev/null +++ b/recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch @@ -0,0 +1,24 @@ +From e03061546d4b95610465b22227e36ebcdd36c728 Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Thu, 18 Jul 2024 15:55:38 +0200 +Subject: [PATCH] Add x64_windows as a wheel platform + +--- + build/build.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/build/build.py b/build/build.py +index 0c25cea..db4a2a4 100755 +--- a/build/build.py ++++ b/build/build.py +@@ -575,6 +575,7 @@ def main(): + "darwin_x86_64": "x86_64", + "ppc": "ppc64le", + "aarch64": "aarch64", ++ "x64_windows": "x64_windows", + } + # TODO(phawkins): support other bazel cpu overrides. + wheel_cpu = (wheel_cpus[args.target_cpu] if args.target_cpu is not None +-- +2.39.3 (Apple Git-146) + diff --git a/recipe/patches/0001-Allow-for-custom-CUDA-build.patch b/recipe/patches/0001-Allow-for-custom-CUDA-build.patch index 484c4c39..c4e2e708 100644 --- a/recipe/patches/0001-Allow-for-custom-CUDA-build.patch +++ b/recipe/patches/0001-Allow-for-custom-CUDA-build.patch @@ -1,4 +1,4 @@ -From 1daa8cc30c7c2d70a71aa164d9ecb5923b34e0c0 Mon Sep 17 00:00:00 2001 +From 96762e9ab2ab0cf89354b25da2d0738df4d2dc1a Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Sun, 8 Oct 2023 19:34:34 +0200 Subject: [PATCH 1/2] Allow for custom CUDA build @@ -8,18 +8,18 @@ Subject: [PATCH 1/2] Allow for custom CUDA build 1 file changed, 5 insertions(+) diff --git a/build/build.py b/build/build.py -index 2f6822281..0000dbbf8 100755 +index c3a8627..5bfbdcb 100755 --- a/build/build.py +++ b/build/build.py -@@ -277,6 +277,11 @@ def write_bazelrc(*, remote_build, - f.write("build --action_env TF_CUDA_PATHS=\"{tf_cuda_paths}\"\n" - .format(tf_cuda_paths=",".join(tf_cuda_paths))) - if cuda_version: -+ # set GCC_HOST_COMPILER_PATH for toolchain with conda-forge -+ f.write("build --action_env GCC_HOST_COMPILER_PATH=\"{gcc_host_compiler_path}\"\n" -+ .format(gcc_host_compiler_path=os.environ["GCC"])) -+ f.write("build --action_env GCC_HOST_COMPILER_PREFIX=\"{gcc_host_compiler_prefix}\"\n" +@@ -289,6 +289,11 @@ def write_bazelrc(*, remote_build, + f.write("build --config=nvcc_clang\n") + f.write(f"build --action_env=CLANG_CUDA_COMPILER_PATH={clang_path}\n") + if cuda_version: ++ # set GCC_HOST_COMPILER_PATH for toolchain with conda-forge ++ f.write("build --action_env GCC_HOST_COMPILER_PATH=\"{gcc_host_compiler_path}\"\n" ++ .format(gcc_host_compiler_path=os.environ["GCC"])) ++ f.write("build --action_env GCC_HOST_COMPILER_PREFIX=\"{gcc_host_compiler_prefix}\"\n" + .format(gcc_host_compiler_prefix=os.path.dirname(os.environ["GCC"]))) - f.write("build --action_env TF_CUDA_VERSION=\"{cuda_version}\"\n" - .format(cuda_version=cuda_version)) - if cudnn_version: + f.write("build --repo_env HERMETIC_CUDA_VERSION=\"{cuda_version}\"\n" + .format(cuda_version=cuda_version)) + if cudnn_version: diff --git a/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch b/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch index 5722a364..9e241ec7 100644 --- a/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch +++ b/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch @@ -1,4 +1,4 @@ -From 3967a662a3cb00e8144628ba021116ee59d74134 Mon Sep 17 00:00:00 2001 +From 7bf6376e95bd3b61204f8f01644814874293a2c2 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 14 Dec 2023 17:06:15 +0100 Subject: [PATCH 2/2] Consolidated build fixes for XLA @@ -14,24 +14,75 @@ which is also where we're patching in the list of patches to apply to xla. Co-Authored-By: H. Vetinari --- + .../xla/0001-Omit-usage-of-StrFormat.patch | 43 ++++ ...pport-third-party-build-of-boringssl.patch | 51 ++++ third_party/xla/0002-Fix-abseil-headers.patch | 73 ++++++ .../xla/0003-Omit-usage-of-StrFormat.patch | 43 ++++ ...0004-Add-missing-bits-absl-systemlib.patch | 226 ++++++++++++++++++ third_party/xla/workspace.bzl | 6 + - 5 files changed, 399 insertions(+) + 6 files changed, 442 insertions(+) + create mode 100644 third_party/xla/0001-Omit-usage-of-StrFormat.patch create mode 100644 third_party/xla/0001-Support-third-party-build-of-boringssl.patch create mode 100644 third_party/xla/0002-Fix-abseil-headers.patch create mode 100644 third_party/xla/0003-Omit-usage-of-StrFormat.patch create mode 100644 third_party/xla/0004-Add-missing-bits-absl-systemlib.patch +diff --git a/third_party/xla/0001-Omit-usage-of-StrFormat.patch b/third_party/xla/0001-Omit-usage-of-StrFormat.patch +new file mode 100644 +index 0000000..d1b4765 +--- /dev/null ++++ b/third_party/xla/0001-Omit-usage-of-StrFormat.patch +@@ -0,0 +1,43 @@ ++From b7d3f685ea9f58f0054af0f34d0bc3ccac43fa5c Mon Sep 17 00:00:00 2001 ++From: "Uwe L. Korn" ++Date: Thu, 4 Jul 2024 10:36:03 +0200 ++Subject: [PATCH] Omit usage of StrFormat ++ ++--- ++ xla/stream_executor/gpu/gpu_executor.h | 9 ++++++--- ++ 1 file changed, 6 insertions(+), 3 deletions(-) ++ ++diff --git a/xla/stream_executor/gpu/gpu_executor.h b/xla/stream_executor/gpu/gpu_executor.h ++index 3a5945e884..9bdc2acd47 100644 ++--- a/xla/stream_executor/gpu/gpu_executor.h +++++ b/xla/stream_executor/gpu/gpu_executor.h ++@@ -29,6 +29,7 @@ limitations under the License. ++ #include ++ #include ++ #include +++#include ++ #include ++ #include ++ ++@@ -38,7 +39,6 @@ limitations under the License. ++ #include "absl/numeric/int128.h" ++ #include "absl/status/status.h" ++ #include "absl/status/statusor.h" ++-#include "absl/strings/str_format.h" ++ #include "absl/synchronization/mutex.h" ++ #include "absl/types/span.h" ++ #include "xla/stream_executor/blas.h" ++@@ -187,8 +187,11 @@ class GpuExecutor : public StreamExecutor { ++ uint64_t size) override { ++ auto* buffer = GpuDriver::HostAllocate(context_, size); ++ if (buffer == nullptr && size > 0) { ++- return absl::InternalError( ++- absl::StrFormat("Failed to allocate HostMemory of size %d", size)); +++ std::ostringstream stringStream; +++ stringStream << "Failed to allocate HostMemory of size "; +++ stringStream << size; +++ std::string res = stringStream.str(); +++ return absl::InternalError(res); ++ } ++ return std::make_unique(buffer, size, this); ++ } diff --git a/third_party/xla/0001-Support-third-party-build-of-boringssl.patch b/third_party/xla/0001-Support-third-party-build-of-boringssl.patch new file mode 100644 -index 000000000..e24a45e1f +index 0000000..26a9904 --- /dev/null +++ b/third_party/xla/0001-Support-third-party-build-of-boringssl.patch @@ -0,0 +1,51 @@ -+From 876bfe566992d7829dc4fdb82de72ff2c622f015 Mon Sep 17 00:00:00 2001 ++From 9a5932bb8a363f777ce39ff75a52eda2bba9c21f Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Thu, 14 Dec 2023 15:04:51 +0100 +Subject: [PATCH 1/4] Support third-party build of boringssl @@ -70,10 +121,10 @@ index 000000000..e24a45e1f ++ ], ++) +diff --git a/workspace2.bzl b/workspace2.bzl -+index 5c9d465040..69dfa954b3 100644 ++index 1809702d8b..6fc538d3a2 100644 +--- a/workspace2.bzl ++++ b/workspace2.bzl -+@@ -67,7 +67,7 @@ def _tf_repositories(): ++@@ -69,7 +69,7 @@ def _tf_repositories(): + name = "boringssl", + sha256 = "9dc53f851107eaf87b391136d13b815df97ec8f76dadb487b58b2fc45e624d2c", + strip_prefix = "boringssl-c00d7ca810e93780bd0c8ee4eea28f4f2ea4bcdc", @@ -84,11 +135,11 @@ index 000000000..e24a45e1f + diff --git a/third_party/xla/0002-Fix-abseil-headers.patch b/third_party/xla/0002-Fix-abseil-headers.patch new file mode 100644 -index 000000000..7a58075e1 +index 0000000..96a6fec --- /dev/null +++ b/third_party/xla/0002-Fix-abseil-headers.patch @@ -0,0 +1,73 @@ -+From adc3749cd0a77a502c9ffd9c558dbee96c1fc0ab Mon Sep 17 00:00:00 2001 ++From 97ad75d0bf891be488fb223ac95ff6572b4ecd88 Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Thu, 23 May 2024 15:45:52 +0200 +Subject: [PATCH 2/4] Fix abseil headers @@ -101,10 +152,10 @@ index 000000000..7a58075e1 + 4 files changed, 10 insertions(+) + +diff --git a/third_party/tsl/tsl/platform/default/BUILD b/third_party/tsl/tsl/platform/default/BUILD -+index 01cf593888..ba5b5cc068 100644 ++index b3ce4301fb..9b72c2eb42 100644 +--- a/third_party/tsl/tsl/platform/default/BUILD ++++ b/third_party/tsl/tsl/platform/default/BUILD -+@@ -220,6 +220,8 @@ cc_library( ++@@ -225,6 +225,8 @@ cc_library( + deps = [ + "//tsl/platform:logging", + "@com_google_absl//absl/log:check", @@ -114,7 +165,7 @@ index 000000000..7a58075e1 + ) + +diff --git a/third_party/tsl/tsl/profiler/rpc/client/BUILD b/third_party/tsl/tsl/profiler/rpc/client/BUILD -+index 03f8c1deff..1f081a14d1 100644 ++index 4b8ece7403..a2772846b8 100644 +--- a/third_party/tsl/tsl/profiler/rpc/client/BUILD ++++ b/third_party/tsl/tsl/profiler/rpc/client/BUILD +@@ -101,6 +101,8 @@ cc_library( @@ -150,7 +201,7 @@ index 000000000..7a58075e1 + ], + alwayslink = True, +diff --git a/xla/tsl/distributed_runtime/rpc/BUILD b/xla/tsl/distributed_runtime/rpc/BUILD -+index 0f9a93eb1a..e5f11fa62c 100644 ++index 817c4dc5a4..d6f27deb5c 100644 +--- a/xla/tsl/distributed_runtime/rpc/BUILD ++++ b/xla/tsl/distributed_runtime/rpc/BUILD +@@ -37,6 +37,7 @@ cc_library( @@ -163,11 +214,11 @@ index 000000000..7a58075e1 + "@tsl//tsl/platform:status", diff --git a/third_party/xla/0003-Omit-usage-of-StrFormat.patch b/third_party/xla/0003-Omit-usage-of-StrFormat.patch new file mode 100644 -index 000000000..541c06f40 +index 0000000..67d2275 --- /dev/null +++ b/third_party/xla/0003-Omit-usage-of-StrFormat.patch @@ -0,0 +1,43 @@ -+From 8434fbb499a3c035c9b028f1500b01229ce04a4a Mon Sep 17 00:00:00 2001 ++From a360cd33b748c4f6b1ab00e386ac8031112c5b2f Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Thu, 4 Jul 2024 10:36:03 +0200 +Subject: [PATCH 3/4] Omit usage of StrFormat @@ -177,17 +228,17 @@ index 000000000..541c06f40 + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/xla/stream_executor/gpu/gpu_executor.h b/xla/stream_executor/gpu/gpu_executor.h -+index c19fa1ccee..c1565b864e 100644 ++index 8e9a8352e2..36d42493c6 100644 +--- a/xla/stream_executor/gpu/gpu_executor.h ++++ b/xla/stream_executor/gpu/gpu_executor.h -+@@ -28,6 +28,7 @@ limitations under the License. ++@@ -27,6 +27,7 @@ limitations under the License. + #include + #include + #include ++#include + #include + #include -+ ++ #include +@@ -37,7 +38,6 @@ limitations under the License. + #include "absl/numeric/int128.h" + #include "absl/status/status.h" @@ -196,7 +247,7 @@ index 000000000..541c06f40 + #include "absl/synchronization/mutex.h" + #include "absl/types/span.h" + #include "xla/stream_executor/blas.h" -+@@ -177,8 +177,11 @@ class GpuExecutor : public StreamExecutorCommon { ++@@ -166,8 +166,11 @@ class GpuExecutor : public StreamExecutorCommon { + uint64_t size) override { + auto* buffer = GpuDriver::HostAllocate(context_, size); + if (buffer == nullptr && size > 0) { @@ -212,16 +263,15 @@ index 000000000..541c06f40 + } diff --git a/third_party/xla/0004-Add-missing-bits-absl-systemlib.patch b/third_party/xla/0004-Add-missing-bits-absl-systemlib.patch new file mode 100644 -index 000000000..e151c23c8 +index 0000000..1941f79 --- /dev/null +++ b/third_party/xla/0004-Add-missing-bits-absl-systemlib.patch @@ -0,0 +1,226 @@ -+From f43652257c58896305d13c6dc9829c9f3f522a8f Mon Sep 17 00:00:00 2001 ++From fc6d67a2f5fce78eb91477fa4bca5c47b6fc31fd Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Thu, 4 Jul 2024 15:58:32 +0200 +Subject: [PATCH 4/4] Add missing bits absl systemlib + -+Co-Authored-By: H. Vetinari +--- + .../third_party/absl/system.absl.base.BUILD | 16 +++++ + .../third_party/absl/system.absl.crc.BUILD | 70 +++++++++++++++++++ @@ -425,14 +475,15 @@ index 000000000..e151c23c8 + name = "strings", + linkopts = ["-labsl_strings"], +diff --git a/third_party/tsl/third_party/absl/workspace.bzl b/third_party/tsl/third_party/absl/workspace.bzl -+index 06f75166ce..446dbc4081 100644 ++index 9565a82c33..e71aa16726 100644 +--- a/third_party/tsl/third_party/absl/workspace.bzl ++++ b/third_party/tsl/third_party/absl/workspace.bzl -+@@ -15,11 +15,13 @@ def repo(): ++@@ -14,12 +14,14 @@ def repo(): ++ SYS_DIRS = [ + "algorithm", + "base", -+ "cleanup", ++ "crc", ++ "cleanup", + "container", + "debugging", + "flags", @@ -443,7 +494,7 @@ index 000000000..e151c23c8 + "meta", + "numeric", diff --git a/third_party/xla/workspace.bzl b/third_party/xla/workspace.bzl -index af52e7671..76fb83680 100644 +index 8f4accc..3b7afaf 100644 --- a/third_party/xla/workspace.bzl +++ b/third_party/xla/workspace.bzl @@ -30,6 +30,12 @@ def repo(): From 462d60b157b1e6340a2424ff8fe7eb1aa77fc864 Mon Sep 17 00:00:00 2001 From: Uwe Korn Date: Wed, 25 Sep 2024 20:31:28 +0000 Subject: [PATCH 04/11] Don't set cuda_path --- recipe/build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/recipe/build.sh b/recipe/build.sh index 2cbe5e18..4bd5cdee 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -41,7 +41,6 @@ if [[ "${cuda_compiler_version:-None}" != "None" ]]; then CUDA_ARGS="--enable_cuda \ --enable_nccl \ - --cuda_path=${TF_CUDA_PATHS} \ --cudnn_path=${PREFIX} \ --cuda_compute_capabilities=$TF_CUDA_COMPUTE_CAPABILITIES \ --cuda_version=$TF_CUDA_VERSION \ From 4e67265251693647617bfc8105cbf6ffd3c2228f Mon Sep 17 00:00:00 2001 From: Uwe Korn Date: Fri, 27 Sep 2024 08:44:30 +0000 Subject: [PATCH 05/11] Add setuptools as a build backend --- recipe/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 808e3c4d..c063e62c 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -65,6 +65,7 @@ requirements: - python - pip - numpy + - setuptools - wheel - cuda-version {{ cuda_compiler_version }} # [cuda_compiler_version != "None"] # avoid not being able to pass `-C=--build-option=--python-tag=cp` due to From 9f9cc1ec90f8f8ae33b6a77e6732f36cf58e15eb Mon Sep 17 00:00:00 2001 From: Uwe Korn Date: Sun, 29 Sep 2024 18:44:32 +0000 Subject: [PATCH 06/11] MNT: Re-rendered with conda-build 24.7.1, conda-smithy 3.40.1, and conda-forge-pinning 2024.09.29.16.11.30 --- .scripts/build_steps.sh | 2 ++ build-locally.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.scripts/build_steps.sh b/.scripts/build_steps.sh index ba4b251c..856f469d 100755 --- a/.scripts/build_steps.sh +++ b/.scripts/build_steps.sh @@ -43,6 +43,8 @@ setup_conda_rc "${FEEDSTOCK_ROOT}" "${RECIPE_ROOT}" "${CONFIG_FILE}" source run_conda_forge_build_setup + + # make the build number clobber make_build_number "${FEEDSTOCK_ROOT}" "${RECIPE_ROOT}" "${CONFIG_FILE}" diff --git a/build-locally.py b/build-locally.py index 8ac9b846..6788aea6 100755 --- a/build-locally.py +++ b/build-locally.py @@ -1,5 +1,5 @@ #!/bin/sh -"""exec' "python3" "$0" "$@" #""" # fmt: off # fmt: on +"""exec" "python3" "$0" "$@" #""" # fmt: off # fmt: on # # This file has been generated by conda-smithy in order to build the recipe # locally. From 937e296e46c70a24ec90341e50384a212455fc32 Mon Sep 17 00:00:00 2001 From: Uwe Korn Date: Mon, 30 Sep 2024 19:08:47 +0000 Subject: [PATCH 07/11] Fix linux CPU build --- recipe/build.sh | 3 + recipe/meta.yaml | 1 + ...49cd4a6c58b3b9823a32fe1320d65c98c45d.patch | 62 +++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 recipe/patches/c61e49cd4a6c58b3b9823a32fe1320d65c98c45d.patch diff --git a/recipe/build.sh b/recipe/build.sh index 4bd5cdee..646ed0e3 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -87,6 +87,9 @@ if [[ "${target_platform}" == "osx-arm64" || "${target_platform}" != "${build_pl else EXTRA="${CUDA_ARGS:-}" fi +if [[ "${target_platform}" == linux-* ]]; then + EXTRA="${EXTRA} --nouse_clang" +fi ${PYTHON} build/build.py \ --target_cpu_features default \ --enable_mkl_dnn \ diff --git a/recipe/meta.yaml b/recipe/meta.yaml index c063e62c..c7da6780 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -17,6 +17,7 @@ source: patches: - patches/0001-Allow-for-custom-CUDA-build.patch - patches/0002-Consolidated-build-fixes-for-XLA.patch + - patches/c61e49cd4a6c58b3b9823a32fe1320d65c98c45d.patch build: number: {{ build }} diff --git a/recipe/patches/c61e49cd4a6c58b3b9823a32fe1320d65c98c45d.patch b/recipe/patches/c61e49cd4a6c58b3b9823a32fe1320d65c98c45d.patch new file mode 100644 index 00000000..3fc88041 --- /dev/null +++ b/recipe/patches/c61e49cd4a6c58b3b9823a32fe1320d65c98c45d.patch @@ -0,0 +1,62 @@ +From c61e49cd4a6c58b3b9823a32fe1320d65c98c45d Mon Sep 17 00:00:00 2001 +From: Dan Foreman-Mackey +Date: Tue, 17 Sep 2024 11:22:49 -0700 +Subject: [PATCH] Simplify logic in jaxlib FFI_ASSIGN_OR_RETURN macro, and fix + gcc build. + +In https://github.com/google/jax/issues/23687, it was reported that recent jaxlib changes introduced issues when building from source using gcc, instead of the clang build that we test. I'm not 100% sure why the previous macro didn't work, but in investigating I found a version that seems to work on both clang and gcc with simpler logic. + +PiperOrigin-RevId: 675641259 +--- + jaxlib/ffi_helpers.h | 38 +++++++++----------------------------- + 1 file changed, 9 insertions(+), 29 deletions(-) + +diff --git a/jaxlib/ffi_helpers.h b/jaxlib/ffi_helpers.h +index fba57d11b9f2..47505020f3b8 100644 +--- a/jaxlib/ffi_helpers.h ++++ b/jaxlib/ffi_helpers.h +@@ -62,35 +62,15 @@ namespace jax { + FFI_ASSIGN_OR_RETURN_CONCAT_INNER_(x, y) + + // All the macros below here are to handle the case in FFI_ASSIGN_OR_RETURN +-// where the LHS is wrapped in parentheses. +-#define FFI_ASSIGN_OR_RETURN_EAT(...) +-#define FFI_ASSIGN_OR_RETURN_REM(...) __VA_ARGS__ +-#define FFI_ASSIGN_OR_RETURN_EMPTY() +- +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_HELPER((__VA_ARGS__, 0, 1)) +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_HELPER(args) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_I args +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_I(e0, e1, is_empty, ...) is_empty +- +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_I(__VA_ARGS__) +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_I(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER(_, ##__VA_ARGS__) +- +-#define FFI_ASSIGN_OR_RETURN_IF_1(_Then, _Else) _Then +-#define FFI_ASSIGN_OR_RETURN_IF_0(_Then, _Else) _Else +-#define FFI_ASSIGN_OR_RETURN_IF(_Cond, _Then, _Else) \ +- FFI_ASSIGN_OR_RETURN_CONCAT_(FFI_ASSIGN_OR_RETURN_IF_, _Cond)(_Then, _Else) +- +-#define FFI_ASSIGN_OR_RETURN_IS_PARENTHESIZED(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY(FFI_ASSIGN_OR_RETURN_EAT __VA_ARGS__) +- +-#define FFI_ASSIGN_OR_RETURN_UNPARENTHESIZE_IF_PARENTHESIZED(...) \ +- FFI_ASSIGN_OR_RETURN_IF(FFI_ASSIGN_OR_RETURN_IS_PARENTHESIZED(__VA_ARGS__), \ +- FFI_ASSIGN_OR_RETURN_REM, \ +- FFI_ASSIGN_OR_RETURN_EMPTY()) \ +- __VA_ARGS__ ++// where the LHS is wrapped in parentheses. See a more detailed discussion at ++// https://stackoverflow.com/a/62984543 ++#define FFI_ASSIGN_OR_RETURN_UNPARENTHESIZE_IF_PARENTHESIZED(X) \ ++ FFI_ASSIGN_OR_RETURN_ESCAPE(FFI_ASSIGN_OR_RETURN_EMPTY X) ++#define FFI_ASSIGN_OR_RETURN_EMPTY(...) FFI_ASSIGN_OR_RETURN_EMPTY __VA_ARGS__ ++#define FFI_ASSIGN_OR_RETURN_ESCAPE(...) \ ++ FFI_ASSIGN_OR_RETURN_ESCAPE_(__VA_ARGS__) ++#define FFI_ASSIGN_OR_RETURN_ESCAPE_(...) FFI_ASSIGN_OR_RETURN_##__VA_ARGS__ ++#define FFI_ASSIGN_OR_RETURN_FFI_ASSIGN_OR_RETURN_EMPTY + + template + inline absl::StatusOr MaybeCastNoOverflow( From 94d311d09ee8ab0107d28689e9d1f33aa1af7384 Mon Sep 17 00:00:00 2001 From: Uwe Korn Date: Mon, 30 Sep 2024 19:30:55 +0000 Subject: [PATCH 08/11] Remove cudnn_path --- recipe/build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/recipe/build.sh b/recipe/build.sh index 646ed0e3..b857a0f5 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -41,7 +41,6 @@ if [[ "${cuda_compiler_version:-None}" != "None" ]]; then CUDA_ARGS="--enable_cuda \ --enable_nccl \ - --cudnn_path=${PREFIX} \ --cuda_compute_capabilities=$TF_CUDA_COMPUTE_CAPABILITIES \ --cuda_version=$TF_CUDA_VERSION \ --cudnn_version=$TF_CUDNN_VERSION" From 4ad204f95a13c958f1fcf0c5274ea1ec085d2843 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Tue, 8 Oct 2024 15:04:22 +0000 Subject: [PATCH 09/11] Use local hermetic Python --- recipe/build.sh | 15 +- recipe/meta.yaml | 3 +- .../0001-Allow-for-custom-CUDA-build.patch | 4 +- ...002-Consolidated-build-fixes-for-XLA.patch | 4 +- ...n-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch | 62 ++++++++ .../0004-Fix-XLA_FFIR_REGISTER-macros.patch | 147 ++++++++++++++++++ 6 files changed, 227 insertions(+), 8 deletions(-) create mode 100644 recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch create mode 100644 recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch diff --git a/recipe/build.sh b/recipe/build.sh index b857a0f5..1ab9ef98 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -16,16 +16,25 @@ export CXXFLAGS="${CXXFLAGS} -DNDEBUG" if [[ "${cuda_compiler_version:-None}" != "None" ]]; then if [[ ${cuda_compiler_version} == 11.8 ]]; then - export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_50,sm_60,sm_62,sm_70,sm_72,sm_75,sm_80,sm_86,sm_87,sm_89,sm_90,compute_90 + export HERMETIC_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_50,sm_60,sm_62,sm_70,sm_72,sm_75,sm_80,sm_86,sm_87,sm_89,sm_90,compute_90 export TF_CUDA_PATHS="${CUDA_HOME},${PREFIX}" elif [[ ${cuda_compiler_version} == 12* ]]; then - export TF_CUDA_COMPUTE_CAPABILITIES=sm_60,sm_70,sm_75,sm_80,sm_86,sm_89,sm_90,compute_90 + export HERMETIC_CUDA_COMPUTE_CAPABILITIES=sm_60,sm_70,sm_75,sm_80,sm_86,sm_89,sm_90,compute_90 export CUDA_HOME="${BUILD_PREFIX}/targets/x86_64-linux" export TF_CUDA_PATHS="${BUILD_PREFIX}/targets/x86_64-linux,${PREFIX}/targets/x86_64-linux" # Needed for some nvcc binaries export PATH=$PATH:${BUILD_PREFIX}/nvvm/bin # XLA can only cope with a single cuda header include directory, merge both rsync -a ${PREFIX}/targets/x86_64-linux/include/ ${BUILD_PREFIX}/targets/x86_64-linux/include/ + + # Although XLA supports a non-hermetic build, it still tries to find headers in the hermetic locations. + rm -rf ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party + mkdir -p ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cuda/extras/CUPTI + cp -r ${PREFIX}/targets/x86_64-linux/include ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cuda/ + cp -r ${PREFIX}/targets/x86_64-linux/include ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cuda/extras/CUPTI/ + export LOCAL_CUDA_PATH="${BUILD_PREFIX}/targets/x86_64-linux" + export LOCAL_CUDNN_PATH="${PREFIX}" + export LOCAL_NCCL_PATH="${PREFIX}" else echo "unsupported cuda version." exit 1 @@ -41,7 +50,7 @@ if [[ "${cuda_compiler_version:-None}" != "None" ]]; then CUDA_ARGS="--enable_cuda \ --enable_nccl \ - --cuda_compute_capabilities=$TF_CUDA_COMPUTE_CAPABILITIES \ + --cuda_compute_capabilities=$HERMETIC_CUDA_COMPUTE_CAPABILITIES \ --cuda_version=$TF_CUDA_VERSION \ --cudnn_version=$TF_CUDNN_VERSION" fi diff --git a/recipe/meta.yaml b/recipe/meta.yaml index c7da6780..09a9741e 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -17,7 +17,8 @@ source: patches: - patches/0001-Allow-for-custom-CUDA-build.patch - patches/0002-Consolidated-build-fixes-for-XLA.patch - - patches/c61e49cd4a6c58b3b9823a32fe1320d65c98c45d.patch + - patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch + - patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch build: number: {{ build }} diff --git a/recipe/patches/0001-Allow-for-custom-CUDA-build.patch b/recipe/patches/0001-Allow-for-custom-CUDA-build.patch index c4e2e708..1d37a1ff 100644 --- a/recipe/patches/0001-Allow-for-custom-CUDA-build.patch +++ b/recipe/patches/0001-Allow-for-custom-CUDA-build.patch @@ -1,7 +1,7 @@ -From 96762e9ab2ab0cf89354b25da2d0738df4d2dc1a Mon Sep 17 00:00:00 2001 +From 6464d446efa93f9449d49a1efb7eeca695a7f7af Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Sun, 8 Oct 2023 19:34:34 +0200 -Subject: [PATCH 1/2] Allow for custom CUDA build +Subject: [PATCH 1/4] Allow for custom CUDA build --- build/build.py | 5 +++++ diff --git a/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch b/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch index 9e241ec7..b10a51a8 100644 --- a/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch +++ b/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch @@ -1,7 +1,7 @@ -From 7bf6376e95bd3b61204f8f01644814874293a2c2 Mon Sep 17 00:00:00 2001 +From 5bb8f68ae81cf4e0484ccaaf399e569da030a76a Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 14 Dec 2023 17:06:15 +0100 -Subject: [PATCH 2/2] Consolidated build fixes for XLA +Subject: [PATCH 2/4] Consolidated build fixes for XLA jax vendors xla, but only populates the sources through bazel, so we cannot patch as usual through conda, but rather need to teach the bazel build file diff --git a/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch b/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch new file mode 100644 index 00000000..582cfcc0 --- /dev/null +++ b/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch @@ -0,0 +1,62 @@ +From 4576ed73aa2e53d7a235b9f9121b02411c5749c5 Mon Sep 17 00:00:00 2001 +From: Dan Foreman-Mackey +Date: Tue, 17 Sep 2024 11:22:49 -0700 +Subject: [PATCH 3/4] Simplify logic in jaxlib FFI_ASSIGN_OR_RETURN macro, and + fix gcc build. + +In https://github.com/google/jax/issues/23687, it was reported that recent jaxlib changes introduced issues when building from source using gcc, instead of the clang build that we test. I'm not 100% sure why the previous macro didn't work, but in investigating I found a version that seems to work on both clang and gcc with simpler logic. + +PiperOrigin-RevId: 675641259 +--- + jaxlib/ffi_helpers.h | 38 +++++++++----------------------------- + 1 file changed, 9 insertions(+), 29 deletions(-) + +diff --git a/jaxlib/ffi_helpers.h b/jaxlib/ffi_helpers.h +index fba57d1..4750502 100644 +--- a/jaxlib/ffi_helpers.h ++++ b/jaxlib/ffi_helpers.h +@@ -62,35 +62,15 @@ namespace jax { + FFI_ASSIGN_OR_RETURN_CONCAT_INNER_(x, y) + + // All the macros below here are to handle the case in FFI_ASSIGN_OR_RETURN +-// where the LHS is wrapped in parentheses. +-#define FFI_ASSIGN_OR_RETURN_EAT(...) +-#define FFI_ASSIGN_OR_RETURN_REM(...) __VA_ARGS__ +-#define FFI_ASSIGN_OR_RETURN_EMPTY() +- +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_HELPER((__VA_ARGS__, 0, 1)) +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_HELPER(args) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_I args +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER_I(e0, e1, is_empty, ...) is_empty +- +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_I(__VA_ARGS__) +-#define FFI_ASSIGN_OR_RETURN_IS_EMPTY_I(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY_INNER(_, ##__VA_ARGS__) +- +-#define FFI_ASSIGN_OR_RETURN_IF_1(_Then, _Else) _Then +-#define FFI_ASSIGN_OR_RETURN_IF_0(_Then, _Else) _Else +-#define FFI_ASSIGN_OR_RETURN_IF(_Cond, _Then, _Else) \ +- FFI_ASSIGN_OR_RETURN_CONCAT_(FFI_ASSIGN_OR_RETURN_IF_, _Cond)(_Then, _Else) +- +-#define FFI_ASSIGN_OR_RETURN_IS_PARENTHESIZED(...) \ +- FFI_ASSIGN_OR_RETURN_IS_EMPTY(FFI_ASSIGN_OR_RETURN_EAT __VA_ARGS__) +- +-#define FFI_ASSIGN_OR_RETURN_UNPARENTHESIZE_IF_PARENTHESIZED(...) \ +- FFI_ASSIGN_OR_RETURN_IF(FFI_ASSIGN_OR_RETURN_IS_PARENTHESIZED(__VA_ARGS__), \ +- FFI_ASSIGN_OR_RETURN_REM, \ +- FFI_ASSIGN_OR_RETURN_EMPTY()) \ +- __VA_ARGS__ ++// where the LHS is wrapped in parentheses. See a more detailed discussion at ++// https://stackoverflow.com/a/62984543 ++#define FFI_ASSIGN_OR_RETURN_UNPARENTHESIZE_IF_PARENTHESIZED(X) \ ++ FFI_ASSIGN_OR_RETURN_ESCAPE(FFI_ASSIGN_OR_RETURN_EMPTY X) ++#define FFI_ASSIGN_OR_RETURN_EMPTY(...) FFI_ASSIGN_OR_RETURN_EMPTY __VA_ARGS__ ++#define FFI_ASSIGN_OR_RETURN_ESCAPE(...) \ ++ FFI_ASSIGN_OR_RETURN_ESCAPE_(__VA_ARGS__) ++#define FFI_ASSIGN_OR_RETURN_ESCAPE_(...) FFI_ASSIGN_OR_RETURN_##__VA_ARGS__ ++#define FFI_ASSIGN_OR_RETURN_FFI_ASSIGN_OR_RETURN_EMPTY + + template + inline absl::StatusOr MaybeCastNoOverflow( diff --git a/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch b/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch new file mode 100644 index 00000000..fc2febbf --- /dev/null +++ b/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch @@ -0,0 +1,147 @@ +From d209bfd9298f14f2949366e7747a0f65da72d9d9 Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Tue, 8 Oct 2024 12:57:07 +0000 +Subject: [PATCH 4/4] Fix XLA_FFIR_REGISTER macros + +--- + ..._FFI_REGISTER_-macros-global-qualifi.patch | 118 ++++++++++++++++++ + third_party/xla/workspace.bzl | 1 + + 2 files changed, 119 insertions(+) + create mode 100644 third_party/xla/0005-PR-17477-Fix-XLA_FFI_REGISTER_-macros-global-qualifi.patch + +diff --git a/third_party/xla/0005-PR-17477-Fix-XLA_FFI_REGISTER_-macros-global-qualifi.patch b/third_party/xla/0005-PR-17477-Fix-XLA_FFI_REGISTER_-macros-global-qualifi.patch +new file mode 100644 +index 0000000..8fc06e0 +--- /dev/null ++++ b/third_party/xla/0005-PR-17477-Fix-XLA_FFI_REGISTER_-macros-global-qualifi.patch +@@ -0,0 +1,118 @@ ++From 6b73d321ad45ca86cba50a308f12215a6f96ee28 Mon Sep 17 00:00:00 2001 ++From: Alexander Pivovarov ++Date: Mon, 23 Sep 2024 00:35:34 -0700 ++Subject: [PATCH 5/5] PR #17477: Fix XLA_FFI_REGISTER_ macros - global ++ qualification of class name is invalid ++ ++Imported from GitHub PR https://github.com/openxla/xla/pull/17477 ++ ++Currently `bazel test //xla/ffi/api:ffi_test` fails with compilation error: ++```bash ++In file included from ./xla/ffi/api/ffi.h:48, ++ from xla/ffi/api/ffi_test.cc:16: ++./xla/ffi/api/api.h:1774:38: error: global qualification of class name is invalid before '{' token ++ 1774 | struct ::xla::ffi::AttrDecoding { \ ++ | ^ ++xla/ffi/api/ffi_test.cc:71:1: note: in expansion of macro 'XLA_FFI_REGISTER_ENUM_ATTR_DECODING' ++ 71 | XLA_FFI_REGISTER_ENUM_ATTR_DECODING(::xla::ffi::Int32BasedEnum); ++ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++``` ++ ++To solve "global qualification of class name is invalid" issue we can add `namespace xla::ffi { ` block to the macros and remove `::xla::ffi::` prefix in struct decls inside `XLA_FFI_REGISTER_*` macros ++ ++### Testing ++``` ++bazel test //xla/ffi/... ++ ++INFO: Build completed successfully, 47 total actions ++//xla/ffi:ffi_test PASSED in 0.5s ++//xla/ffi:call_frame_test PASSED in 0.1s ++//xla/ffi:execution_context_test PASSED in 0.1s ++//xla/ffi:execution_state_test PASSED in 0.1s ++//xla/ffi:type_id_registry_test PASSED in 0.1s ++//xla/ffi/api:ffi_test PASSED in 0.5s ++ ++Executed 6 out of 6 tests: 6 tests pass. ++``` ++ ++### Related links: ++- https://github.com/openxla/xla/pull/15747 ++- https://github.com/openxla/xla/commit/ef49d057bffd4b8ff14bda925d48ea7610aaa856 ++ ++Copybara import of the project: ++ ++-- ++fffa62b2d47feb915c0c6300b0af5540974911d4 by Alexander Pivovarov : ++ ++Fix XLA_FFI_REGISTER_ macros ++ ++Merging this change closes #17477 ++ ++COPYBARA_INTEGRATE_REVIEW=https://github.com/openxla/xla/pull/17477 from apivovarov:fix_XLA_FFI_REGISTER_macro fffa62b2d47feb915c0c6300b0af5540974911d4 ++PiperOrigin-RevId: 677666742 ++--- ++ xla/ffi/api/api.h | 20 ++++++++++++++------ ++ 1 file changed, 14 insertions(+), 6 deletions(-) ++ ++diff --git a/xla/ffi/api/api.h b/xla/ffi/api/api.h ++index 8e3774f45c..914a1a8697 100644 ++--- a/xla/ffi/api/api.h +++++ b/xla/ffi/api/api.h ++@@ -1678,13 +1678,14 @@ auto DictionaryDecoder(Members... m) { ++ // binding specification inference from a callable signature. ++ // ++ #define XLA_FFI_REGISTER_STRUCT_ATTR_DECODING(T, ...) \ +++ namespace xla::ffi { \ ++ template <> \ ++- struct ::xla::ffi::AttrsBinding { \ +++ struct AttrsBinding { \ ++ using Attrs = T; \ ++ }; \ ++ \ ++ template <> \ ++- struct ::xla::ffi::AttrDecoding { \ +++ struct AttrDecoding { \ ++ using Type = T; \ ++ static std::optional Decode(XLA_FFI_AttrType type, void* attr, \ ++ DiagnosticEngine& diagnostic) { \ ++@@ -1699,13 +1700,17 @@ auto DictionaryDecoder(Members... m) { ++ reinterpret_cast(attr), \ ++ internal::StructMemberNames(__VA_ARGS__), diagnostic); \ ++ } \ ++- } +++ }; \ +++ } /* namespace xla::ffi */ \ +++ static_assert(std::is_class_v<::xla::ffi::AttrsBinding>); \ +++ static_assert(std::is_class_v<::xla::ffi::AttrDecoding>) ++ ++ // Registers decoding for a user-defined enum class type. Uses enums underlying ++ // type to decode the attribute as a scalar value and cast it to the enum type. ++ #define XLA_FFI_REGISTER_ENUM_ATTR_DECODING(T) \ +++ namespace xla::ffi { \ ++ template <> \ ++- struct ::xla::ffi::AttrDecoding { \ +++ struct AttrDecoding { \ ++ using Type = T; \ ++ using U = std::underlying_type_t; \ ++ static_assert(std::is_enum::value, "Expected enum class"); \ ++@@ -1718,7 +1723,8 @@ auto DictionaryDecoder(Members... m) { ++ } \ ++ \ ++ auto* scalar = reinterpret_cast(attr); \ ++- auto expected_dtype = internal::NativeTypeToCApiDataType(); \ +++ auto expected_dtype = \ +++ ::xla::ffi::internal::NativeTypeToCApiDataType(); \ ++ if (XLA_FFI_PREDICT_FALSE(scalar->dtype != expected_dtype)) { \ ++ return diagnostic.Emit("Wrong scalar data type: expected ") \ ++ << expected_dtype << " but got " << scalar->dtype; \ ++@@ -1727,7 +1733,9 @@ auto DictionaryDecoder(Members... m) { ++ auto underlying = *reinterpret_cast(scalar->value); \ ++ return static_cast(underlying); \ ++ } \ ++- }; +++ }; \ +++ } /* namespace xla::ffi */ \ +++ static_assert(std::is_class_v<::xla::ffi::AttrDecoding>) ++ ++ //===----------------------------------------------------------------------===// ++ // Helper macro for registering FFI implementations +diff --git a/third_party/xla/workspace.bzl b/third_party/xla/workspace.bzl +index 3b7afaf..013a020 100644 +--- a/third_party/xla/workspace.bzl ++++ b/third_party/xla/workspace.bzl +@@ -35,6 +35,7 @@ def repo(): + "//third_party/xla:0002-Fix-abseil-headers.patch", + "//third_party/xla:0003-Omit-usage-of-StrFormat.patch", + "//third_party/xla:0004-Add-missing-bits-absl-systemlib.patch", ++ "//third_party/xla:0005-PR-17477-Fix-XLA_FFI_REGISTER_-macros-global-qualifi.patch", + ], + ) + From 9c4c61c1136053fdde7d7686bd77c6b2136aef7a Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Wed, 9 Oct 2024 19:00:50 +0000 Subject: [PATCH 10/11] Fix CUDA 12 build --- recipe/build.sh | 22 +++++++++++------ recipe/meta.yaml | 1 + ...-Add-x64_windows-as-a-wheel-platform.patch | 24 ------------------- .../0001-Allow-for-custom-CUDA-build.patch | 4 ++-- ...002-Consolidated-build-fixes-for-XLA.patch | 4 ++-- ...n-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch | 4 ++-- .../0004-Fix-XLA_FFIR_REGISTER-macros.patch | 4 ++-- .../patches/0005-Add-missing-typename.patch | 22 +++++++++++++++++ 8 files changed, 46 insertions(+), 39 deletions(-) delete mode 100644 recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch create mode 100644 recipe/patches/0005-Add-missing-typename.patch diff --git a/recipe/build.sh b/recipe/build.sh index 1ab9ef98..9844292e 100644 --- a/recipe/build.sh +++ b/recipe/build.sh @@ -17,24 +17,32 @@ export CXXFLAGS="${CXXFLAGS} -DNDEBUG" if [[ "${cuda_compiler_version:-None}" != "None" ]]; then if [[ ${cuda_compiler_version} == 11.8 ]]; then export HERMETIC_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_50,sm_60,sm_62,sm_70,sm_72,sm_75,sm_80,sm_86,sm_87,sm_89,sm_90,compute_90 - export TF_CUDA_PATHS="${CUDA_HOME},${PREFIX}" + export TF_CUDA_PATHS="${CUDA_HOME},${PREFIX}" elif [[ ${cuda_compiler_version} == 12* ]]; then export HERMETIC_CUDA_COMPUTE_CAPABILITIES=sm_60,sm_70,sm_75,sm_80,sm_86,sm_89,sm_90,compute_90 export CUDA_HOME="${BUILD_PREFIX}/targets/x86_64-linux" export TF_CUDA_PATHS="${BUILD_PREFIX}/targets/x86_64-linux,${PREFIX}/targets/x86_64-linux" - # Needed for some nvcc binaries - export PATH=$PATH:${BUILD_PREFIX}/nvvm/bin - # XLA can only cope with a single cuda header include directory, merge both - rsync -a ${PREFIX}/targets/x86_64-linux/include/ ${BUILD_PREFIX}/targets/x86_64-linux/include/ + # Needed for some nvcc binaries + export PATH=$PATH:${BUILD_PREFIX}/nvvm/bin + # XLA can only cope with a single cuda header include directory, merge both + rsync -a ${PREFIX}/targets/x86_64-linux/include/ ${BUILD_PREFIX}/targets/x86_64-linux/include/ # Although XLA supports a non-hermetic build, it still tries to find headers in the hermetic locations. + # We do this in the BUILD_PREFIX to not have any impact on the resulting jaxlib package. + # Otherwise, these copied files would be included in the package. rm -rf ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party mkdir -p ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cuda/extras/CUPTI cp -r ${PREFIX}/targets/x86_64-linux/include ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cuda/ cp -r ${PREFIX}/targets/x86_64-linux/include ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cuda/extras/CUPTI/ + mkdir -p ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cudnn + cp ${PREFIX}/include/cudnn.h ${BUILD_PREFIX}/targets/x86_64-linux/include/third_party/gpus/cudnn/ export LOCAL_CUDA_PATH="${BUILD_PREFIX}/targets/x86_64-linux" - export LOCAL_CUDNN_PATH="${PREFIX}" - export LOCAL_NCCL_PATH="${PREFIX}" + export LOCAL_CUDNN_PATH="${PREFIX}/targets/x86_64-linux" + export LOCAL_NCCL_PATH="${PREFIX}/targets/x86_64-linux" + mkdir -p ${BUILD_PREFIX}/targets/x86_64-linux/bin + ln -s $(which ptxas) ${BUILD_PREFIX}/targets/x86_64-linux/bin/ptxas + ln -s $(which nvlink) ${BUILD_PREFIX}/targets/x86_64-linux/bin/nvlink + ln -s $(which fatbinary) ${BUILD_PREFIX}/targets/x86_64-linux/bin/fatbinary else echo "unsupported cuda version." exit 1 diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 09a9741e..165f6ef5 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -19,6 +19,7 @@ source: - patches/0002-Consolidated-build-fixes-for-XLA.patch - patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch - patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch + - patches/0005-Add-missing-typename.patch build: number: {{ build }} diff --git a/recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch b/recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch deleted file mode 100644 index d0a4f5e8..00000000 --- a/recipe/patches/0001-Add-x64_windows-as-a-wheel-platform.patch +++ /dev/null @@ -1,24 +0,0 @@ -From e03061546d4b95610465b22227e36ebcdd36c728 Mon Sep 17 00:00:00 2001 -From: "Uwe L. Korn" -Date: Thu, 18 Jul 2024 15:55:38 +0200 -Subject: [PATCH] Add x64_windows as a wheel platform - ---- - build/build.py | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/build/build.py b/build/build.py -index 0c25cea..db4a2a4 100755 ---- a/build/build.py -+++ b/build/build.py -@@ -575,6 +575,7 @@ def main(): - "darwin_x86_64": "x86_64", - "ppc": "ppc64le", - "aarch64": "aarch64", -+ "x64_windows": "x64_windows", - } - # TODO(phawkins): support other bazel cpu overrides. - wheel_cpu = (wheel_cpus[args.target_cpu] if args.target_cpu is not None --- -2.39.3 (Apple Git-146) - diff --git a/recipe/patches/0001-Allow-for-custom-CUDA-build.patch b/recipe/patches/0001-Allow-for-custom-CUDA-build.patch index 1d37a1ff..1c1b9f32 100644 --- a/recipe/patches/0001-Allow-for-custom-CUDA-build.patch +++ b/recipe/patches/0001-Allow-for-custom-CUDA-build.patch @@ -1,7 +1,7 @@ -From 6464d446efa93f9449d49a1efb7eeca695a7f7af Mon Sep 17 00:00:00 2001 +From 1ec53ea591323e47c8ce53ed9b0736e98784ff68 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Sun, 8 Oct 2023 19:34:34 +0200 -Subject: [PATCH 1/4] Allow for custom CUDA build +Subject: [PATCH 1/5] Allow for custom CUDA build --- build/build.py | 5 +++++ diff --git a/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch b/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch index b10a51a8..364f7697 100644 --- a/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch +++ b/recipe/patches/0002-Consolidated-build-fixes-for-XLA.patch @@ -1,7 +1,7 @@ -From 5bb8f68ae81cf4e0484ccaaf399e569da030a76a Mon Sep 17 00:00:00 2001 +From a7e732c129f51d16dd59b353e8c66ceae9b5529c Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 14 Dec 2023 17:06:15 +0100 -Subject: [PATCH 2/4] Consolidated build fixes for XLA +Subject: [PATCH 2/5] Consolidated build fixes for XLA jax vendors xla, but only populates the sources through bazel, so we cannot patch as usual through conda, but rather need to teach the bazel build file diff --git a/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch b/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch index 582cfcc0..3d4e600c 100644 --- a/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch +++ b/recipe/patches/0003-Simplify-logic-in-jaxlib-FFI_ASSIGN_OR_RETURN-macro-.patch @@ -1,7 +1,7 @@ -From 4576ed73aa2e53d7a235b9f9121b02411c5749c5 Mon Sep 17 00:00:00 2001 +From 69f982e2ce5408b0d782545e346c9c100b916b8b Mon Sep 17 00:00:00 2001 From: Dan Foreman-Mackey Date: Tue, 17 Sep 2024 11:22:49 -0700 -Subject: [PATCH 3/4] Simplify logic in jaxlib FFI_ASSIGN_OR_RETURN macro, and +Subject: [PATCH 3/5] Simplify logic in jaxlib FFI_ASSIGN_OR_RETURN macro, and fix gcc build. In https://github.com/google/jax/issues/23687, it was reported that recent jaxlib changes introduced issues when building from source using gcc, instead of the clang build that we test. I'm not 100% sure why the previous macro didn't work, but in investigating I found a version that seems to work on both clang and gcc with simpler logic. diff --git a/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch b/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch index fc2febbf..4c6e42e7 100644 --- a/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch +++ b/recipe/patches/0004-Fix-XLA_FFIR_REGISTER-macros.patch @@ -1,7 +1,7 @@ -From d209bfd9298f14f2949366e7747a0f65da72d9d9 Mon Sep 17 00:00:00 2001 +From 39e78f89a5c0c92c5e8591aa458de268493b829d Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Tue, 8 Oct 2024 12:57:07 +0000 -Subject: [PATCH 4/4] Fix XLA_FFIR_REGISTER macros +Subject: [PATCH 4/5] Fix XLA_FFIR_REGISTER macros --- ..._FFI_REGISTER_-macros-global-qualifi.patch | 118 ++++++++++++++++++ diff --git a/recipe/patches/0005-Add-missing-typename.patch b/recipe/patches/0005-Add-missing-typename.patch new file mode 100644 index 00000000..6b3be9ba --- /dev/null +++ b/recipe/patches/0005-Add-missing-typename.patch @@ -0,0 +1,22 @@ +From 86454745f436411956850d19238bfd3b55aa2a7f Mon Sep 17 00:00:00 2001 +From: "Uwe L. Korn" +Date: Wed, 9 Oct 2024 09:39:46 +0000 +Subject: [PATCH 5/5] Add missing typename + +--- + jaxlib/gpu/solver_kernels_ffi.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/jaxlib/gpu/solver_kernels_ffi.cc b/jaxlib/gpu/solver_kernels_ffi.cc +index 3c74b85..e839494 100644 +--- a/jaxlib/gpu/solver_kernels_ffi.cc ++++ b/jaxlib/gpu/solver_kernels_ffi.cc +@@ -618,7 +618,7 @@ ffi::Error SyevdImpl(int64_t batch, int64_t size, gpuStream_t stream, + + auto a_data = static_cast(a.untyped_data()); + auto out_data = static_cast(out->untyped_data()); +- auto w_data = static_cast::Type*>(w->untyped_data()); ++ auto w_data = static_cast::Type*>(w->untyped_data()); + auto info_data = info->typed_data(); + if (a_data != out_data) { + FFI_RETURN_IF_ERROR_STATUS(JAX_AS_STATUS(gpuMemcpyAsync( From 8b8ab4ae0fe70d046ebd63dd9e7201723c6d38ed Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 10 Oct 2024 10:14:43 +0200 Subject: [PATCH 11/11] Apply fixes from https://github.com/conda-forge/jaxlib-feedstock/issues/283 --- recipe/meta.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 165f6ef5..b927b56c 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -60,6 +60,7 @@ requirements: - cuda-cudart-dev # [(cuda_compiler_version or "").startswith("12")] - cuda-nvml-dev # [(cuda_compiler_version or "").startswith("12")] - cuda-nvtx-dev # [(cuda_compiler_version or "").startswith("12")] + - cuda-nvcc-tools # [(cuda_compiler_version or "").startswith("12")] - libcublas-dev # [(cuda_compiler_version or "").startswith("12")] - libcusolver-dev # [(cuda_compiler_version or "").startswith("12")] - libcurand-dev # [(cuda_compiler_version or "").startswith("12")] @@ -86,9 +87,7 @@ requirements: - scipy >=1.9 - ml_dtypes >=0.2.0 - __cuda # [cuda_compiler_version != "None"] - - cuda-nvcc # [(cuda_compiler_version or "").startswith("12")] - # Workaround for https://github.com/conda-forge/cuda-cupti-feedstock/issues/14 - - cuda-cupti >=12.0.90,<13.0a0 # [(cuda_compiler_version or "").startswith("12")] + - cuda-nvcc-tools # [(cuda_compiler_version or "").startswith("12")] run_constrained: - jax >={{ version }}