diff --git a/.github/labeler.yml b/.github/labeler.yml index 21b34c23ba..1640f4904a 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -31,7 +31,7 @@ loader: opencl: - changed-files: - any-glob-to-any-file: - - source/adapter/opencl/** + - source/adapters/opencl/** level-zero: - changed-files: diff --git a/.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch b/.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch deleted file mode 100644 index 9738942aa4..0000000000 --- a/.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch +++ /dev/null @@ -1,27 +0,0 @@ -From b5179dc4822eaab192361da05aa95d98f523960f Mon Sep 17 00:00:00 2001 -From: Lukasz Dorau -Date: Mon, 7 May 2018 12:05:40 +0200 -Subject: [PATCH] travis: fix travisci_build_coverity_scan.sh - ---- - travisci_build_coverity_scan.sh | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/travisci_build_coverity_scan.sh b/travisci_build_coverity_scan.sh -index ad9d4afcf..562b08bcc 100644 ---- a/travisci_build_coverity_scan.sh -+++ b/travisci_build_coverity_scan.sh -@@ -92,8 +92,8 @@ response=$(curl \ - --form description="Travis CI build" \ - $UPLOAD_URL) - status_code=$(echo "$response" | sed -n '$p') --if [ "$status_code" != "201" ]; then -+if [ "$status_code" != "200" ]; then - TEXT=$(echo "$response" | sed '$d') -- echo -e "\033[33;1mCoverity Scan upload failed: $TEXT.\033[0m" -+ echo -e "\033[33;1mCoverity Scan upload failed: $response.\033[0m" - exit 1 - fi --- -2.13.6 - diff --git a/.github/workflows/benchmarks-nightly.yml b/.github/workflows/benchmarks-nightly.yml new file mode 100644 index 0000000000..3da0d09c7a --- /dev/null +++ b/.github/workflows/benchmarks-nightly.yml @@ -0,0 +1,38 @@ +name: Compute Benchmarks Nightly + +on: + schedule: + - cron: '0 0 * * *' # Runs at midnight UTC every day + +permissions: + contents: read + pull-requests: write + +jobs: + nightly: + name: Compute Benchmarks Nightly level-zero + uses: 
./.github/workflows/benchmarks-reusable.yml + with: + str_name: 'level_zero' + unit: 'gpu' + pr_no: 0 + bench_script_params: '--save baseline' + sycl_config_params: '' + sycl_repo: 'intel/llvm' + sycl_commit: '' + + nightly2: + # we need to wait until previous job is done so that the html report + # contains both runs + needs: nightly + name: Compute Benchmarks Nightly level-zero v2 + uses: ./.github/workflows/benchmarks-reusable.yml + with: + str_name: 'level_zero_v2' + unit: 'gpu' + pr_no: 0 + bench_script_params: '--save baseline-v2' + sycl_config_params: '' + sycl_repo: 'intel/llvm' + sycl_commit: '' + upload_report: true diff --git a/.github/workflows/benchmarks_compute.yml b/.github/workflows/benchmarks-reusable.yml similarity index 79% rename from .github/workflows/benchmarks_compute.yml rename to .github/workflows/benchmarks-reusable.yml index ee74a52ad0..79cb35748e 100644 --- a/.github/workflows/benchmarks_compute.yml +++ b/.github/workflows/benchmarks-reusable.yml @@ -1,50 +1,39 @@ -name: Compute Benchmarks +name: Benchmarks Reusable on: - # Can be triggered via manual "dispatch" (from workflow view in GitHub Actions tab) - workflow_dispatch: - # acceptable input for adapter-specific runs + workflow_call: inputs: str_name: - description: Formatted adapter name - type: choice required: true - default: 'level_zero' - options: - - level_zero - - level_zero_v2 + type: string unit: - description: Test unit (cpu/gpu) - type: choice required: true - default: 'gpu' - options: - - cpu - - gpu + type: string pr_no: - description: PR number (if 0, it'll run on the main) - type: number required: true - bench_script_params: - description: Parameters passed to script executing benchmark + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. 
type: string + bench_script_params: required: false + type: string default: '' sycl_config_params: - description: Extra params for SYCL configuration - type: string required: false + type: string default: '' sycl_repo: - description: 'Compiler repo' - type: string required: true + type: string default: 'intel/llvm' sycl_commit: - description: 'Compiler commit' - type: string required: false + type: string default: '' + upload_report: + required: false + type: boolean + default: false permissions: contents: read @@ -56,19 +45,17 @@ jobs: strategy: matrix: adapter: [ - {str_name: "${{inputs.str_name}}", - sycl_config: "${{inputs.sycl_config_params}}", - unit: "${{inputs.unit}}" + {str_name: "${{ inputs.str_name }}", + sycl_config: "${{ inputs.sycl_config_params }}", + unit: "${{ inputs.unit }}" } ] build_type: [Release] compiler: [{c: clang, cxx: clang++}] - runs-on: "${{inputs.str_name}}_PERF" + runs-on: "${{ inputs.str_name }}_PERF" steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. - name: Cleanup self-hosted workspace if: always() run: | @@ -99,7 +86,8 @@ jobs: path: ur-repo - name: Install pip packages - run: pip install -r ${{github.workspace}}/ur-repo/third_party/requirements.txt + run: | + pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
- name: Fetch PR's merge commit @@ -169,13 +157,15 @@ jobs: run: cmake --install ${{github.workspace}}/ur_build - name: Run benchmarks + working-directory: ${{ github.workspace }}/ur-repo/ id: benchmarks run: > - numactl -N 0 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py - ~/bench_workdir - ${{github.workspace}}/sycl_build - ${{github.workspace}}/ur_install - ${{ matrix.adapter.str_name }} + numactl -N 0 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py + ~/bench_workdir + --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install + --adapter ${{ matrix.adapter.str_name }} + ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.bench_script_params }} - name: Add comment to PR @@ -186,7 +176,7 @@ jobs: let markdown = "" try { const fs = require('fs'); - markdown = fs.readFileSync('benchmark_results.md', 'utf8'); + markdown = fs.readFileSync('ur-repo/benchmark_results.md', 'utf8'); } catch(err) { } @@ -204,3 +194,10 @@ jobs: repo: context.repo.repo, body: body }) + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + with: + path: ur-repo/benchmark_results.html + key: benchmark-results-${{ matrix.adapter.str_name }}-${{ github.run_id }} diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 0000000000..af62d40e85 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,68 @@ +name: Compute Benchmarks + +on: + workflow_dispatch: + inputs: + str_name: + description: Formatted adapter name + type: choice + required: true + default: 'level_zero' + options: + - level_zero + - level_zero_v2 + unit: + description: Test unit (cpu/gpu) + type: choice + required: true + default: 'gpu' + options: + - cpu + - gpu + pr_no: + description: PR number (if 0, it'll run on the main) + type: number + required: true + bench_script_params: + description: Parameters passed to 
script executing benchmark + type: string + required: false + default: '' + sycl_config_params: + description: Extra params for SYCL configuration + type: string + required: false + default: '' + sycl_repo: + description: 'Compiler repo' + type: string + required: true + default: 'intel/llvm' + sycl_commit: + description: 'Compiler commit' + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false + +permissions: + contents: read + pull-requests: write + +jobs: + manual: + name: Compute Benchmarks + uses: ./.github/workflows/benchmarks-reusable.yml + with: + str_name: ${{ inputs.str_name }} + unit: ${{ inputs.unit }} + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + sycl_config_params: ${{ inputs.sycl_config_params }} + sycl_repo: ${{ inputs.sycl_repo }} + sycl_commit: ${{ inputs.sycl_commit }} + upload_report: ${{ inputs.upload_report }} diff --git a/.github/workflows/build-fuzz-reusable.yml b/.github/workflows/build-fuzz-reusable.yml index ae589540fb..0c7da5d478 100644 --- a/.github/workflows/build-fuzz-reusable.yml +++ b/.github/workflows/build-fuzz-reusable.yml @@ -41,12 +41,14 @@ jobs: - name: Build level zero with gcc run: | - git clone -b v1.17.6 --depth=1 https://github.com/oneapi-src/level-zero.git ${{github.workspace}}/level-zero + git clone -b v1.18.5 --depth=1 https://github.com/oneapi-src/level-zero.git ${{github.workspace}}/level-zero cd ${{github.workspace}}/level-zero cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ cmake --build build -j $(nproc) - name: Configure CMake + # CFI sanitization (or flto?) 
seems to cause linking to fail + # https://github.com/oneapi-src/unified-runtime/issues/2323 run: > cmake -B${{github.workspace}}/build @@ -58,6 +60,7 @@ jobs: -DUR_USE_ASAN=ON -DUR_USE_UBSAN=ON -DUR_BUILD_ADAPTER_L0=ON + -DUR_USE_CFI=OFF -DUR_LEVEL_ZERO_LOADER_LIBRARY=${{github.workspace}}/level-zero/build/lib/libze_loader.so -DUR_LEVEL_ZERO_INCLUDE_DIR=${{github.workspace}}/level-zero/include/ -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml index 77f696b958..bc45f5ca9b 100644 --- a/.github/workflows/build-hw-reusable.yml +++ b/.github/workflows/build-hw-reusable.yml @@ -7,10 +7,15 @@ on: adapter_name: required: true type: string + other_adapter_name: + required: false + type: string + default: "" runner_name: required: true type: string platform: + description: "Platform string, `UR_CTS_ADAPTER_PLATFORM` will be set to this." required: false type: string default: "" @@ -39,9 +44,13 @@ jobs: if: github.repository == 'oneapi-src/unified-runtime' # run only on upstream; forks won't have the HW strategy: matrix: - adapter: [ - {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_loader}}"}, - ] + adapter: [{ + name: "${{inputs.adapter_name}}", + other_name: "${{inputs.other_adapter_name}}", + platform: "${{inputs.platform}}", + static_Loader: "${{inputs.static_loader}}", + static_adapter: "${{inputs.static_loader}}" + }] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] # TODO: The latest L0 loader segfaults when built with clang. 
@@ -73,6 +82,8 @@ jobs: tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler - name: Configure CMake + # CFI sanitization seems to fail on our CUDA nodes + # https://github.com/oneapi-src/unified-runtime/issues/2309 run: > cmake -B${{github.workspace}}/build @@ -82,8 +93,10 @@ jobs: -DUR_ENABLE_TRACING=ON -DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON - -DUR_CONFORMANCE_TEST_LOADER=OFF -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON + -DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }} + ${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }} + -DUR_USE_CFI=${{ matrix.adapter.name == 'CUDA' && 'OFF' || 'ON' }} -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}} -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}} -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ @@ -103,6 +116,8 @@ jobs: - name: Test adapter specific working-directory: ${{github.workspace}}/build run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180 + # Don't run adapter specific tests when building multiple adapters + if: ${{ matrix.adapter.other_name == '' }} - name: Test adapters working-directory: ${{github.workspace}}/build diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 8c0e3bac86..2912475272 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -187,6 +187,24 @@ jobs: adapter_name: NATIVE_CPU runner_name: NATIVE_CPU + # Native CPU jobs are here to force the loader to be used (UR will not use the loader if there is only one target) + combined-opencl-native-cpu: + name: OpenCL + Native CPU (Loader) + uses: ./.github/workflows/build-hw-reusable.yml + with: + adapter_name: OPENCL + other_adapter_name: NATIVE_CPU + runner_name: OPENCL + platform: "OPENCL:Intel(R) OpenCL" + + combined-level-zero-native-cpu: + name: Level Zero + Native CPU (Loader) + uses: 
./.github/workflows/build-hw-reusable.yml + with: + adapter_name: L0 + other_adapter_name: NATIVE_CPU + runner_name: L0 + e2e-level-zero: name: E2E L0 permissions: @@ -238,10 +256,12 @@ jobs: compiler: {c: clang-cl, cxx: clang-cl} build_type: [Debug, Release] - compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] + # TODO: clang-cl seems to be fully broken (https://github.com/oneapi-src/unified-runtime/issues/2348) + #compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] + compiler: [{c: cl, cxx: cl}] include: - - compiler: {c: clang-cl, cxx: clang-cl} - toolset: "-T ClangCL" + #- compiler: {c: clang-cl, cxx: clang-cl} + # toolset: "-T ClangCL" - os: 'windows-2022' adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} build_type: 'Release' diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ba0230d600..d7d8bf937b 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -1,12 +1,5 @@ -# -# Copyright (C) 2023-2024 Intel Corporation -# -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# +# Coverity - static analysis build. It requires Coverity's token (set in CI's secret). name: coverity-unified-runtime -# It runs static analysis build - Coverity. It requires special token (set in CI's secret). 
on: workflow_dispatch: @@ -14,50 +7,75 @@ on: # Run every day at 22:00 UTC - cron: '0 22 * * *' -env: - WORKDIR: ${{ github.workspace }} - COVERITY_SCAN_NOTIFICATION_EMAIL: ${{ secrets.COVERITY_SCAN_NOTIFICATION_EMAIL }} - COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} - COVERITY_SCAN_PROJECT_NAME: ${{ github.repository }} - COVERITY_SCAN_BUILD_COMMAND: "cmake --build ${{github.workspace}}/build" - COVERITY_SCAN_BRANCH_PATTERN: "main" - TRAVIS_BRANCH: ${{ github.ref_name }} - permissions: contents: read jobs: - linux: + coverity: name: Coverity - runs-on: coverity + # run only on upstream; forks don't have token for upstream's cov project + if: github.repository == 'oneapi-src/unified-runtime' + runs-on: ubuntu-latest steps: - - name: Clone the git repo + - name: Checkout repository uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get install -y libhwloc-dev libtbb-dev cuda-toolkit-12-6 - name: Install pip packages run: pip install -r third_party/requirements.txt + - name: Download Coverity + run: | + wget -O coverity_tool.tgz -nv https://scan.coverity.com/download/linux64 \ + --post-data "token=${{ secrets.COVERITY_SCAN_TOKEN }}&project=oneapi-src%2Funified-runtime" + + - name: Extract Coverity + run: tar xzf coverity_tool.tgz + + # TODO: enable HIP adapter as well (requires proper package(s) installation) - name: Configure CMake run: > cmake - -B $WORKDIR/build + -B ${{github.workspace}}/build + -DCMAKE_BUILD_TYPE=Release + -DUR_DEVELOPER_MODE=OFF + -DUR_FORMAT_CPP_STYLE=ON -DUR_ENABLE_TRACING=ON - -DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON - -DUMF_ENABLE_POOL_TRACKING=ON - -DUR_FORMAT_CPP_STYLE=ON - -DCMAKE_BUILD_TYPE=Debug -DUR_BUILD_ADAPTER_L0=ON -DUR_BUILD_ADAPTER_CUDA=ON - 
-DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so + -DCUDA_CUDA_LIBRARY=/usr/local/cuda-12.6/targets/x86_64-linux/lib/stubs/libcuda.so -DUR_BUILD_ADAPTER_NATIVE_CPU=ON - -DUR_BUILD_ADAPTER_HIP=ON + -DUR_BUILD_ADAPTER_HIP=OFF -DUR_BUILD_ADAPTER_OPENCL=ON - - name: Run Coverity + - name: Build + run: | + export COVERITY_DIR=$(find . -maxdepth 1 -type d -name "cov-analysis-linux64-*" | head -n 1) + if [ -n "$COVERITY_DIR" ]; then + export PATH="$PATH:$COVERITY_DIR/bin" + fi + cov-build --dir ${{github.workspace}}/cov-int cmake --build ${{github.workspace}}/build --config Release -j$(nproc) + + - name: Create tarball to analyze + run: tar czvf cov-int_ur.tgz cov-int + + - name: Push tarball to scan run: | - cd $WORKDIR/build - wget https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh - patch < "../.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch" - bash ./travisci_build_coverity_scan.sh + BRANCH_NAME=$(echo ${GITHUB_REF_NAME}) + COMMIT_ID=$(echo $GITHUB_SHA) + curl --form token=${{ secrets.COVERITY_SCAN_TOKEN }} \ + --form email=bb-ur@intel.com \ + --form file=@cov-int_ur.tgz \ + --form version="$COMMIT_ID" \ + --form description="$BRANCH_NAME:$COMMIT_ID" \ + https://scan.coverity.com/builds\?project\=oneapi-src%2Funified-runtime diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 710aa659c8..b4c40334d4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -45,7 +45,23 @@ jobs: - name: Build Documentation working-directory: ${{github.workspace}}/scripts - run: python3 run.py --core + run: | + python3 run.py --core + mkdir -p ${{ github.workspace }}/ur-repo/ + mkdir -p ${{github.workspace}}/docs/html + + - name: Download benchmark HTML + id: download-bench-html + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + with: + path: ur-repo/benchmark_results.html + key: benchmark-results- + + - name: Move benchmark HTML + # exact or partial cache hit + if: 
steps.download-bench-html.outputs.cache-hit != '' + run: | + mv ${{ github.workspace }}/ur-repo/benchmark_results.html ${{ github.workspace }}/docs/html/ - name: Upload artifact uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 375f727d1c..bcbf0d7988 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,19 +16,6 @@ include(CheckCXXSourceCompiles) include(CMakePackageConfigHelpers) include(CTest) -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -include(helpers) - -if(CMAKE_SYSTEM_NAME STREQUAL Darwin) - set(Python3_FIND_FRAMEWORK NEVER) - set(Python3_FIND_STRATEGY LOCATION) -endif() - -find_package(Python3 COMPONENTS Interpreter REQUIRED) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED YES) - # Build Options option(UR_BUILD_EXAMPLES "Build example applications." ON) option(UR_BUILD_TESTS "Build unit tests." ON) @@ -40,6 +27,7 @@ option(UR_USE_ASAN "enable AddressSanitizer" OFF) option(UR_USE_UBSAN "enable UndefinedBehaviorSanitizer" OFF) option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UR_USE_TSAN "enable ThreadSanitizer" OFF) +option(UR_USE_CFI "enable Control Flow Integrity checks (requires clang and implies -flto)" ON) option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) option(UR_ENABLE_SANITIZER "enable device sanitizer" ON) option(UR_ENABLE_SYMBOLIZER "enable symoblizer for sanitizer" OFF) @@ -80,6 +68,19 @@ set(UR_ADAPTER_HIP_SOURCE_DIR "" CACHE PATH set(UR_ADAPTER_NATIVE_CPU_SOURCE_DIR "" CACHE PATH "Path to external 'native_cpu' adapter source dir") +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(helpers) + +if(CMAKE_SYSTEM_NAME STREQUAL Darwin) + set(Python3_FIND_FRAMEWORK NEVER) + set(Python3_FIND_STRATEGY LOCATION) +endif() + +find_package(Python3 COMPONENTS Interpreter REQUIRED) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED YES) + # There's little reason not to 
generate the compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS ON) diff --git a/README.md b/README.md index 262a861b9d..9f4eeef5ae 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,13 @@ [![Build and test](https://github.com/oneapi-src/unified-runtime/actions/workflows/cmake.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/cmake.yml) [![Bandit](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml) [![CodeQL](https://github.com/oneapi-src/unified-runtime/actions/workflows/codeql.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/codeql.yml) -[![Coverity](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) +[![Coverity build](https://github.com/oneapi-src/unified-runtime/actions/workflows/coverity.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-runtime/actions/workflows/coverity.yml) +[![Coverity report](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) [![Nightly](https://github.com/oneapi-src/unified-runtime/actions/workflows/nightly.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/nightly.yml) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-runtime/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-runtime) [![Trivy](https://github.com/oneapi-src/unified-runtime/actions/workflows/trivy.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/trivy.yml) [![Deploy documentation to Pages](https://github.com/oneapi-src/unified-runtime/actions/workflows/docs.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/docs.yml) +[![Compute Benchmarks 
Nightly](https://github.com/oneapi-src/unified-runtime/actions/workflows/benchmarks-nightly.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/benchmarks-nightly.yml) @@ -128,6 +130,7 @@ List of options provided by CMake: | UR_USE_TSAN | Enable ThreadSanitizer | ON/OFF | OFF | | UR_USE_UBSAN | Enable UndefinedBehavior Sanitizer | ON/OFF | OFF | | UR_USE_MSAN | Enable MemorySanitizer (clang only) | ON/OFF | OFF | +| UR_USE_CFI | Enable Control Flow Integrity checks (clang only, also enables lto) | ON/OFF | ON | | UR_ENABLE_TRACING | Enable XPTI-based tracing layer | ON/OFF | OFF | | UR_ENABLE_SANITIZER | Enable device sanitizer layer | ON/OFF | ON | | UR_CONFORMANCE_TARGET_TRIPLES | SYCL triples to build CTS device binaries for | Comma-separated list | spir64 | diff --git a/cmake/FetchLevelZero.cmake b/cmake/FetchLevelZero.cmake index 75bd5db0e7..6d108c8a6f 100644 --- a/cmake/FetchLevelZero.cmake +++ b/cmake/FetchLevelZero.cmake @@ -40,7 +40,7 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") endif() if (UR_LEVEL_ZERO_LOADER_TAG STREQUAL "") - set(UR_LEVEL_ZERO_LOADER_TAG v1.17.39) + set(UR_LEVEL_ZERO_LOADER_TAG v1.19.2) endif() # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index a6e3a344a4..d3c8a1aa85 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -63,6 +63,16 @@ if(CMAKE_SYSTEM_NAME STREQUAL Linux) check_cxx_compiler_flag("-fstack-clash-protection" CXX_HAS_FSTACK_CLASH_PROTECTION) endif() +if (UR_USE_CFI) + set(SAVED_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "-flto -fvisibility=hidden") + check_cxx_compiler_flag("-fsanitize=cfi" CXX_HAS_CFI_SANITIZE) + set(CMAKE_REQUIRED_FLAGS ${SAVED_CMAKE_REQUIRED_FLAGS}) +else() + # If CFI checking is disabled, pretend we don't support it + set(CXX_HAS_CFI_SANITIZE 
OFF) +endif() + function(add_ur_target_compile_options name) if(NOT MSVC) target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) @@ -78,11 +88,10 @@ function(add_ur_target_compile_options name) # Hardening options -fPIC -fstack-protector-strong - -fvisibility=hidden # Required for -fsanitize=cfi - # -fsanitize=cfi requires -flto, which breaks a lot of things - # See: https://github.com/oneapi-src/unified-runtime/issues/2120 - # -flto - # $<$:-fsanitize=cfi> + -fvisibility=hidden + # cfi-icall requires called functions in shared libraries to also be built with cfi-icall, which we can't + # guarantee. -fsanitize=cfi depends on -flto + $<$:-flto -fsanitize=cfi -fno-sanitize=cfi-icall> $<$:-fcf-protection=full> $<$:-fstack-clash-protection> @@ -119,7 +128,10 @@ endfunction() function(add_ur_target_link_options name) if(NOT MSVC) if (NOT APPLE) - target_link_options(${name} PRIVATE "LINKER:-z,relro,-z,now,-z,noexecstack") + target_link_options(${name} PRIVATE + $<$:-flto -fsanitize=cfi -fno-sanitize=cfi-icall> + "LINKER:-z,relro,-z,now,-z,noexecstack" + ) if (UR_DEVELOPER_MODE) target_link_options(${name} PRIVATE -Werror -Wextra) endif() @@ -131,9 +143,9 @@ function(add_ur_target_link_options name) endif() elseif(MSVC) target_link_options(${name} PRIVATE - /DYNAMICBASE - /HIGHENTROPYVA - /NXCOMPAT + LINKER:/DYNAMICBASE + LINKER:/HIGHENTROPYVA + LINKER:/NXCOMPAT ) endif() endfunction() @@ -141,7 +153,7 @@ endfunction() function(add_ur_target_exec_options name) if(MSVC) target_link_options(${name} PRIVATE - /ALLOWISOLATION + LINKER:/ALLOWISOLATION ) endif() endfunction() @@ -159,7 +171,7 @@ function(add_ur_library name) add_ur_target_link_options(${name}) if(MSVC) target_link_options(${name} PRIVATE - $<$,link.exe>:/DEPENDENTLOADFLAG:0x2000> + $<$,link.exe>:LINKER:/DEPENDENTLOADFLAG:0x2000> ) endif() endfunction() diff --git a/include/ur_api.h b/include/ur_api.h index 844f191062..3205fcb207 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -230,6 +230,7 @@ 
typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP = 243, ///< Enumerator for ::urCommandBufferUpdateSignalEventExp UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp + UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -288,6 +289,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES = 0x2006, ///< ::ur_exp_sampler_cubemap_properties_t UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION = 0x2007, ///< ::ur_exp_image_copy_region_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, ///< ::ur_exp_enqueue_native_command_properties_t + UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES = 0x4000, ///< ::ur_exp_enqueue_ext_properties_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1702,6 +1704,7 @@ typedef enum ur_device_info_t { ///< backed 2D sampled image data. UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native ///< work + UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events. /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1727,7 +1730,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -2515,8 +2518,12 @@ typedef enum ur_mem_type_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Memory Information type typedef enum ur_mem_info_t { - UR_MEM_INFO_SIZE = 0, ///< [size_t] actual size of of memory object in bytes - UR_MEM_INFO_CONTEXT = 1, ///< [::ur_context_handle_t] context in which the memory object was created + UR_MEM_INFO_SIZE = 0, ///< [size_t] actual size of memory object in bytes + UR_MEM_INFO_CONTEXT = 1, ///< [::ur_context_handle_t] context in which the memory object was created + UR_MEM_INFO_REFERENCE_COUNT = 2, ///< [uint32_t] Reference count of the memory object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. /// @cond UR_MEM_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -2650,6 +2657,7 @@ typedef struct ur_image_desc_t { /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR +/// + `pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype` /// + `pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type` /// + `pImageDesc && pImageDesc->numMipLevel != 0` /// + `pImageDesc && pImageDesc->numSamples != 0` @@ -2990,7 +2998,7 @@ urMemImageCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hMemory` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_MEM_INFO_CONTEXT < propName` +/// + `::UR_MEM_INFO_REFERENCE_COUNT < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5442,13 +5450,17 @@ typedef enum ur_queue_flag_t { ///< ignore this flag. UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM = UR_BIT(10), ///< Synchronize with the default stream. Only meaningful for CUDA.
Other ///< platforms may ignore this flag. + UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP = UR_BIT(11), ///< Hint: use low-power events. Only meaningful for Level Zero, where the + ///< implementation may use interrupt-driven events. May reduce CPU + ///< utilization at the cost of increased event completion latency. Other + ///< platforms may ignore this flag. /// @cond UR_QUEUE_FLAG_FORCE_UINT32 = 0x7fffffff /// @endcond } ur_queue_flag_t; /// @brief Bit Mask for validating ur_queue_flags_t -#define UR_QUEUE_FLAGS_MASK 0xfffff800 +#define UR_QUEUE_FLAGS_MASK 0xfffff000 /////////////////////////////////////////////////////////////////////////////// /// @brief Query information about a command queue @@ -8329,7 +8341,7 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. - uint32_t argSize; ///< [in] Argument size. + size_t argSize; ///< [in] Argument size. const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to ///< set at argument index. @@ -9969,6 +9981,89 @@ urUsmP2PPeerAccessGetInfoExp( size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in bytes of the queried propName. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for low-power events API +#if !defined(__GNUC__) +#pragma region low_power_events_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extended enqueue properties +typedef uint32_t ur_exp_enqueue_ext_flags_t; +typedef enum ur_exp_enqueue_ext_flag_t { + UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS = UR_BIT(11), ///< Hint: use low-power events. 
Only meaningful for Level Zero, where the + ///< implementation may use interrupt-driven events. May reduce CPU + ///< utilization at the cost of increased event completion latency. Other + ///< platforms may ignore this flag. + /// @cond + UR_EXP_ENQUEUE_EXT_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_enqueue_ext_flag_t; +/// @brief Bit Mask for validating ur_exp_enqueue_ext_flags_t +#define UR_EXP_ENQUEUE_EXT_FLAGS_MASK 0xfffff7ff + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extended enqueue properties +typedef struct ur_exp_enqueue_ext_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_exp_enqueue_ext_flags_t flags; ///< [in] extended enqueue flags + +} ur_exp_enqueue_ext_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a barrier command which waits for a list of events to complete +/// before it completes, with optional extended properties +/// +/// @details +/// - If the event list is empty, it waits for all previously enqueued +/// commands to complete. +/// - It blocks command execution - any following commands enqueued after it +/// do not execute until it completes. +/// - It returns an event which can be waited on.
+/// +/// @remarks +/// _Analogues_ +/// - **clEnqueueBarrierWithWaitList** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t *pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+); + #if !defined(__GNUC__) #pragma endregion #endif @@ -11445,6 +11540,18 @@ typedef struct ur_enqueue_kernel_launch_custom_exp_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_kernel_launch_custom_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueEventsWaitWithBarrierExt +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_events_wait_with_barrier_ext_params_t { + ur_queue_handle_t *phQueue; + const ur_exp_enqueue_ext_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + ur_event_handle_t **pphEvent; +} ur_enqueue_events_wait_with_barrier_ext_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index a7ca4d88a0..4920245369 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -126,6 +126,7 @@ _UR_API(urEnqueueDeviceGlobalVariableWrite) _UR_API(urEnqueueDeviceGlobalVariableRead) _UR_API(urEnqueueReadHostPipe) _UR_API(urEnqueueWriteHostPipe) +_UR_API(urEnqueueEventsWaitWithBarrierExt) _UR_API(urEnqueueKernelLaunchCustomExp) _UR_API(urEnqueueCooperativeKernelLaunchExp) _UR_API(urEnqueueTimestampRecordingExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 80a0003fca..40a6c5c269 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1398,6 +1398,15 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueWriteHostPipe_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueEventsWaitWithBarrierExt +typedef 
ur_result_t(UR_APICALL *ur_pfnEnqueueEventsWaitWithBarrierExt_t)( + ur_queue_handle_t, + const ur_exp_enqueue_ext_properties_t *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Enqueue functions pointers typedef struct ur_enqueue_dditable_t { @@ -1426,6 +1435,7 @@ typedef struct ur_enqueue_dditable_t { ur_pfnEnqueueDeviceGlobalVariableRead_t pfnDeviceGlobalVariableRead; ur_pfnEnqueueReadHostPipe_t pfnReadHostPipe; ur_pfnEnqueueWriteHostPipe_t pfnWriteHostPipe; + ur_pfnEnqueueEventsWaitWithBarrierExt_t pfnEventsWaitWithBarrierExt; } ur_enqueue_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.h b/include/ur_print.h index 1dd874e5a5..93597d232f 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1058,6 +1058,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpLaunchProperty(const struct ur_exp /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_ext_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueExtFlags(enum ur_exp_enqueue_ext_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_ext_properties_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueExtProperties(const struct ur_exp_enqueue_ext_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); 
+ /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_enqueue_native_command_flag_t enum /// @returns @@ -2034,6 +2050,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueKernelLaunchCustomExpParams(const struct ur_enqueue_kernel_launch_custom_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_events_wait_with_barrier_ext_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueEventsWaitWithBarrierExtParams(const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index cfb077177e..1d28b8eac0 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -213,6 +213,9 @@ inline ur_result_t printUnion( template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -348,6 +351,8 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, enum ur_exp_launch_property_id_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_launch_property_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); +inline 
std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_ext_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_native_command_flag_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_native_command_properties_t params); @@ -954,6 +959,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP"; break; + case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: + os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; + break; default: os << "unknown enumerator"; break; @@ -1113,6 +1121,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES"; + break; default: os << "unknown enumerator"; break; @@ -1374,6 +1385,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { const ur_exp_enqueue_native_command_properties_t *pstruct = (const ur_exp_enqueue_native_command_properties_t *)ptr; printPtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: { + const ur_exp_enqueue_ext_properties_t *pstruct = (const ur_exp_enqueue_ext_properties_t *)ptr; + printPtr(os, pstruct); + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -2646,6 +2662,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: os << "UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP"; break; + case 
UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -3688,7 +3707,8 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ", "; } - os << tptr[i]; + os << static_cast( + tptr[i]); } os << "}"; } break; @@ -4440,6 +4460,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -5631,6 +5663,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_mem_info_t value) { case UR_MEM_INFO_CONTEXT: os << "UR_MEM_INFO_CONTEXT"; break; + case UR_MEM_INFO_REFERENCE_COUNT: + os << "UR_MEM_INFO_REFERENCE_COUNT"; + break; default: os << "unknown enumerator"; break; @@ -5672,6 +5707,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_mem_info_t os << ")"; } break; + case UR_MEM_INFO_REFERENCE_COUNT: { + const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -7513,15 +7560,19 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_program_proper os << (params.count); os << ", "; - os << ".pMetadatas = {"; - for (size_t i = 0; (params.pMetadatas) != NULL && i < params.count; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pMetadatas = "; + 
ur::details::printPtr(os, reinterpret_cast((params.pMetadatas))); + if ((params.pMetadatas) != NULL) { + os << " {"; + for (size_t i = 0; i < params.count; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pMetadatas))[i]; + os << ((params.pMetadatas))[i]; + } + os << "}"; } - os << "}"; os << "}"; return os; @@ -8646,6 +8697,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value) { case UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM: os << "UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM"; break; + case UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP: + os << "UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -8770,6 +8824,16 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { } os << UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; } + + if ((val & UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP) == (uint32_t)UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP) { + val ^= (uint32_t)UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP; + } if (val != 0) { std::bitset<32> bits(val); if (!first) { @@ -10148,70 +10212,94 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu os << (params.newWorkDim); os << ", "; - os << ".pNewMemObjArgList = {"; - for (size_t i = 0; (params.pNewMemObjArgList) != NULL && i < params.numNewMemObjArgs; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pNewMemObjArgList = "; + ur::details::printPtr(os, reinterpret_cast((params.pNewMemObjArgList))); + if ((params.pNewMemObjArgList) != NULL) { + os << " {"; + for (size_t i = 0; i < params.numNewMemObjArgs; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pNewMemObjArgList))[i]; + os << ((params.pNewMemObjArgList))[i]; + } + os << "}"; } - os << "}"; os << ", "; - os << ".pNewPointerArgList = {"; - for (size_t i = 0; (params.pNewPointerArgList) != NULL && i < params.numNewPointerArgs; ++i) { - if (i != 0) { - os << ", "; - } + os << 
".pNewPointerArgList = "; + ur::details::printPtr(os, reinterpret_cast((params.pNewPointerArgList))); + if ((params.pNewPointerArgList) != NULL) { + os << " {"; + for (size_t i = 0; i < params.numNewPointerArgs; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pNewPointerArgList))[i]; + os << ((params.pNewPointerArgList))[i]; + } + os << "}"; } - os << "}"; os << ", "; - os << ".pNewValueArgList = {"; - for (size_t i = 0; (params.pNewValueArgList) != NULL && i < params.numNewValueArgs; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pNewValueArgList = "; + ur::details::printPtr(os, reinterpret_cast((params.pNewValueArgList))); + if ((params.pNewValueArgList) != NULL) { + os << " {"; + for (size_t i = 0; i < params.numNewValueArgs; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pNewValueArgList))[i]; + os << ((params.pNewValueArgList))[i]; + } + os << "}"; } - os << "}"; os << ", "; - os << ".pNewGlobalWorkOffset = {"; - for (size_t i = 0; (params.pNewGlobalWorkOffset) != NULL && i < params.newWorkDim; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pNewGlobalWorkOffset = "; + ur::details::printPtr(os, reinterpret_cast((params.pNewGlobalWorkOffset))); + if ((params.pNewGlobalWorkOffset) != NULL) { + os << " {"; + for (size_t i = 0; i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pNewGlobalWorkOffset))[i]; + os << ((params.pNewGlobalWorkOffset))[i]; + } + os << "}"; } - os << "}"; os << ", "; - os << ".pNewGlobalWorkSize = {"; - for (size_t i = 0; (params.pNewGlobalWorkSize) != NULL && i < params.newWorkDim; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pNewGlobalWorkSize = "; + ur::details::printPtr(os, reinterpret_cast((params.pNewGlobalWorkSize))); + if ((params.pNewGlobalWorkSize) != NULL) { + os << " {"; + for (size_t i = 0; i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pNewGlobalWorkSize))[i]; + os << ((params.pNewGlobalWorkSize))[i]; + } + os << "}"; } - os << 
"}"; os << ", "; - os << ".pNewLocalWorkSize = {"; - for (size_t i = 0; (params.pNewLocalWorkSize) != NULL && i < params.newWorkDim; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pNewLocalWorkSize = "; + ur::details::printPtr(os, reinterpret_cast((params.pNewLocalWorkSize))); + if ((params.pNewLocalWorkSize) != NULL) { + os << " {"; + for (size_t i = 0; i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } - os << ((params.pNewLocalWorkSize))[i]; + os << ((params.pNewLocalWorkSize))[i]; + } + os << "}"; } - os << "}"; os << "}"; return os; @@ -10354,6 +10442,77 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_in } } // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_ext_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t value) { + switch (value) { + case UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS: + os << "UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_ext_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS) == (uint32_t)UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS) { + val ^= (uint32_t)UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_ext_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_enqueue_ext_properties_t params) { + os << "(struct ur_exp_enqueue_ext_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, + (params.flags)); + + os << "}"; + return os; +} /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_enqueue_native_command_flag_t type /// @returns @@ -10573,16 +10732,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct /// std::ostream & inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_platform_get_params_t *params) { - os << ".phAdapters = {"; - for (size_t i = 0; *(params->pphAdapters) != NULL && i < *params->pNumAdapters; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phAdapters = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphAdapters))); + if (*(params->pphAdapters) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pNumAdapters; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphAdapters))[i]); + ur::details::printPtr(os, + (*(params->pphAdapters))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".NumAdapters = "; @@ -10595,16 +10758,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pNumEntries); os << ", "; - os << ".phPlatforms = {"; - for (size_t i = 0; *(params->pphPlatforms) != NULL && i < *params->pNumEntries; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phPlatforms = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphPlatforms))); + if 
(*(params->pphPlatforms) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pNumEntries; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphPlatforms))[i]); + ur::details::printPtr(os, + (*(params->pphPlatforms))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pNumPlatforms = "; @@ -10758,16 +10925,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pDeviceCount); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pDeviceCount; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pDeviceCount; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pProperties = "; @@ -10889,16 +11060,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumDevices); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pProperties = "; @@ -11020,16 +11195,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEvents); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) 
!= NULL && i < *params->pnumEvents; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEvents; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; return os; } @@ -11199,39 +11378,51 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumDevices); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; - os << ".pLengths = {"; - for (size_t i = 0; *(params->ppLengths) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pLengths = "; + ur::details::printPtr(os, reinterpret_cast(*(params->ppLengths))); + if (*(params->ppLengths) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - os << (*(params->ppLengths))[i]; + os << (*(params->ppLengths))[i]; + } + os << "}"; } - os << "}"; os << ", "; - os << ".ppBinaries = {"; - for (size_t i = 0; *(params->pppBinaries) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".ppBinaries = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pppBinaries))); + if (*(params->pppBinaries) != NULL) { + os << " {"; + for 
(size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pppBinaries))[i]); + ur::details::printPtr(os, + (*(params->pppBinaries))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pProperties = "; @@ -11291,16 +11482,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumDevices); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pOptions = "; @@ -11354,16 +11549,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumDevices); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pOptions = "; @@ -11391,16 +11590,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pcount); os << ", "; - os << ".phPrograms = {"; - for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; ++i) { - if (i != 0) { - os << ", "; - } + os << 
".phPrograms = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphPrograms))); + if (*(params->pphPrograms) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pcount; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphPrograms))[i]); + ur::details::printPtr(os, + (*(params->pphPrograms))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pOptions = "; @@ -11434,16 +11637,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumDevices); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".count = "; @@ -11451,16 +11658,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pcount); os << ", "; - os << ".phPrograms = {"; - for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phPrograms = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphPrograms))); + if (*(params->pphPrograms) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pcount; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphPrograms))[i]); + ur::details::printPtr(os, + (*(params->pphPrograms))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pOptions = "; @@ -11666,15 +11877,19 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pcount); os << ", "; - os << ".pSpecConstants 
= {"; - for (size_t i = 0; *(params->ppSpecConstants) != NULL && i < *params->pcount; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pSpecConstants = "; + ur::details::printPtr(os, reinterpret_cast(*(params->ppSpecConstants))); + if (*(params->ppSpecConstants) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pcount; ++i) { + if (i != 0) { + os << ", "; + } - os << (*(params->ppSpecConstants))[i]; + os << (*(params->ppSpecConstants))[i]; + } + os << "}"; } - os << "}"; return os; } @@ -12966,16 +13181,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pNumEntries); os << ", "; - os << ".phAdapters = {"; - for (size_t i = 0; *(params->pphAdapters) != NULL && i < *params->pNumEntries; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phAdapters = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphAdapters))); + if (*(params->pphAdapters) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pNumEntries; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphAdapters))[i]); + ur::details::printPtr(os, + (*(params->pphAdapters))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pNumAdapters = "; @@ -13120,16 +13339,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << 
".phEvent = "; @@ -13157,16 +13380,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13194,16 +13421,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13258,16 +13489,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, 
reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13322,16 +13557,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13411,16 +13650,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13500,16 +13743,20 @@ inline 
std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13564,16 +13811,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13648,16 +13899,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) 
!= NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13712,16 +13967,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13786,16 +14045,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13860,16 +14123,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << 
*(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13924,16 +14191,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -13988,16 +14259,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < 
*params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14043,16 +14318,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14102,16 +14381,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14161,16 +14444,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << 
".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14215,16 +14502,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14321,16 +14612,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - 
ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14395,16 +14690,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14465,16 +14764,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14535,16 +14838,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != 
NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14600,16 +14907,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14665,16 +14976,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + 
ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14725,15 +15040,19 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumPropsInLaunchPropList); os << ", "; - os << ".launchPropList = {"; - for (size_t i = 0; *(params->plaunchPropList) != NULL && i < *params->pnumPropsInLaunchPropList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".launchPropList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->plaunchPropList))); + if (*(params->plaunchPropList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumPropsInLaunchPropList; ++i) { + if (i != 0) { + os << ", "; + } - os << (*(params->plaunchPropList))[i]; + os << (*(params->plaunchPropList))[i]; + } + os << "}"; } - os << "}"; os << ", "; os << ".numEventsInWaitList = "; @@ -14741,16 +15060,67 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); } + os << "}"; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_events_wait_with_barrier_ext_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, 
[[maybe_unused]] const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14807,16 +15177,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14849,16 +15223,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if 
(*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -14898,16 +15276,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumMemsInMemList); os << ", "; - os << ".phMemList = {"; - for (size_t i = 0; *(params->pphMemList) != NULL && i < *params->pnumMemsInMemList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phMemList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphMemList))); + if (*(params->pphMemList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumMemsInMemList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphMemList))[i]); + ur::details::printPtr(os, + (*(params->pphMemList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pProperties = "; @@ -14921,16 +15303,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -15216,16 +15602,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << 
*(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -15586,16 +15976,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -15639,16 +16033,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < 
*params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -16154,16 +16552,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumKernelAlternatives); os << ", "; - os << ".phKernelAlternatives = {"; - for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phKernelAlternatives = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphKernelAlternatives))); + if (*(params->pphKernelAlternatives) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumKernelAlternatives; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphKernelAlternatives))[i]); + ur::details::printPtr(os, + (*(params->pphKernelAlternatives))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".numSyncPointsInWaitList = "; @@ -16182,16 +16584,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16259,16 +16665,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16341,16 +16751,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16428,16 +16842,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < 
*params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16510,16 +16928,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16592,16 +17014,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16699,16 +17125,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; 
- os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16806,16 +17236,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -16913,16 +17347,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", ";
+ } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -17000,16 +17438,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -17077,16 +17519,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -17154,16 +17600,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0;
*(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pSyncPoint = "; @@ -17209,16 +17659,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".phEvent = "; @@ -17314,16 +17768,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pnumEventsInWaitList); os << ", "; - os << ".phEventWaitList = {"; - for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, -
(*(params->pphEventWaitList))[i]); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; } - os << "}"; return os; } @@ -17766,16 +18224,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pNumEntries); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pNumEntries; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pNumEntries; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pNumDevices = "; @@ -17808,16 +18270,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pNumEntries); os << ", "; - os << ".phDevices = {"; - for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pNumEntries; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phDevices = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphDevices))); + if (*(params->pphDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pNumEntries; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphDevices))[i]); + ur::details::printPtr(os, + (*(params->pphDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pNumDevices = "; @@ -17913,16 +18379,20 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->pNumDevices); os << ", "; - os << ".phSubDevices = {"; - for (size_t i = 0; *(params->pphSubDevices) != NULL && i < *params->pNumDevices; ++i) { - if (i != 0) { - os << ", "; - } + os << ".phSubDevices = "; + ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->pphSubDevices))); + if
(*(params->pphSubDevices) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pNumDevices; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - (*(params->pphSubDevices))[i]); + ur::details::printPtr(os, + (*(params->pphSubDevices))[i]); + } + os << "}"; } - os << "}"; os << ", "; os << ".pNumDevicesRet = "; @@ -18448,6 +18918,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP: { os << (const struct ur_enqueue_kernel_launch_custom_exp_params_t *)params; } break; + case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: { + os << (const struct ur_enqueue_events_wait_with_barrier_ext_params_t *)params; + } break; case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params; } break; diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md index 64a7a3eeb9..bd6de60a0a 100644 --- a/scripts/benchmarks/README.md +++ b/scripts/benchmarks/README.md @@ -37,9 +37,10 @@ By default, the benchmark results are not stored. To store them, use the option To compare a benchmark run with a previously stored result, use the option `--compare <name>`. You can compare with more than one result. -If no `--compare` option is specified, the benchmark run is compared against a previously stored `baseline`. This baseline is **not** automatically updated. To update it, use the `--save baseline` option. -The recommended way of updating the baseline is running the benchmarking -job on main after a merge of relevant changes. +If no `--compare` option is specified, the benchmark run is compared against a previously stored `baseline`. + +Baseline, as well as baseline-v2 (for the level-zero adapter v2), is updated automatically during a nightly job. The results +are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html).
## Requirements diff --git a/scripts/benchmarks/benches/SobelFilter.py b/scripts/benchmarks/benches/SobelFilter.py deleted file mode 100644 index b9e7619e47..0000000000 --- a/scripts/benchmarks/benches/SobelFilter.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import re - -class SobelFilter(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("sobel_filter", "sobel_filter", vb) - - def download_deps(self): - self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz") - return - - def name(self): - return "Velocity-Bench Sobel Filter" - - def unit(self): - return "ms" - - def bin_args(self) -> list[str]: - return ["-i", f"{self.data_path}/sobel_filter_data/silverfalls_32Kx32K.png", - "-n", "5"] - - def extra_env_vars(self) -> dict: - return {"OPENCV_IO_MAX_IMAGE_PIXELS" : "1677721600"} - - def parse_output(self, stdout: str) -> float: - match = re.search(r'sobelfilter - total time for whole calculation: (\d+\.\d+) s', stdout) - if match: - return round(float(match.group(1)) * 1000, 3) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") - diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py index 36f252cb42..84e1b8287c 100644 --- a/scripts/benchmarks/benches/base.py +++ b/scripts/benchmarks/benches/base.py @@ -8,7 +8,7 @@ from pathlib import Path from .result import Result from .options import options -from utils.utils import run +from utils.utils import download, run import urllib.request import tarfile @@ -20,21 +20,24 @@ def 
__init__(self, directory): def get_adapter_full_path(): for libs_dir_name in ['lib', 'lib64']: adapter_path = os.path.join( - options.ur_dir, libs_dir_name, f"libur_adapter_{options.ur_adapter_name}.so") + options.ur, libs_dir_name, f"libur_adapter_{options.ur_adapter}.so") if os.path.isfile(adapter_path): return adapter_path assert False, \ f"could not find adapter file {adapter_path} (and in similar lib paths)" - def run_bench(self, command, env_vars): + def run_bench(self, command, env_vars, ld_library=[]): env_vars_with_forced_adapter = env_vars.copy() - env_vars_with_forced_adapter.update( - {'UR_ADAPTERS_FORCE_LOAD': Benchmark.get_adapter_full_path()}) + if options.ur is not None: + env_vars_with_forced_adapter.update( + {'UR_ADAPTERS_FORCE_LOAD': Benchmark.get_adapter_full_path()}) + return run( command=command, env_vars=env_vars_with_forced_adapter, add_sycl=True, - cwd=options.benchmark_cwd + cwd=options.benchmark_cwd, + ld_library=ld_library ).stdout.decode() def create_data_path(self, name): @@ -47,17 +50,9 @@ def create_data_path(self, name): return data_path - def download_untar(self, name, url, file): + def download(self, name, url, file, untar = False): self.data_path = self.create_data_path(name) - data_file = os.path.join(self.data_path, file) - if not Path(data_file).exists(): - print(f"{data_file} does not exist, downloading") - urllib.request.urlretrieve(url, data_file) - file = tarfile.open(data_file) - file.extractall(self.data_path) - file.close() - else: - print(f"{data_file} exists, skipping...") + return download(self.data_path, url, file, untar) def name(self): raise NotImplementedError() @@ -76,3 +71,13 @@ def run(self, env_vars) -> list[Result]: def teardown(self): raise NotImplementedError() + + def ignore_iterations(self): + return False + +class Suite: + def benchmarks(self) -> list[Benchmark]: + raise NotImplementedError() + + def setup(self): + return diff --git a/scripts/benchmarks/benches/bitcracker.py
b/scripts/benchmarks/benches/bitcracker.py deleted file mode 100644 index bb198433fa..0000000000 --- a/scripts/benchmarks/benches/bitcracker.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re - -class Bitcracker(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("bitcracker", "bitcracker", vb) - self.data_path = os.path.join(vb.repo_path, "bitcracker", "hash_pass") - - def name(self): - return "Velocity-Bench Bitcracker" - - def unit(self): - return "s" - - def bin_args(self) -> list[str]: - return ["-f", f"{self.data_path}/img_win8_user_hash.txt", - "-d", f"{self.data_path}/user_passwords_60000.txt", - "-b", "60000"] - - def parse_output(self, stdout: str) -> float: - match = re.search(r'bitcracker - total time for whole calculation: (\d+\.\d+) s', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py index 9c84739f75..57bed7624a 100644 --- a/scripts/benchmarks/benches/compute.py +++ b/scripts/benchmarks/benches/compute.py @@ -7,20 +7,19 @@ import csv import io from utils.utils import run, git_clone, create_build_path -from .base import Benchmark +from .base import Benchmark, Suite from .result import Result from .options import options -class ComputeBench: +class ComputeBench(Suite): def __init__(self, directory): self.directory = directory - self.built = False def setup(self): - if self.built: + if options.sycl is None: return - repo_path = git_clone(self.directory, "compute-benchmarks-repo", 
"https://github.com/intel/compute-benchmarks.git", "aa6a3b2108bb86202b654ad28129156fa746d41d") + repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "c80ddec9f0b4905bcbeb0f264f710093dc70340d") build_path = create_build_path(self.directory, 'compute-benchmarks-build') configure_command = [ @@ -31,10 +30,14 @@ def setup(self): f"-DBUILD_SYCL=ON", f"-DSYCL_COMPILER_ROOT={options.sycl}", f"-DALLOW_WARNINGS=ON", - f"-DBUILD_UR=ON", - f"-Dunified-runtime_DIR={options.ur_dir}/lib/cmake/unified-runtime", ] + if options.ur is not None: + configure_command += [ + f"-DBUILD_UR=ON", + f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", + ] + print(f"{self.__class__.__name__}: Run {configure_command}") run(configure_command, add_sycl=True) print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j") @@ -42,6 +45,38 @@ def setup(self): self.built = True + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + benches = [ + SubmitKernelSYCL(self, 0), + SubmitKernelSYCL(self, 1), + QueueInOrderMemcpy(self, 0, 'Device', 'Device', 1024), + QueueInOrderMemcpy(self, 0, 'Host', 'Device', 1024), + QueueMemcpy(self, 'Device', 'Device', 1024), + StreamMemory(self, 'Triad', 10 * 1024, 'Device'), + ExecImmediateCopyQueue(self, 0, 1, 'Device', 'Device', 1024), + ExecImmediateCopyQueue(self, 1, 1, 'Device', 'Host', 1024), + VectorSum(self), + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 0, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1), + ] + + if options.ur is not None: + benches += [ + SubmitKernelUR(self, 0), + SubmitKernelUR(self, 1), + ] + + return benches + class 
ComputeBenchmark(Benchmark): def __init__(self, bench, name, test): self.bench = bench @@ -60,7 +95,6 @@ def unit(self): def setup(self): self.benchmark_bin = os.path.join(self.bench.directory, 'compute-benchmarks-build', 'bin', self.bench_name) - self.bench.setup() def run(self, env_vars) -> list[Result]: command = [ @@ -75,7 +109,7 @@ def run(self, env_vars) -> list[Result]: result = self.run_bench(command, env_vars) (label, mean) = self.parse_output(result) - return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ] + return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result) ] def parse_output(self, output): csv_file = io.StringIO(output) @@ -233,15 +267,17 @@ def bin_args(self) -> list[str]: ] class MemcpyExecute(ComputeBenchmark): - def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations): + def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM): self.numOpsPerThread = numOpsPerThread self.numThreads = numThreads self.allocSize = allocSize self.iterations = iterations + self.srcUSM = srcUSM + self.dstUSM = dstUSM super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute") def name(self): - return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize}" + return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" def bin_args(self) -> list[str]: return [ @@ -252,5 +288,7 @@ def bin_args(self) -> list[str]: f"--AllocSize={self.allocSize}", f"--NumThreads={self.numThreads}", f"--NumOpsPerThread={self.numOpsPerThread}", - f"--iterations={self.iterations}" + f"--iterations={self.iterations}", + f"--SrcUSM={self.srcUSM}", + f"--DstUSM={self.dstUSM}", ] diff --git 
a/scripts/benchmarks/benches/cudaSift.py b/scripts/benchmarks/benches/cudaSift.py deleted file mode 100644 index 482d258052..0000000000 --- a/scripts/benchmarks/benches/cudaSift.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re -import shutil - -class CudaSift(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("cudaSift", "cudaSift", vb) - - def download_deps(self): - images = os.path.join(self.vb.repo_path, self.bench_name, 'inputData') - dest = os.path.join(self.directory, 'inputData') - if not os.path.exists(dest): - shutil.copytree(images, dest) - - def name(self): - return "Velocity-Bench CudaSift" - - def unit(self): - return "ms" - - def parse_output(self, stdout: str) -> float: - match = re.search(r'Avg workload time = (\d+\.\d+) ms', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("Failed to parse benchmark output.") diff --git a/scripts/benchmarks/benches/easywave.py b/scripts/benchmarks/benches/easywave.py deleted file mode 100644 index 2f89482329..0000000000 --- a/scripts/benchmarks/benches/easywave.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -from .options import options -import re -import os - -class Easywave(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("easywave", "easyWave_sycl", vb) - - def download_deps(self): - self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz") - - def name(self): - return "Velocity-Bench Easywave" - - def unit(self): - return "ms" - - def bin_args(self) -> list[str]: - return ["-grid", f"{self.data_path}/examples/e2Asean.grd", - "-source", f"{self.data_path}/examples/BengkuluSept2007.flt", - "-time", "120"] - - # easywave doesn't output a useful single perf value. Instead, we parse the - # output logs looking for the very last line containing the elapsed time of the - # application. 
- def get_last_elapsed_time(self, log_file_path) -> float: - elapsed_time_pattern = re.compile(r'Model time = (\d{2}:\d{2}:\d{2}),\s+elapsed: (\d+) msec') - last_elapsed_time = None - - try: - with open(log_file_path, 'r') as file: - for line in file: - match = elapsed_time_pattern.search(line) - if match: - last_elapsed_time = int(match.group(2)) - - if last_elapsed_time is not None: - return last_elapsed_time - else: - raise ValueError("No elapsed time found in the log file.") - except FileNotFoundError: - raise FileNotFoundError(f"The file {log_file_path} does not exist.") - except Exception as e: - raise e - - def parse_output(self, stdout: str) -> float: - return self.get_last_elapsed_time(os.path.join(options.benchmark_cwd, "easywave.log")) diff --git a/scripts/benchmarks/benches/hashtable.py b/scripts/benchmarks/benches/hashtable.py deleted file mode 100644 index c5ed397dbb..0000000000 --- a/scripts/benchmarks/benches/hashtable.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re - -class Hashtable(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("hashtable", "hashtable_sycl", vb) - - def name(self): - return "Velocity-Bench Hashtable" - - def unit(self): - return "M keys/sec" - - def bin_args(self) -> list[str]: - return ["--no-verify"] - - def lower_is_better(self): - return False - - def parse_output(self, stdout: str) -> float: - match = re.search(r'(\d+\.\d+) million keys/second', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse keys per second from benchmark output.") diff --git a/scripts/benchmarks/benches/llamacpp.py b/scripts/benchmarks/benches/llamacpp.py new file mode 100644 index 0000000000..3ff7963bd1 --- /dev/null +++ b/scripts/benchmarks/benches/llamacpp.py @@ -0,0 +1,196 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import csv +import io +from pathlib import Path +import re +import shutil +from utils.utils import download, git_clone +from .base import Benchmark, Suite +from .result import Result +from utils.utils import run, create_build_path +from .options import options +import os + +class OneAPI: + # random unique number for benchmark oneAPI installation + ONEAPI_BENCHMARK_INSTANCE_ID = 98765 + def __init__(self, directory): + self.oneapi_dir = os.path.join(directory, 'oneapi') + Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) + # delete if some option is set? + + # can we just hardcode these links? 
+ self.install_package('dnnl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh') + self.install_package('mkl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh') + return + + def install_package(self, name, url): + package_path = os.path.join(self.oneapi_dir, name) + if Path(package_path).exists(): + print(f"{package_path} exists, skipping installing oneAPI package {name}...") + return + + package = download(self.oneapi_dir, url, f'package_{name}.sh') + try: + print(f"installing {name}") + run(f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}") + except: + print("oneAPI installation likely exists already") + return + print(f"{name} installation complete") + + def package_dir(self, package, dir): + return os.path.join(self.oneapi_dir, package, 'latest', dir) + + def package_cmake(self, package): + package_lib = self.package_dir(package, 'lib') + return os.path.join(package_lib, 'cmake', package) + + def mkl_lib(self): + return self.package_dir('mkl', 'lib') + + def mkl_include(self): + return self.package_dir('mkl', 'include') + + def mkl_cmake(self): + return self.package_cmake('mkl') + + def dnn_lib(self): + return self.package_dir('dnnl', 'lib') + + def dnn_include(self): + return self.package_dir('dnnl', 'include') + + def dnn_cmake(self): + return self.package_cmake('dnnl') + + def tbb_lib(self): + return self.package_dir('tbb', 'lib') + + def tbb_cmake(self): + return self.package_cmake('tbb') + + def compiler_lib(self): + return self.package_dir('compiler', 'lib') + + def ld_libraries(self): + return [ + self.compiler_lib(), + self.mkl_lib(), + self.tbb_lib(), + self.dnn_lib() + ] + +class LlamaCppBench(Suite): + def __init__(self, directory): + if options.sycl is None: + return + + self.directory = directory + + def
setup(self): + if options.sycl is None: + return + + repo_path = git_clone(self.directory, "llamacpp-repo", "https://github.com/ggerganov/llama.cpp", "1ee9eea094fe5846c7d8d770aa7caa749d246b23") + + self.models_dir = os.path.join(self.directory, 'models') + Path(self.models_dir).mkdir(parents=True, exist_ok=True) + + self.model = download(self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf") + + self.oneapi = OneAPI(self.directory) + + self.build_path = create_build_path(self.directory, 'llamacpp-build') + + configure_command = [ + "cmake", + f"-B {self.build_path}", + f"-S {repo_path}", + f"-DCMAKE_BUILD_TYPE=Release", + f"-DGGML_SYCL=ON", + f"-DCMAKE_C_COMPILER=clang", + f"-DCMAKE_CXX_COMPILER=clang++", + f"-DDNNL_DIR={self.oneapi.dnn_cmake()}", + f"-DTBB_DIR={self.oneapi.tbb_cmake()}", + f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"', + f'-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}' + ] + print(f"{self.__class__.__name__}: Run {configure_command}") + run(configure_command, add_sycl=True) + print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j") + run(f"cmake --build {self.build_path} -j", add_sycl=True, ld_library=self.oneapi.ld_libraries()) + + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + return [ + LlamaBench(self) + ] + +class LlamaBench(Benchmark): + def __init__(self, bench): + self.bench = bench + super().__init__(bench.directory) + + def unit(self): + return "token/s" + + def setup(self): + self.benchmark_bin = os.path.join(self.bench.build_path, 'bin', 'llama-bench') + + def name(self): + return f"llama.cpp" + + def lower_is_better(self): + return False + + def ignore_iterations(self): + return True + + def run(self, env_vars) -> list[Result]: + command = [ + f"{self.benchmark_bin}", + "--output", "csv", + "-n", "128", + "-p", "512", + "-b", 
"128,256,512", + "--numa", "isolate", + "-t", "56", # TODO: use only as many threads as numa node 0 has cpus + "--model", f"{self.bench.model}", + ] + + result = self.run_bench(command, env_vars, ld_library=self.bench.oneapi.ld_libraries()) + parsed = self.parse_output(result) + results = [] + for r in parsed: + (extra_label, mean) = r + label = f"{self.name()} {extra_label}" + results.append(Result(label=label, value=mean, command=command, env=env_vars, stdout=result)) + return results + + def parse_output(self, output): + csv_file = io.StringIO(output) + reader = csv.DictReader(csv_file) + + results = [] + for row in reader: + try: + n_batch = row["n_batch"] + avg_ts = float(row["avg_ts"]) + n_prompt = int(row["n_prompt"]) + label = "Prompt Processing" if n_prompt != 0 else "Text Generation" + label += f" Batched {n_batch}" + results.append((label, avg_ts)) + except KeyError as e: + raise ValueError(f"Error parsing output: {e}") + + return results + + def teardown(self): + return diff --git a/scripts/benchmarks/benches/options.py b/scripts/benchmarks/benches/options.py index c035ce6800..5997cdedb8 100644 --- a/scripts/benchmarks/benches/options.py +++ b/scripts/benchmarks/benches/options.py @@ -1,13 +1,26 @@ from dataclasses import dataclass +from enum import Enum + +class Compare(Enum): + LATEST = 'latest' + AVERAGE = 'average' + MEDIAN = 'median' @dataclass class Options: - sycl: str = "" + sycl: str = None + ur: str = None + ur_adapter: str = None rebuild: bool = True benchmark_cwd: str = "INVALID" timeout: float = 600 iterations: int = 5 verbose: bool = False + compare: Compare = Compare.LATEST + compare_max: int = 10 # average/median over how many results + output_html: bool = False + output_markdown: bool = True + dry_run: bool = False options = Options() diff --git a/scripts/benchmarks/benches/quicksilver.py b/scripts/benchmarks/benches/quicksilver.py deleted file mode 100644 index b7600d11be..0000000000 --- a/scripts/benchmarks/benches/quicksilver.py +++ 
/dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re - -class QuickSilver(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("QuickSilver", "qs", vb) - self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering") - - def run(self, env_vars) -> list[Result]: - # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0 - if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0': - return None - - return super().run(env_vars) - - def name(self): - return "Velocity-Bench QuickSilver" - - def unit(self): - return "MMS/CTT" - - def lower_is_better(self): - return False - - def bin_args(self) -> list[str]: - return ["-i", f"{self.data_path}/scatteringOnly.inp"] - - def extra_env_vars(self) -> dict: - return {"QS_DEVICE" : "GPU"} - - def parse_output(self, stdout: str) -> float: - match = re.search(r'Figure Of Merit\s+(\d+\.\d+)', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") diff --git a/scripts/benchmarks/benches/result.py b/scripts/benchmarks/benches/result.py index 6fc7e16095..07ee70148a 100644 --- a/scripts/benchmarks/benches/result.py +++ b/scripts/benchmarks/benches/result.py @@ -4,7 +4,9 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from dataclasses import dataclass +from typing import Optional from dataclasses_json import dataclass_json +from datetime import datetime @dataclass_json @dataclass @@ -15,6 +17,17 @@ class Result: env: str stdout: str passed: bool = True + # values should not be set by the 
benchmark unit: str = "" name: str = "" lower_is_better: bool = True + git_hash: str = '' + date: Optional[datetime] = None + +@dataclass_json +@dataclass +class BenchmarkRun: + results: list[Result] + name: str = 'This PR' + git_hash: str = '' + date: datetime = None diff --git a/scripts/benchmarks/benches/syclbench.py b/scripts/benchmarks/benches/syclbench.py index b9d6e50623..fbfd009935 100644 --- a/scripts/benchmarks/benches/syclbench.py +++ b/scripts/benchmarks/benches/syclbench.py @@ -7,19 +7,20 @@ import csv import io from utils.utils import run, git_clone, create_build_path -from .base import Benchmark +from .base import Benchmark, Suite from .result import Result from .options import options -class SyclBench: +class SyclBench(Suite): def __init__(self, directory): + if options.sycl is None: + return + self.directory = directory - self.built = False - self.setup() return def setup(self): - if self.built: + if options.sycl is None: return build_path = create_build_path(self.directory, 'sycl-bench-build') @@ -40,6 +41,50 @@ def setup(self): self.built = True + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + return [ + # Blocked_transform(self), # run time < 1ms + DagTaskI(self), + DagTaskS(self), + HostDevBandwidth(self), + LocalMem(self), + Pattern_L2(self), + Reduction(self), + ScalarProd(self), + SegmentReduction(self), + UsmAccLatency(self), + UsmAllocLatency(self), + UsmInstrMix(self), + UsmPinnedOverhead(self), + VecAdd(self), + + # *** sycl-bench single benchmarks + # TwoDConvolution(self), # run time < 1ms + Two_mm(self), + Three_mm(self), + # Arith(self), # run time < 1ms + Atax(self), + # Atomic_reduction(self), # run time < 1ms + Bicg(self), + Correlation(self), + Covariance(self), + Gemm(self), + Gesumv(self), + Gramschmidt(self), + KMeans(self), + LinRegCoeff(self), + # LinRegError(self), # run time < 1ms + MatmulChain(self), + MolDyn(self), + Mvt(self), + Sf(self), + Syr2k(self), + Syrk(self), + ] + class 
SyclBenchmark(Benchmark): def __init__(self, bench, name, test): self.bench = bench @@ -58,7 +103,6 @@ def unit(self): return "ms" def setup(self): - self.bench.setup() self.benchmark_bin = os.path.join(self.directory, 'sycl-bench-build', self.bench_name) def run(self, env_vars) -> list[Result]: diff --git a/scripts/benchmarks/benches/test.py b/scripts/benchmarks/benches/test.py new file mode 100644 index 0000000000..88bc29a649 --- /dev/null +++ b/scripts/benchmarks/benches/test.py @@ -0,0 +1,68 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import random +from utils.utils import git_clone +from .base import Benchmark, Suite +from .result import Result +from utils.utils import run, create_build_path +from .options import options +import os + +class TestSuite(Suite): + def __init__(self): + return + + def setup(self): + return + + def benchmarks(self) -> list[Benchmark]: + bench_configs = [ + ("Memory Bandwidth", 2000, 200), + ("Latency", 100, 20), + ("Throughput", 1500, 150), + ("FLOPS", 3000, 300), + ("Cache Miss Rate", 250, 25), + ] + + result = [] + for base_name, base_value, base_diff in bench_configs: + for variant in range(6): + value_multiplier = 1.0 + (variant * 0.2) + name = f"{base_name} {variant+1}" + value = base_value * value_multiplier + diff = base_diff * value_multiplier + + result.append(TestBench(name, value, diff)) + + return result + +class TestBench(Benchmark): + def __init__(self, name, value, diff): + self.bname = name + self.value = value + self.diff = diff + super().__init__("") + + def name(self): + return self.bname + + def unit(self): + return "ms" + + def lower_is_better(self): + return True + + def setup(self): + return + + def run(self, env_vars) -> list[Result]: + random_value = self.value + random.uniform(-1 * (self.diff), self.diff) + return [ + 
Result(label=self.name(), value=random_value, command="", env={"A": "B"}, stdout="no output") + ] + + def teardown(self): + return diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py index 3c903bf11b..856fd993db 100644 --- a/scripts/benchmarks/benches/velocity.py +++ b/scripts/benchmarks/benches/velocity.py @@ -3,18 +3,41 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import re +import shutil from utils.utils import git_clone -from .base import Benchmark +from .base import Benchmark, Suite from .result import Result from utils.utils import run, create_build_path from .options import options import os -class VelocityBench: +class VelocityBench(Suite): def __init__(self, directory): + if options.sycl is None: + return + self.directory = directory + + def setup(self): + if options.sycl is None: + return + self.repo_path = git_clone(self.directory, "velocity-bench-repo", "https://github.com/oneapi-src/Velocity-Bench/", "b22215c16f789100449c34bf4eaa3fb178983d69") + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + return [ + Hashtable(self), + Bitcracker(self), + CudaSift(self), + Easywave(self), + QuickSilver(self), + SobelFilter(self) + ] + class VelocityBase(Benchmark): def __init__(self, name: str, bin_name: str, vb: VelocityBench): super().__init__(vb.directory) @@ -60,7 +83,185 @@ def run(self, env_vars) -> list[Result]: result = self.run_bench(command, env_vars) - return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ] + return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result) ] def teardown(self): return + +class Hashtable(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("hashtable", "hashtable_sycl", vb) + + def name(self): + return "Velocity-Bench Hashtable" + + def 
unit(self): + return "M keys/sec" + + def bin_args(self) -> list[str]: + return ["--no-verify"] + + def lower_is_better(self): + return False + + def parse_output(self, stdout: str) -> float: + match = re.search(r'(\d+\.\d+) million keys/second', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse keys per second from benchmark output.") + + +class Bitcracker(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("bitcracker", "bitcracker", vb) + self.data_path = os.path.join(vb.repo_path, "bitcracker", "hash_pass") + + def name(self): + return "Velocity-Bench Bitcracker" + + def unit(self): + return "s" + + def bin_args(self) -> list[str]: + return ["-f", f"{self.data_path}/img_win8_user_hash.txt", + "-d", f"{self.data_path}/user_passwords_60000.txt", + "-b", "60000"] + + def parse_output(self, stdout: str) -> float: + match = re.search(r'bitcracker - total time for whole calculation: (\d+\.\d+) s', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") + +class SobelFilter(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("sobel_filter", "sobel_filter", vb) + + def download_deps(self): + self.download("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True) + return + + def name(self): + return "Velocity-Bench Sobel Filter" + + def unit(self): + return "ms" + + def bin_args(self) -> list[str]: + return ["-i", f"{self.data_path}/sobel_filter_data/silverfalls_32Kx32K.png", + "-n", "5"] + + def extra_env_vars(self) -> dict: + return {"OPENCV_IO_MAX_IMAGE_PIXELS" : "1677721600"} + + def parse_output(self, stdout: str) -> float: + match = re.search(r'sobelfilter - total time for whole calculation: (\d+\.\d+) s', stdout) + if match: + return round(float(match.group(1)) * 
1000, 3) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") + + +class QuickSilver(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("QuickSilver", "qs", vb) + self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering") + + def run(self, env_vars) -> list[Result]: + # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0 + if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0': + return None + + return super().run(env_vars) + + def name(self): + return "Velocity-Bench QuickSilver" + + def unit(self): + return "MMS/CTT" + + def lower_is_better(self): + return False + + def bin_args(self) -> list[str]: + return ["-i", f"{self.data_path}/scatteringOnly.inp"] + + def extra_env_vars(self) -> dict: + return {"QS_DEVICE" : "GPU"} + + def parse_output(self, stdout: str) -> float: + match = re.search(r'Figure Of Merit\s+(\d+\.\d+)', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") + +class Easywave(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("easywave", "easyWave_sycl", vb) + + def download_deps(self): + self.download("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz", untar=True) + + def name(self): + return "Velocity-Bench Easywave" + + def unit(self): + return "ms" + + def bin_args(self) -> list[str]: + return ["-grid", f"{self.data_path}/examples/e2Asean.grd", + "-source", f"{self.data_path}/examples/BengkuluSept2007.flt", + "-time", "120"] + + # easywave doesn't output a useful single perf value. Instead, we parse the + # output logs looking for the very last line containing the elapsed time of the + # application. 
+ def get_last_elapsed_time(self, log_file_path) -> float: + elapsed_time_pattern = re.compile(r'Model time = (\d{2}:\d{2}:\d{2}),\s+elapsed: (\d+) msec') + last_elapsed_time = None + + try: + with open(log_file_path, 'r') as file: + for line in file: + match = elapsed_time_pattern.search(line) + if match: + last_elapsed_time = int(match.group(2)) + + if last_elapsed_time is not None: + return last_elapsed_time + else: + raise ValueError("No elapsed time found in the log file.") + except FileNotFoundError: + raise FileNotFoundError(f"The file {log_file_path} does not exist.") + except Exception as e: + raise e + + def parse_output(self, stdout: str) -> float: + return self.get_last_elapsed_time(os.path.join(options.benchmark_cwd, "easywave.log")) + + +class CudaSift(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("cudaSift", "cudaSift", vb) + + def download_deps(self): + images = os.path.join(self.vb.repo_path, self.bench_name, 'inputData') + dest = os.path.join(self.directory, 'inputData') + if not os.path.exists(dest): + shutil.copytree(images, dest) + + def name(self): + return "Velocity-Bench CudaSift" + + def unit(self): + return "ms" + + def parse_output(self, stdout: str) -> float: + match = re.search(r'Avg workload time = (\d+\.\d+) ms', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("Failed to parse benchmark output.") diff --git a/scripts/benchmarks/history.py b/scripts/benchmarks/history.py new file mode 100644 index 0000000000..5b83ef9479 --- /dev/null +++ b/scripts/benchmarks/history.py @@ -0,0 +1,135 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import json +from pathlib import Path +from benches.result import Result, BenchmarkRun +from benches.options import Compare, options +from datetime import datetime, timezone +from utils.utils import run; + +class BenchmarkHistory: + benchmark_run_index_max = 0 + runs = [] + + def __init__(self, dir): + self.dir = dir + + def load_result(self, file_path: Path) -> BenchmarkRun: + if file_path.exists(): + with file_path.open('r') as file: + data = json.load(file) + return BenchmarkRun.from_json(data) + else: + return None + + def load(self, n: int): + results_dir = Path(self.dir) / 'results' + if not results_dir.exists() or not results_dir.is_dir(): + return [] + + # Get all JSON files in the results directory + benchmark_files = list(results_dir.glob('*.json')) + + # Extract index numbers and sort files by index number + def extract_index(file_path: Path) -> int: + try: + return int(file_path.stem.split('_')[0]) + except (IndexError, ValueError): + return -1 + + benchmark_files = [file for file in benchmark_files if extract_index(file) != -1] + benchmark_files.sort(key=extract_index) + + # Load the first n benchmark files + benchmark_runs = [] + for file_path in benchmark_files[n::-1]: + benchmark_run = self.load_result(file_path) + if benchmark_run: + benchmark_runs.append(benchmark_run) + + if benchmark_files: + self.benchmark_run_index_max = extract_index(benchmark_files[-1]) + + self.runs = benchmark_runs + + def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: + try: + result = run("git rev-parse --short HEAD") + git_hash = result.stdout.decode().strip() + except: + git_hash = 'unknown' + + return BenchmarkRun(name = name, git_hash = git_hash, date = datetime.now(tz=timezone.utc), results = results) + + def save(self, save_name, results: list[Result], to_file = True): + benchmark_data = self.create_run(save_name, results) + 
self.runs.append(benchmark_data) + + if not to_file: + return + + serialized = benchmark_data.to_json() + results_dir = Path(os.path.join(self.dir, 'results')) + os.makedirs(results_dir, exist_ok=True) + + self.benchmark_run_index_max += 1 + file_path = Path(os.path.join(results_dir, f"{self.benchmark_run_index_max}_{save_name}.json")) + with file_path.open('w') as file: + json.dump(serialized, file, indent=4) + print(f"Benchmark results saved to {file_path}") + + def find_first(self, name: str) -> BenchmarkRun: + for r in self.runs: + if r.name == name: + return r + return None + + def compute_average(self, data: list[BenchmarkRun]): + first_run = data[0] + average_results = [] + + for i in range(len(first_run.results)): + all_values = [run.results[i].value for run in data] + + # Calculate the average value for the current result index + average_value = sum(all_values) / len(all_values) + + average_result = first_run.results[i] + average_result.value = average_value + + average_results.append(average_result) + + average_benchmark_run = BenchmarkRun( + results = average_results, + name = first_run.name, + git_hash = "average", + date = first_run.date # should this be different? 
+ ) + + return average_benchmark_run + + def get_compare(self, name: str) -> BenchmarkRun: + if options.compare == Compare.LATEST: + return self.find_first(name) + + data = [] + for r in self.runs: + if r.name == name: + data.append(r) + if len(data) == options.compare_max: + break + + if len(data) == 0: + return None + + if options.compare == Compare.MEDIAN: + return data[len(data) // 2] + + if options.compare == Compare.AVERAGE: + return self.compute_average(data) + + raise Exception("invalid compare type") diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py index 85d9b6b608..9dd77f14b2 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -5,108 +5,55 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from utils.utils import prepare_workdir, load_benchmark_results, save_benchmark_results; from benches.compute import * -from benches.hashtable import Hashtable -from benches.bitcracker import Bitcracker -from benches.cudaSift import CudaSift -from benches.easywave import Easywave -from benches.quicksilver import QuickSilver -from benches.SobelFilter import SobelFilter from benches.velocity import VelocityBench from benches.syclbench import * -from benches.options import options -from output import generate_markdown +from benches.llamacpp import * +from benches.test import TestSuite +from benches.options import Compare, options +from output_markdown import generate_markdown +from output_html import generate_html +from history import BenchmarkHistory +from utils.utils import prepare_workdir; + import argparse import re -import subprocess # Update this if you are changing the layout of the results files -INTERNAL_WORKDIR_VERSION = '1.7' +INTERNAL_WORKDIR_VERSION = '2.0' def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) - cb = ComputeBench(directory) - vb = VelocityBench(directory) - sb = SyclBench(directory) - - benchmarks 
= [ - # *** Compute benchmarks - SubmitKernelSYCL(cb, 0), - SubmitKernelSYCL(cb, 1), - SubmitKernelUR(cb, 0), - SubmitKernelUR(cb, 1), - QueueInOrderMemcpy(cb, 0, 'Device', 'Device', 1024), - QueueInOrderMemcpy(cb, 0, 'Host', 'Device', 1024), - QueueMemcpy(cb, 'Device', 'Device', 1024), - StreamMemory(cb, 'Triad', 10 * 1024, 'Device'), - ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024), - ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024), - VectorSum(cb), - MemcpyExecute(cb, 400, 8, 1024, 100), - MemcpyExecute(cb, 400, 8, 102400, 10), - MemcpyExecute(cb, 500, 8, 102400, 10), - MemcpyExecute(cb, 400, 1, 1024, 1000), - MemcpyExecute(cb, 10, 16, 1024, 1000), - MemcpyExecute(cb, 10, 16, 102400, 100), - - # *** Velocity benchmarks - Hashtable(vb), - Bitcracker(vb), - CudaSift(vb), - Easywave(vb), - QuickSilver(vb), - SobelFilter(vb), - - # *** sycl-bench multi benchmarks - # Blocked_transform(sb), # run time < 1ms - DagTaskI(sb), - DagTaskS(sb), - HostDevBandwidth(sb), - LocalMem(sb), - Pattern_L2(sb), - Reduction(sb), - ScalarProd(sb), - SegmentReduction(sb), - UsmAccLatency(sb), - UsmAllocLatency(sb), - UsmInstrMix(sb), - UsmPinnedOverhead(sb), - VecAdd(sb), - - # *** sycl-bench single benchmarks - # TwoDConvolution(sb), # run time < 1ms - Two_mm(sb), - Three_mm(sb), - # Arith(sb), # run time < 1ms - Atax(sb), - # Atomic_reduction(sb), # run time < 1ms - Bicg(sb), - Correlation(sb), - Covariance(sb), - Gemm(sb), - Gesumv(sb), - Gramschmidt(sb), - KMeans(sb), - LinRegCoeff(sb), - # LinRegError(sb), # run time < 1ms - MatmulChain(sb), - MolDyn(sb), - Mvt(sb), - Sf(sb), - Syr2k(sb), - Syrk(sb), - ] + suites = [ + ComputeBench(directory), + VelocityBench(directory), + SyclBench(directory), + LlamaCppBench(directory), + #TestSuite() + ] if not options.dry_run else [] + + benchmarks = [] + + for s in suites: + print(f"Setting up {type(s).__name__}") + s.setup() + print(f"{type(s).__name__} setup complete.") + + for s in suites: + benchmarks += 
s.benchmarks() if filter: benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())] + for b in benchmarks: + print(b.name()) + for benchmark in benchmarks: try: - print(f"setting up {benchmark.name()}... ", end='', flush=True) + print(f"Setting up {benchmark.name()}... ") benchmark.setup() - print("complete.") + print(f"{benchmark.name()} setup complete.") except Exception as e: if options.exit_on_failure: @@ -119,7 +66,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): try: merged_env_vars = {**additional_env_vars} iteration_results = [] - for iter in range(options.iterations): + iterations = options.iterations if not benchmark.ignore_iterations() else 1 + for iter in range(iterations): print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True) bench_results = benchmark.run(merged_env_vars) if bench_results is not None: @@ -131,7 +79,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): iteration_results.append(bench_result) else: print(f"did not finish (OK for sycl-bench).") - break; + break if len(iteration_results) == 0: continue @@ -145,6 +93,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): median_result.unit = benchmark.unit() median_result.name = label + median_result.lower_is_better = benchmark.lower_is_better() results.append(median_result) except Exception as e: @@ -158,23 +107,44 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): benchmark.teardown() print("complete.") - chart_data = {"This PR" : results} + this_name = "This PR" + + chart_data = {this_name : results} + + history = BenchmarkHistory(directory) + # limit how many files we load. + # should this be configurable? 
+ history.load(1000) for name in compare_names: - print(f"compare name: {name}") - compare_result = load_benchmark_results(directory, name) + compare_result = history.get_compare(name) if compare_result: - chart_data[name] = compare_result + chart_data[name] = compare_result.results + + if options.output_markdown: + markdown_content = generate_markdown(this_name, chart_data) + + with open('benchmark_results.md', 'w') as file: + file.write(markdown_content) + + print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") + + saved_name = save_name if save_name is not None else this_name - if save_name: - save_benchmark_results(directory, save_name, results) + # It's important we don't save the current results into history before + # we calculate historical averages or get latest results for compare. + # Otherwise we might be comparing the results to themselves. + if not options.dry_run: + history.save(saved_name, results, save_name is not None) + compare_names.append(saved_name) - markdown_content = generate_markdown(chart_data) + if options.output_html: + html_content = generate_html(history.runs, 'oneapi-src/unified-runtime', compare_names) - with open('benchmark_results.md', 'w') as file: - file.write(markdown_content) + with open('benchmark_results.html', 'w') as file: + file.write(html_content) - print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") + print(f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html") def validate_and_parse_env_args(env_args): env_vars = {} @@ -188,9 +158,9 @@ def validate_and_parse_env_args(env_args): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Unified Runtime Benchmark Runner') parser.add_argument('benchmark_directory', type=str, help='Working directory to setup benchmarks.') - parser.add_argument('sycl', type=str, help='Root directory of the SYCL compiler.') - parser.add_argument('ur_dir', 
type=str, help='UR install prefix path') - parser.add_argument('ur_adapter_name', type=str, help='Options to build the Unified Runtime as part of the benchmark') + parser.add_argument('--sycl', type=str, help='Root directory of the SYCL compiler.', default=None) + parser.add_argument('--ur', type=str, help='UR install prefix path', default=None) + parser.add_argument('--adapter', type=str, help='Options to build the Unified Runtime as part of the benchmark', default="level_zero") parser.add_argument("--no-rebuild", help='Rebuild the benchmarks from scratch.', action="store_true") parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[]) parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.') @@ -200,7 +170,12 @@ def validate_and_parse_env_args(env_args): parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None) parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=0.005) parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true") - parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true") + parser.add_argument("--exit-on-failure", help='Exit on first failure.', action="store_true") + parser.add_argument("--compare-type", type=str, choices=[e.value for e in Compare], help='Compare results against previously saved data.', default=Compare.LATEST.value) + parser.add_argument("--compare-max", type=int, help='How many results to read for comparisions', default=10) + parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False) + parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True) + parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False) args = 
parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -211,9 +186,14 @@ def validate_and_parse_env_args(env_args): options.iterations = args.iterations options.timeout = args.timeout options.epsilon = args.epsilon - options.ur_dir = args.ur_dir - options.ur_adapter_name = args.ur_adapter_name + options.ur = args.ur + options.ur_adapter = args.adapter options.exit_on_failure = args.exit_on_failure + options.compare = Compare(args.compare_type) + options.compare_max = args.compare_max + options.output_html = args.output_html + options.output_markdown = args.output_markdown + options.dry_run = args.dry_run benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/scripts/benchmarks/output_html.py b/scripts/benchmarks/output_html.py new file mode 100644 index 0000000000..4a04252797 --- /dev/null +++ b/scripts/benchmarks/output_html.py @@ -0,0 +1,396 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import re +import matplotlib.pyplot as plt +import mpld3 +from collections import defaultdict +from dataclasses import dataclass +import matplotlib.dates as mdates +import numpy as np +from benches.result import BenchmarkRun, Result + +@dataclass +class BenchmarkMetadata: + unit: str + lower_is_better: bool + +@dataclass +class BenchmarkSeries: + label: str + metadata: BenchmarkMetadata + runs: list[BenchmarkRun] + +@dataclass +class LatestResults: + benchmark_label: str + run_values: dict[str, float] + + @classmethod + def from_dict(cls, label: str, values: dict[str, float]) -> 'LatestResults': + return cls(benchmark_label=label, run_values=values) + +def get_latest_results(benchmarks: list[BenchmarkSeries]) -> dict[str, LatestResults]: + latest_results: dict[str, LatestResults] = {} + for benchmark in benchmarks: + run_values = { + run.name: max(run.results, key=lambda x: x.date).value + for run in benchmark.runs + } + latest_results[benchmark.label] = LatestResults.from_dict(benchmark.label, run_values) + return latest_results + +def prepare_normalized_data(latest_results: dict[str, LatestResults], + benchmarks: list[BenchmarkSeries], + group_benchmarks: list[str], + non_baseline_runs: list[str], + baseline_name: str) -> list[list[float]]: + normalized_data = [] + benchmark_map = {b.label: b for b in benchmarks} + + for run_name in non_baseline_runs: + run_data: list[float] = [] + for benchmark_label in group_benchmarks: + benchmark_data = latest_results[benchmark_label].run_values + if run_name not in benchmark_data or baseline_name not in benchmark_data: + run_data.append(None) + continue + + baseline_value = benchmark_data[baseline_name] + current_value = benchmark_data[run_name] + + normalized_value = ((baseline_value / current_value) if benchmark_map[benchmark_label].metadata.lower_is_better + else (current_value / baseline_value)) * 100 + run_data.append(normalized_value) + 
normalized_data.append(run_data) + return normalized_data + +def format_benchmark_label(label: str) -> list[str]: + words = re.split(' |_', label) + lines = [] + current_line = [] + + # max line length 30 + for word in words: + if len(' '.join(current_line + [word])) > 30: + lines.append(' '.join(current_line)) + current_line = [word] + else: + current_line.append(word) + + if current_line: + lines.append(' '.join(current_line)) + + return lines + +def create_bar_plot(ax: plt.Axes, + normalized_data: list[list[float]], + group_benchmarks: list[str], + non_baseline_runs: list[str], + latest_results: dict[str, LatestResults], + benchmarks: list[BenchmarkSeries], + baseline_name: str) -> float: + x = np.arange(len(group_benchmarks)) + width = 0.8 / len(non_baseline_runs) + max_height = 0 + benchmark_map = {b.label: b for b in benchmarks} + + for i, (run_name, run_data) in enumerate(zip(non_baseline_runs, normalized_data)): + offset = width * i - width * (len(non_baseline_runs) - 1) / 2 + positions = x + offset + valid_data = [v if v is not None else 0 for v in run_data] + rects = ax.bar(positions, valid_data, width, label=run_name) + + for rect, value, benchmark_label in zip(rects, run_data, group_benchmarks): + if value is not None: + height = rect.get_height() + if height > max_height: + max_height = height + + ax.text(rect.get_x() + rect.get_width()/2., height + 2, + f'{value:.1f}%', + ha='center', va='bottom') + + benchmark_data = latest_results[benchmark_label].run_values + baseline_value = benchmark_data[baseline_name] + current_value = benchmark_data[run_name] + unit = benchmark_map[benchmark_label].metadata.unit + + tooltip_labels = [ + f"Run: {run_name}\n" + f"Value: {current_value:.2f} {unit}\n" + f"Normalized to ({baseline_name}): {baseline_value:.2f} {unit}\n" + f"Normalized: {value:.1f}%" + ] + tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css='.mpld3-tooltip{background:white;padding:8px;border:1px solid 
#ddd;border-radius:4px;font-family:monospace;white-space:pre;}') + mpld3.plugins.connect(ax.figure, tooltip) + + return max_height + +def add_chart_elements(ax: plt.Axes, + group_benchmarks: list[str], + group_name: str, + max_height: float) -> None: + top_padding = max_height * 0.2 + ax.set_ylim(0, max_height + top_padding) + ax.set_ylabel('Performance relative to baseline (%)') + ax.set_title(f'Performance Comparison (Normalized to Baseline) - {group_name} Group') + ax.set_xticks([]) + + for idx, label in enumerate(group_benchmarks): + split_labels = format_benchmark_label(label) + for i, sublabel in enumerate(split_labels): + y_pos = max_height + (top_padding * 0.5) + 2 - (i * top_padding * 0.15) + ax.text(idx, y_pos, sublabel, + ha='center', + style='italic', + color='#666666') + + ax.grid(True, axis='y', alpha=0.2) + ax.legend(bbox_to_anchor=(1, 1), loc='upper left') + +def split_large_groups(benchmark_groups): + miscellaneous = [] + new_groups = defaultdict(list) + + split_happened = False + for group, labels in benchmark_groups.items(): + if len(labels) == 1: + miscellaneous.extend(labels) + elif len(labels) > 5: + split_happened = True + mid = len(labels) // 2 + new_groups[group] = labels[:mid] + new_groups[group + '_'] = labels[mid:] + else: + new_groups[group] = labels + + if miscellaneous: + new_groups['Miscellaneous'] = miscellaneous + + if split_happened: + return split_large_groups(new_groups) + else: + return new_groups + +def group_benchmark_labels(benchmark_labels): + benchmark_groups = defaultdict(list) + for label in benchmark_labels: + group = re.match(r'^[^_\s]+', label)[0] + benchmark_groups[group].append(label) + return split_large_groups(benchmark_groups) + +def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]: + latest_results = get_latest_results(benchmarks) + + run_names = sorted(list(set( + name for result in latest_results.values() + for name in result.run_values.keys() + ))) + + if 
baseline_name not in run_names: + return [] + + benchmark_labels = [b.label for b in benchmarks] + + benchmark_groups = group_benchmark_labels(benchmark_labels) + + html_charts = [] + + for group_name, group_benchmarks in benchmark_groups.items(): + plt.close('all') + non_baseline_runs = [n for n in run_names if n != baseline_name] + + if len(non_baseline_runs) == 0: + continue + + normalized_data = prepare_normalized_data( + latest_results, benchmarks, group_benchmarks, + non_baseline_runs, baseline_name + ) + + fig, ax = plt.subplots(figsize=(10, 6)) + max_height = create_bar_plot( + ax, normalized_data, group_benchmarks, non_baseline_runs, + latest_results, benchmarks, baseline_name + ) + add_chart_elements(ax, group_benchmarks, group_name, max_height) + + plt.tight_layout() + html_charts.append(mpld3.fig_to_html(fig)) + plt.close(fig) + + return html_charts + +def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> str: + plt.close('all') + + num_benchmarks = len(benchmarks) + if num_benchmarks == 0: + return + + fig, axes = plt.subplots(num_benchmarks, 1, figsize=(10, max(4 * num_benchmarks, 30))) + + if num_benchmarks == 1: + axes = [axes] + + for idx, benchmark in enumerate(benchmarks): + ax = axes[idx] + + for run in benchmark.runs: + sorted_points = sorted(run.results, key=lambda x: x.date) + dates = [point.date for point in sorted_points] + values = [point.value for point in sorted_points] + + ax.plot_date(dates, values, '-', label=run.name, alpha=0.5) + scatter = ax.scatter(dates, values, picker=True) + + tooltip_labels = [ + f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n" + f"Value: {point.value:.2f}\n" + f"Git Hash: {point.git_hash}" + for point in sorted_points + ] + + targets = [f"https://github.com/{github_repo}/commit/{point.git_hash}" + for point in sorted_points] + + tooltip = mpld3.plugins.PointHTMLTooltip(scatter, tooltip_labels, + css='.mpld3-tooltip{background:white;padding:8px;border:1px solid 
#ddd;border-radius:4px;font-family:monospace;white-space:pre;}', + targets=targets) + mpld3.plugins.connect(fig, tooltip) + + ax.set_title(benchmark.label, pad=20) + performance_indicator = "lower is better" if benchmark.metadata.lower_is_better else "higher is better" + ax.text(0.5, 1.05, f"({performance_indicator})", + ha='center', + transform=ax.transAxes, + style='italic', + fontsize=7, + color='#666666') + + ax.set_xlabel('') + unit = benchmark.metadata.unit + ax.set_ylabel(f"Value ({unit})" if unit else "Value") + ax.grid(True, alpha=0.2) + ax.legend(bbox_to_anchor=(1, 1), loc='upper left') + ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S')) + ax.xaxis.set_major_locator(mdates.AutoDateLocator()) + + plt.tight_layout() + html = mpld3.fig_to_html(fig) + + plt.close(fig) + return html + +def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[BenchmarkSeries]: + benchmark_metadata: dict[str, BenchmarkMetadata] = {} + run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list)) + + for run in benchmark_runs: + if run.name not in compare_names: + continue + + for result in run.results: + if result.label not in benchmark_metadata: + benchmark_metadata[result.label] = BenchmarkMetadata( + unit=result.unit, + lower_is_better=result.lower_is_better + ) + + result.date = run.date + result.git_hash = run.git_hash + run_map[result.label][run.name].append(result) + + benchmark_series = [] + for label, metadata in benchmark_metadata.items(): + runs = [ + BenchmarkRun(name=run_name, results=results) + for run_name, results in run_map[label].items() + ] + benchmark_series.append(BenchmarkSeries( + label=label, + metadata=metadata, + runs=runs + )) + + return benchmark_series + +def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]) -> str: + baseline_name = compare_names[0] + benchmarks = process_benchmark_data(benchmark_runs, 
compare_names) + + comparison_html_charts = create_normalized_bar_chart(benchmarks, baseline_name) + timeseries_html = create_time_series_chart(benchmarks, github_repo) + comparison_charts_html = '\n'.join(f'
{chart}
' for chart in comparison_html_charts) + + html_template = f""" + + + + + + Benchmark Results + + + +
+

Benchmark Results

+

Latest Results Comparison

+
+ {comparison_charts_html} +
+

Historical Results

+
+ {timeseries_html} +
+
+ + + """ + + return html_template diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output_markdown.py similarity index 96% rename from scripts/benchmarks/output.py rename to scripts/benchmarks/output_markdown.py index eec8957fe7..177869f8f0 100644 --- a/scripts/benchmarks/output.py +++ b/scripts/benchmarks/output_markdown.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import collections, re -from benches.base import Result +from benches.result import Result from benches.options import options import math @@ -126,7 +126,7 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]): if oln.diff != None: oln.row += f" | {(oln.diff - 1)*100:.2f}%" delta = oln.diff - 1 - oln.bars = round(10*(oln.diff - 1)/max_diff) + oln.bars = round(10*(oln.diff - 1)/max_diff) if max_diff != 0.0 else 0 if oln.bars == 0 or abs(delta) < options.epsilon: oln.row += " | . |" elif oln.bars > 0: @@ -155,7 +155,6 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]): if options.verbose: print(oln.row) summary_table += oln.row + "\n" - grouped_objects = collections.defaultdict(list) for oln in output_detailed_list: @@ -211,7 +210,7 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]): return summary_line, summary_table -def generate_markdown(chart_data: dict[str, list[Result]]): +def generate_markdown(name: str, chart_data: dict[str, list[Result]]): (summary_line, summary_table) = generate_summary_table_and_chart(chart_data) return f""" @@ -220,5 +219,5 @@ def generate_markdown(chart_data: dict[str, list[Result]]): (result is better)\n {summary_table} # Details -{generate_markdown_details(chart_data["This PR"])} +{generate_markdown_details(chart_data[name])} """ diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py index 586837fc6f..d077184e5c 100644 --- a/scripts/benchmarks/utils/utils.py +++ b/scripts/benchmarks/utils/utils.py @@ -4,20 +4,25 @@ # 
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import os -import json import shutil -import subprocess # nosec B404 -from pathlib import Path -from benches.result import Result +import subprocess + +import tarfile +import urllib # nosec B404 from benches.options import options +from pathlib import Path -def run(command, env_vars={}, cwd=None, add_sycl=False): +def run(command, env_vars={}, cwd=None, add_sycl=False, ld_library=[]): try: if isinstance(command, str): command = command.split() env = os.environ.copy() + for ldlib in ld_library: + env['LD_LIBRARY_PATH'] = ldlib + os.pathsep + env.get('LD_LIBRARY_PATH', '') + + # order is important, we want provided sycl rt libraries to be first if add_sycl: sycl_bin_path = os.path.join(options.sycl, 'bin') env['PATH'] = sycl_bin_path + os.pathsep + env.get('PATH', '') @@ -25,6 +30,7 @@ def run(command, env_vars={}, cwd=None, add_sycl=False): env['LD_LIBRARY_PATH'] = sycl_lib_path + os.pathsep + env.get('LD_LIBRARY_PATH', '') env.update(env_vars) + result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, timeout=options.timeout) # nosec B603 if options.verbose: @@ -51,25 +57,6 @@ def git_clone(dir, name, repo, commit): raise Exception(f"The directory {repo_path} exists but is not a git repository.") return repo_path -def save_benchmark_results(dir, save_name, benchmark_data: list[Result]): - serialized = [res.to_json() for res in benchmark_data] - results_dir = Path(os.path.join(dir, 'results')) - os.makedirs(results_dir, exist_ok=True) - - file_path = Path(os.path.join(results_dir, f"{save_name}.json")) - with file_path.open('w') as file: - json.dump(serialized, file, indent=4) - print(f"Benchmark results saved to {file_path}") - -def load_benchmark_results(dir, compare_name) -> list[Result]: - file_path = Path(os.path.join(dir, 'results', f"{compare_name}.json")) - if file_path.exists(): - with file_path.open('r') as file: - data = json.load(file) - return 
[Result.from_json(item) for item in data] - else: - return None - def prepare_bench_cwd(dir): # we need 2 deep to workaround a problem with a fixed relative path in cudaSift options.benchmark_cwd = os.path.join(dir, 'bcwd', 'bcwd') @@ -109,3 +96,16 @@ def create_build_path(directory, name): Path(build_path).mkdir(parents=True, exist_ok=True) return build_path + +def download(dir, url, file, untar = False): + data_file = os.path.join(dir, file) + if not Path(data_file).exists(): + print(f"{data_file} does not exist, downloading") + urllib.request.urlretrieve(url, data_file) + if untar: + file = tarfile.open(data_file) + file.extractall(dir) + file.close() + else: + print(f"{data_file} exists, skipping...") + return data_file diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index 80aa32d6aa..b7ede116b0 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -367,6 +367,45 @@ the following command from the build directory. ctest -L "conformance" +Conformance Match Files +----------------------- + +At the moment, not all tests currently pass with all adapters. Some tests are +selectively marked as failing on certain adapters using a .match file located +at ``test/conformance//_adapter_.match``. If +that file exists, then it must contain a list of test specifiers which +specify tests that fail for the given adapter. + +When run through ``ctest``, each failing test will be run in a separate +invocation (to capture any crashes) to verify that they are still failing. All +tests not matched by the filters will also be run in a single invocation which +must succeed. + +This behaviour can be disabled by setting the environment variable +``GTEST_OUTPUT``. If this is set, the test runner assumes it is being run to +collect testing statistics, and just runs the test suite with no filters. + +The format of the match files is as follows: + +* Each line consists of the name of a test as understood by gtest. 
This is the + name printed next to ``[ RUN ]`` in the test log. +* ``*`` is a wildcard that matches any number of characters in a test name. ``?`` + matches a single character. +* Empty lines or lines beginning with ``#`` are ignored. +* A line beginning with ``{{OPT}}`` is an optional test; see below. + +Normally tests in the match file must fail (either by crashing or having a test +failure) for the given adapter. However, this can be disabled by prepending +``{{OPT}}`` to the match line. This can be used if the test is flaky or +depends on a particular environment. + +This matching is done via ``test/conformance/cts_exe.py``, which is designed to be +called from ctest. However, it can be run manually as follows: + +.. code-block:: console + + test/conformance/cts_exe.py --test_command build/bin/test-adapter --failslist test/conformance/adapter/adapter_adapter_mytarget.match -- --backend=BACKEND + Experimental Features ===================== diff --git a/scripts/core/EXP-LOW-POWER-EVENTS.rst b/scripts/core/EXP-LOW-POWER-EVENTS.rst new file mode 100644 index 0000000000..43f2032527 --- /dev/null +++ b/scripts/core/EXP-LOW-POWER-EVENTS.rst @@ -0,0 +1,87 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-low-power-events: + +================================================================================ +Low Power Events +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +By default, level-zero uses busy polling for waiting on event completion when +performing host-based synchronization through APIs such as `${x}QueueFinish`. 
+This provides the lowest possible latency for the calling thread, but +it may lead to increased CPU utilization. + +This extension introduces a new hint flag for `${x}QueueCreate`, allowing users to +indicate to the runtime that they are willing to sacrifice event completion +latency in order to reduce CPU utilization. This may be implemented using +interrupt-driven event completion, where the calling thread yields until +woken up by the driver. + +For applications that want to selectively choose which events should utilize +the low-power mode, this extension also adds a new `${x}EnqueueEventsWaitWithBarrierExt` function. +This enqueue method can be used with an analogous property flag that may cause +its output event to be low-power. This barrier is meant to be used on a regular event +just before calling synchronization APIs (such as `${x}QueueFinish`) to introduce a low-power event. + +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_LOW_POWER_EVENTS_EXP +* ${x}_queue_flags_t + * ${X}_QUEUE_FLAG_LOW_POWER_EVENTS_EXP +* ${x}_exp_enqueue_ext_flags_t + * ${X}_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS +* ${x}_structure_type_t + * ${X}_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_enqueue_ext_properties_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueEventsWaitWithBarrierExt + +Changelog +-------------------------------------------------------------------------------- + ++-----------+---------------------------+ +| Revision | Changes | ++===========+===========================+ +| 1.0 | Initial Draft | ++-----------+---------------------------+ + + +Support +-------------------------------------------------------------------------------- + 
+Adapters which support this experimental feature *must* return true for the new +``${X}_DEVICE_INFO_LOW_POWER_EVENTS_EXP`` device info query. + + +Contributors +-------------------------------------------------------------------------------- + +* Piotr Balcer `piotr.balcer@intel.com `_ diff --git a/scripts/core/LEVEL_ZERO.rst b/scripts/core/LEVEL_ZERO.rst new file mode 100644 index 0000000000..caffa388a2 --- /dev/null +++ b/scripts/core/LEVEL_ZERO.rst @@ -0,0 +1,153 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +========================== +Level Zero UR Reference Document +========================== + +This document gives general guidelines on differences in the UR L0 adapter for customer usecases. + +Environment Variables +===================== + ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| Environment Variable | Description | Possible Values | Default Value | ++=============================================+==============================================================+==============================================================+==================+ +| UR_L0_USE_COPY_ENGINE | Controls the use of copy engines. | "0": Copy engines will not be used. | "1" | +| | | "1": All available copy engines can be used. | | +| | | "lower_index:upper_index": Specifies a range of copy engines | | +| | | to be used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_IMMEDIATE_COMMANDLISTS | Determines the mode of immediate command lists. | "0": Immediate command lists are not used. | "0" | +| | | "1": Immediate command lists are used per queue. | | +| | | "2": Immediate command lists are used per thread per queue. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_RELAXED_ALLOCATION_LIMITS | Controls the use of relaxed allocation limits. | "0": Relaxed allocation limits are not used. | "0" | +| | | "1": Relaxed allocation limits are used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_DRIVER_IN_ORDER_LISTS | Controls the use of in-order lists from the driver. | "0": In-order lists from the driver are not used. | "0" | +| | | "1": In-order lists from the driver are used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USM_ALLOCATOR_TRACE | Enables tracing for the USM allocator. | "0": Tracing is disabled. | "0" | +| | | "1": Tracing is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USM_ALLOCATOR | Configures the USM allocator. | Specifies the configuration for the USM allocator. | All Configs | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DEBUG_BASIC | Enables basic debugging for Level Zero. | "0": Debugging is disabled. | "0" | +| | | "1": Debugging is enabled. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_SYSMAN_ENV_DEFAULT | Controls the default SysMan environment initialization. | "1" or unset: Enables SysMan environment initialization. | "1" | +| | | "0": Disables SysMan environment initialization. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_ZESINIT_DEFAULT | Controls the default SysMan initialization with zesInit. | "1": Enables SysMan initialization with zesInit. | "0" | +| | | "0" or unset: Disables SysMan initialization with zesInit. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| SYCL_ENABLE_PCI | Deprecated and no longer needed. | Any value: Triggers a warning message. | None | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_COPY_ENGINE_FOR_FILL | Controls the use of copy engines for memory fill operations. | "0": Copy engines will not be used for fill operations. | "0" | +| | | "1": Copy engines will be used for fill operations. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DEBUG | Controls the debug level for Level Zero. | "0": No debug information. | "0" | +| | | "1": Basic debug information. | | +| | | "2": Validation debug information. | | +| | | "-1": All debug information. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_LEAKS_DEBUG | Enables debugging for memory leaks. | "0": Memory leaks debugging is disabled. | "0" | +| | | "1": Memory leaks debugging is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_INIT_ALL_DRIVERS | Controls the initialization of all Level Zero drivers. | "0": Only currently used drivers are initialized. | "0" | +| | | "1": All drivers on the system are initialized. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_SERIALIZE | Controls serialization of Level Zero calls. | "0": No locking or blocking. | "0" | +| | | "1": Locking around each UR_CALL. | | +| | | "2": Blocking UR calls where supported. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING | Controls non-blocking synchronization of queues. | "0": Non-blocking synchronization is disabled. | "0" | +| | | "1": Non-blocking synchronization is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT | Controls signal events for commands on integrated GPUs. | "0": Signal events are not created. | "0" | +| | | "1": Signal events are created. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_TRACK_INDIRECT_ACCESS_MEMORY | Enables tracking of indirect access memory. | "0": Tracking is disabled. | "0" | +| | | "1": Tracking is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING| Controls exposure of CSlice in affinity partitioning. | "0": CSlice is not exposed. | "0" | +| | | "1": CSlice is exposed. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL | Sets the maximum number of events per event pool. | Any positive integer: Specifies the maximum number of events | 256 | +| | | per event pool. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_COMMANDLISTS_CLEANUP_THRESHOLD | Sets the threshold for command lists cleanup. | Any positive integer: Specifies the threshold for cleanup. | 20 | +| | | Negative value: Disables the threshold. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_NATIVE_USM_MEMCPY2D | Controls the use of native USM memcpy2D operations. | "0": Native USM memcpy2D operations are not used. | "0" | +| | | "1": Native USM memcpy2D operations are used. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_USM_HOSTPTR_IMPORT | Enables USM host pointer import. | "0": USM host pointer import is disabled. | "0" | +| | | "1": USM host pointer import is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_USM_HOSTPTR_RELEASE | Enables USM host pointer release. | "0": USM host pointer release is disabled. | "0" | +| | | "1": USM host pointer release is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_USM_HOST_UNIFIED_MEMORY | Enables USM host unified memory. | "0": USM host unified memory is disabled. | "0" | +| | | "1": USM host unified memory is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS | Controls the use of multiple command lists for barriers. | "0": Multiple command lists are not used. | "0" | +| | | "1": Multiple command lists are used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_IN_ORDER_BARRIER_BY_SIGNAL | Controls if in-order barriers are implemented by signal. | "0": Barriers are implemented by true barrier command. | "0" | +| | | "1": Barriers are implemented by signal. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DISABLE_EVENTS_CACHING | Controls the caching of events in the context. | "0" or unset: Event caching is enabled. | "0" | +| | | "1": Event caching is disabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_REUSE_DISCARDED_EVENTS | Controls the reuse of uncompleted events in in-order queues. | "0": Reuse of discarded events is disabled. | "1" | +| | | "1" or unset: Reuse of discarded events is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST | Controls filtering of event wait lists. | "0" or unset: Filtering is disabled. | "0" | +| | | "1": Filtering is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DEVICE_SCOPE_EVENTS | Controls the scope of device events. | "0": All events are host-visible. | "0" | +| | | "1": On-demand host-visible proxy events. | | +| | | "2": Last command in batch host-visible. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY | Controls the use of copy engines for device-to-device copy | "0": Copy engines will not be used for D2D copy operations. | "0" | +| | operations. | "1": Copy engines will be used for D2D copy operations. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_BATCH_SIZE | Controls the batch size for command lists. | "0": Dynamic batch size adjustment. | "0" | +| | | Any positive integer: Specifies the fixed batch size. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_COPY_BATCH_SIZE | Controls the batch size for copy command lists. | "0": Dynamic batch size adjustment. | "0" | +| | | Any positive integer: Specifies the fixed batch size. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_MAX | Sets the maximum number of immediate command lists batches. | Any positive integer: Specifies the maximum number of batches| 10 | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +|UR_L0_IMMEDIATE_COMMANDLISTS_EVENTS_PER_BATCH| Sets the number of events per batch for immediate command | Any positive integer: Specifies the number of events per | 256 | +| | lists. | batch. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_COMPUTE_ENGINE | Controls the use of compute engines. | "0": Only the first compute engine is used. | "0" | +| | | Any positive integer: Specifies the index of the compute | | +| | | engine to be used. | | +| | | Negative value: All available compute engines may be used. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USM_RESIDENT | Controls memory residency for USM allocations. | "0xHSD": Specifies residency for host, shared, and device | 0x2 | +| | | allocations. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DISABLE_USM_ALLOCATOR | Controls the use of the USM allocator. | "0": USM allocator is enabled. | "0" | +| | | Any other value: USM allocator is disabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_CMD_BUFFER_USE_IMMEDIATE_APPEND_PATH | Controls which command-buffer implementation path is used. | "1": the immediate append path will always be enabled as | Unset | +| | The paths rely on different APIs to enqueue command-buffers. | long as the pre-requisites are met. | | +| | The immediate append path relies on | "0": the immediate append path will always be disabled. | | +| | zeCommandListImmediateAppendCommandListsExp(). | "unset": The default behaviour will be used which enables | | +| | The wait-event path relies on | the immediate append path only for some devices when the | | +| | zeCommandQueueExecuteCommandLists() | pre-requisites are met. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +Contributors +------------ + +* Neil Spruit `neil.r.spruit@intel.com `_ +* Fábio Mestre `fabio.mestre@codeplay.com `_ \ No newline at end of file diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index ad15db4592..9708059b0a 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -188,7 +188,7 @@ members: - type: uint32_t name: argIndex desc: "[in] Argument index." - - type: uint32_t + - type: size_t name: argSize desc: "[in] Argument size." - type: "const ur_kernel_arg_value_properties_t *" diff --git a/scripts/core/exp-low-power-events.yml b/scripts/core/exp-low-power-events.yml new file mode 100644 index 0000000000..f116eaf73a --- /dev/null +++ b/scripts/core/exp-low-power-events.yml @@ -0,0 +1,114 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for low-power events API" +ordinal: "100" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support low-power events." +name: $x_device_info_t +etors: + - name: LOW_POWER_EVENTS_EXP + value: "0x2021" + desc: "[$x_bool_t] returns true if the device supports low-power events." + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Extension enums to $x_queue_flags_t to support low power events." 
+name: $x_queue_flags_t +etors: + - name: LOW_POWER_EVENTS_EXP + desc: > + Hint: use low-power events. Only meaningful for Level Zero, where the implementation may use interrupt-driven events. + May reduce CPU utilization at the cost of increased event completion latency. + Other platforms may ignore this flag. + value: "$X_BIT(11)" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Extended enqueue properties" +name: $x_exp_enqueue_ext_flags_t +etors: + - name: LOW_POWER_EVENTS + desc: > + Hint: use low-power events. Only meaningful for Level Zero, where the implementation may use interrupt-driven events. + May reduce CPU utilization at the cost of increased event completion latency. + Other platforms may ignore this flag. + value: "$X_BIT(11)" + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Extended enqueue properties" +name: $x_exp_enqueue_ext_properties_t +base: $x_base_properties_t +members: + - type: $x_exp_enqueue_ext_flags_t + name: flags + desc: "[in] extended enqueue flags" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Structure type experimental enumerations" +name: $x_structure_type_t +etors: + - name: EXP_ENQUEUE_EXT_PROPERTIES + desc: $x_exp_enqueue_ext_properties_t + value: "0x4000" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue a barrier command which waits a list of events to complete before it completes, with optional extended properties" +class: $xEnqueue +name: EventsWaitWithBarrierExt +ordinal: "0" +details: + - "If the event list is empty, it waits for all previously enqueued commands to complete." + - "It blocks command execution - any following commands enqueued after it do not execute until it completes." + - "It returns an event which can be waited on." 
+analogue: + - "**clEnqueueBarrierWithWaitList**" +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: const $x_exp_enqueue_ext_properties_t* + name: pProperties + desc: "[in][optional] pointer to the extended enqueue properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before this command can be executed. + If nullptr, the numEventsInWaitList must be 0, indicating that all previously enqueued commands + must be complete. + - type: $x_event_handle_t* + name: phEvent + desc: | + [out][optional] return an event object that identifies this particular command instance. If phEventWaitList and phEvent are not NULL, phEvent must not refer to an element of the phEventWaitList array. +returns: + - $X_RESULT_ERROR_INVALID_QUEUE + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES diff --git a/scripts/core/memory.yml b/scripts/core/memory.yml index 0fde537f37..7cc7467da4 100644 --- a/scripts/core/memory.yml +++ b/scripts/core/memory.yml @@ -62,6 +62,11 @@ etors: desc: "[size_t] actual size of of memory object in bytes" - name: CONTEXT desc: "[$x_context_handle_t] context in which the memory object was created" + - name: REFERENCE_COUNT + desc: | + [uint32_t] Reference count of the memory object. + The reference count returned should be considered immediately stale. 
+ It is unsuitable for general use in applications. This feature is provided for identifying memory leaks. --- #-------------------------------------------------------------------------- type: enum desc: "Image channel order info: number of channels and the channel layout" @@ -241,6 +246,7 @@ returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_VALUE - $X_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: + - "`pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype`" - "`pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type`" - "`pImageDesc && pImageDesc->numMipLevel != 0`" - "`pImageDesc && pImageDesc->numSamples != 0`" diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index f4ba983bfc..2133e1c889 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -604,6 +604,9 @@ etors: - name: BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP desc: Enumerator for $xBindlessImagesMapExternalLinearMemoryExp value: '245' +- name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT + desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt + value: '246' --- type: enum desc: Defines structure types diff --git a/scripts/templates/index.rst.mako b/scripts/templates/index.rst.mako index 1d5ba6a9b0..d1884bdf1b 100644 --- a/scripts/templates/index.rst.mako +++ b/scripts/templates/index.rst.mako @@ -16,5 +16,6 @@ core/CONTRIB.rst core/CUDA.rst core/HIP.rst + core/LEVEL_ZERO.rst exp-features.rst api.rst diff --git a/scripts/templates/print.hpp.mako b/scripts/templates/print.hpp.mako index 4180231ea4..f066a2560c 100644 --- a/scripts/templates/print.hpp.mako +++ b/scripts/templates/print.hpp.mako @@ -42,6 +42,9 @@ from templates import helper as th ${x}::details::printPtr(os, ${caller.body()}); %elif iname and iname.startswith("pfn"): os << reinterpret_cast(${caller.body()}); + %elif "int8_t" in itype: + ## Cast to int so bytes are printed as numbers + os << static_cast(${caller.body()}); %else: os << ${caller.body()}; %endif @@ -75,8 +78,11 @@ 
def findMemberType(_item): %endif ## can't iterate over 'void *'... %if th.param_traits.is_range(item) and "void*" not in itype: - os << ".${iname} = {"; - for (size_t i = ${th.param_traits.range_start(item)}; ${deref}(params${access}${pname}) != NULL && i < ${deref}params${access}${prefix + th.param_traits.range_end(item)}; ++i) { + os << ".${iname} = "; + ${x}::details::printPtr(os, reinterpret_cast(${deref}(params${access}${pname}))); + if (${deref}(params${access}${pname}) != NULL) { + os << " {"; + for (size_t i = ${th.param_traits.range_start(item)}; i < ${deref}params${access}${prefix + th.param_traits.range_end(item)}; ++i) { if (i != 0) { os << ", "; } @@ -85,6 +91,7 @@ def findMemberType(_item): } os << "}"; + } %elif findMemberType(item) is not None and findMemberType(item)['type'] == "union": os << ".${iname} = "; ${x}::details::printUnion(os, ${deref}(params${access}${item['name']}), params${access}${th.param_traits.tagged_member(item)}); diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako index fcfa89d258..efb8e85e8e 100644 --- a/scripts/templates/queue_api.cpp.mako +++ b/scripts/templates/queue_api.cpp.mako @@ -20,6 +20,7 @@ from templates import helper as th */ #include "queue_api.hpp" +#include "ur_util.hpp" ur_queue_handle_t_::~ur_queue_handle_t_() {} @@ -32,8 +33,10 @@ ${th.make_func_name(n, tags, obj)}( ${line} %endfor ) -{ +try { return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; +} catch(...) 
{ + return exceptionToResult(std::current_exception()); } %endfor } \ No newline at end of file diff --git a/scripts/templates/queue_api.hpp.mako b/scripts/templates/queue_api.hpp.mako index dcc86265f7..69a9af328b 100644 --- a/scripts/templates/queue_api.hpp.mako +++ b/scripts/templates/queue_api.hpp.mako @@ -25,6 +25,9 @@ from templates import helper as th struct ur_queue_handle_t_ { virtual ~ur_queue_handle_t_(); + + virtual void deferEventFree(ur_event_handle_t hEvent) = 0; + %for obj in th.get_queue_related_functions(specs, n, tags): virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0; %endfor diff --git a/scripts/templates/ur_api.hpp.mako b/scripts/templates/ur_api.hpp.mako deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/source/adapters/CMakeLists.txt b/source/adapters/CMakeLists.txt index f981c17dd5..66cd8b7648 100644 --- a/source/adapters/CMakeLists.txt +++ b/source/adapters/CMakeLists.txt @@ -13,7 +13,7 @@ function(add_ur_adapter name) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../adapter.def.in ${ADAPTER_VERSION_SCRIPT} @ONLY) set_target_properties(${name} PROPERTIES - LINK_FLAGS "/DEF:${ADAPTER_VERSION_SCRIPT}" + LINK_OPTIONS "LINKER:/DEF:${ADAPTER_VERSION_SCRIPT}" ) elseif(APPLE) target_compile_options(${name} PRIVATE "-fvisibility=hidden") diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 2029903c92..527c339783 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -1304,7 +1304,12 @@ updateKernelArguments(kernel_command_handle *Command, ur_result_t Result = UR_RESULT_SUCCESS; try { - Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + // Local memory args are passed as value args with nullptr value + if (ArgValue) { + Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + } else { + Kernel->setKernelLocalArg(ArgIndex, ArgSize); + } } catch (ur_result_t Err) { Result = Err; return Result; 
diff --git a/source/adapters/cuda/context.cpp b/source/adapters/cuda/context.cpp index 69796cf79d..ffd991d59f 100644 --- a/source/adapters/cuda/context.cpp +++ b/source/adapters/cuda/context.cpp @@ -98,6 +98,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; return ReturnValue(Capabilities); } + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: // 2D USM memcpy is supported. return ReturnValue(true); diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index be5867628d..cb6b757dd3 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1107,7 +1107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 9; return ReturnValue(static_cast(Value)); } - + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: break; } diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 0e00f680f6..fc3d0220e8 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -402,6 +402,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( } } +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + /// Enqueues a wait on the given CUstream for all events. 
/// See \ref enqueueEventWait /// TODO: Add support for multiple streams once the Event class is properly @@ -953,35 +961,71 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( // CUDA has no memset functions that allow setting values more than 4 bytes. UR // API lets you pass an arbitrary "pattern" to the buffer fill, which can be -// more than 4 bytes. We must break up the pattern into 1 byte values, and set -// the buffer using multiple strided calls. The first 4 patterns are set using -// cuMemsetD32Async then all subsequent 1 byte patterns are set using -// cuMemset2DAsync which is called for each pattern. +// more than 4 bytes. We must break up the pattern into 1, 2 or 4-byte values +// and set the buffer using multiple strided calls. ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize, size_t Size, const void *pPattern, CUdeviceptr Ptr) { - // Calculate the number of patterns, stride, number of times the pattern - // needs to be applied, and the number of times the first 32 bit pattern - // needs to be applied. - auto NumberOfSteps = PatternSize / sizeof(uint8_t); - auto Pitch = NumberOfSteps * sizeof(uint8_t); - auto Height = Size / NumberOfSteps; - auto Count32 = Size / sizeof(uint32_t); - - // Get 4-byte chunk of the pattern and call cuMemsetD32Async - auto Value = *(static_cast(pPattern)); - UR_CHECK_ERROR(cuMemsetD32Async(Ptr, Value, Count32, Stream)); - for (auto step = 4u; step < NumberOfSteps; ++step) { - // take 1 byte of the pattern - Value = *(static_cast(pPattern) + step); - - // offset the pointer to the part of the buffer we want to write to - auto OffsetPtr = Ptr + (step * sizeof(uint8_t)); - - // set all of the pattern chunks - UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t), - Height, Stream)); + // Find the largest supported word size into which the pattern can be divided + auto BackendWordSize = PatternSize % 4u == 0u ? 4u + : PatternSize % 2u == 0u ? 
2u + : 1u; + + // Calculate the number of words in the pattern, the stride, and the number of + // times the pattern needs to be applied + auto NumberOfSteps = PatternSize / BackendWordSize; + auto Pitch = NumberOfSteps * BackendWordSize; + auto Height = Size / PatternSize; + + // Same implementation works for any pattern word type (uint8_t, uint16_t, + // uint32_t) + auto memsetImpl = [BackendWordSize, NumberOfSteps, Pitch, Height, Size, Ptr, + &Stream](const auto *pPatternWords, + auto &&continuousMemset, auto &&stridedMemset) { + // If the pattern is 1 word or the first word is repeated throughout, a fast + // continuous fill can be used without the need for slower strided fills + bool UseOnlyFirstValue{true}; + for (auto Step{1u}; (Step < NumberOfSteps) && UseOnlyFirstValue; ++Step) { + if (*(pPatternWords + Step) != *pPatternWords) { + UseOnlyFirstValue = false; + } + } + auto OptimizedNumberOfSteps{UseOnlyFirstValue ? 1u : NumberOfSteps}; + + // Fill the pattern in steps of BackendWordSize bytes. Use a continuous + // fill in the first step because it's faster than a strided fill. Then, + // overwrite the other values in subsequent steps. 
+ for (auto Step{0u}; Step < OptimizedNumberOfSteps; ++Step) { + if (Step == 0) { + UR_CHECK_ERROR(continuousMemset(Ptr, *(pPatternWords), + Size / BackendWordSize, Stream)); + } else { + UR_CHECK_ERROR(stridedMemset(Ptr + Step * BackendWordSize, Pitch, + *(pPatternWords + Step), 1u, Height, + Stream)); + } + } + }; + + // Apply the implementation to the chosen pattern word type + switch (BackendWordSize) { + case 4u: { + memsetImpl(static_cast(pPattern), cuMemsetD32Async, + cuMemsetD2D32Async); + break; } + case 2u: { + memsetImpl(static_cast(pPattern), cuMemsetD16Async, + cuMemsetD2D16Async); + break; + } + default: { + memsetImpl(static_cast(pPattern), cuMemsetD8Async, + cuMemsetD2D8Async); + break; + } + } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/cuda/memory.cpp b/source/adapters/cuda/memory.cpp index ea55c1669a..8b4db742ac 100644 --- a/source/adapters/cuda/memory.cpp +++ b/source/adapters/cuda/memory.cpp @@ -171,6 +171,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, case UR_MEM_INFO_CONTEXT: { return ReturnValue(hMemory->getContext()); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(hMemory->getReferenceCount()); + } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/cuda/platform.cpp b/source/adapters/cuda/platform.cpp index 218cf9b0db..7ce0bba9e7 100644 --- a/source/adapters/cuda/platform.cpp +++ b/source/adapters/cuda/platform.cpp @@ -168,5 +168,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *ppPlatformOption = ""; return UR_RESULT_SUCCESS; } + if (pFrontendOption == "-foffload-fp32-prec-div"sv || + pFrontendOption == "-foffload-fp32-prec-sqrt"sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index afd15c1bd4..9fed5db2f8 100644 --- a/source/adapters/hip/command_buffer.cpp +++ 
b/source/adapters/hip/command_buffer.cpp @@ -1013,7 +1013,12 @@ updateKernelArguments(ur_exp_command_buffer_command_handle_t Command, const void *ArgValue = ValueArgDesc.pNewValueArg; try { - Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + // Local memory args are passed as value args with nullptr value + if (ArgValue) { + Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + } else { + Kernel->setKernelLocalArg(ArgIndex, ArgSize); + } } catch (ur_result_t Err) { return Err; } diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 761eab954d..b36ed73560 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -75,7 +75,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // These queries should be dealt with in context_impl.cpp by calling the // queries of each device separately and building the intersection set. - return UR_RESULT_ERROR_INVALID_ENUMERATION; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: // 2D USM memcpy is supported. 
diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index dbac5d37f1..5271f73709 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -935,6 +935,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: { + return ReturnValue(false); + } default: break; } diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 293f3eea7a..025a3f41f4 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -436,6 +436,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( } } +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + /// General 3D memory copy operation. /// This function requires the corresponding HIP context to be at the top of /// the context stack @@ -704,25 +712,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( static inline void memsetRemainPattern(hipStream_t Stream, uint32_t PatternSize, size_t Size, const void *pPattern, - hipDeviceptr_t Ptr) { + hipDeviceptr_t Ptr, + uint32_t StartOffset) { + // Calculate the number of times the pattern needs to be applied + auto Height = Size / PatternSize; - // Calculate the number of patterns, stride and the number of times the - // pattern needs to be applied. 
- auto NumberOfSteps = PatternSize / sizeof(uint8_t); - auto Pitch = NumberOfSteps * sizeof(uint8_t); - auto Height = Size / NumberOfSteps; - - for (auto step = 4u; step < NumberOfSteps; ++step) { + for (auto step = StartOffset; step < PatternSize; ++step) { // take 1 byte of the pattern auto Value = *(static_cast(pPattern) + step); // offset the pointer to the part of the buffer we want to write to - auto OffsetPtr = reinterpret_cast(reinterpret_cast(Ptr) + - (step * sizeof(uint8_t))); + auto OffsetPtr = + reinterpret_cast(reinterpret_cast(Ptr) + step); // set all of the pattern chunks - UR_CHECK_ERROR(hipMemset2DAsync(OffsetPtr, Pitch, Value, sizeof(uint8_t), - Height, Stream)); + UR_CHECK_ERROR( + hipMemset2DAsync(OffsetPtr, PatternSize, Value, 1u, Height, Stream)); } } @@ -735,11 +740,55 @@ static inline void memsetRemainPattern(hipStream_t Stream, uint32_t PatternSize, ur_result_t commonMemSetLargePattern(hipStream_t Stream, uint32_t PatternSize, size_t Size, const void *pPattern, hipDeviceptr_t Ptr) { + // Find the largest supported word size into which the pattern can be divided + auto BackendWordSize = PatternSize % 4u == 0u ? 4u + : PatternSize % 2u == 0u ? 
2u + : 1u; + + // Calculate the number of patterns + auto NumberOfSteps = PatternSize / BackendWordSize; + + // If the pattern is 1 word or the first word is repeated throughout, a fast + // continuous fill can be used without the need for slower strided fills + bool UseOnlyFirstValue{true}; + auto checkIfFirstWordRepeats = [&UseOnlyFirstValue, + NumberOfSteps](const auto *pPatternWords) { + for (auto Step{1u}; (Step < NumberOfSteps) && UseOnlyFirstValue; ++Step) { + if (*(pPatternWords + Step) != *pPatternWords) { + UseOnlyFirstValue = false; + } + } + }; - // Get 4-byte chunk of the pattern and call hipMemsetD32Async - auto Count32 = Size / sizeof(uint32_t); - auto Value = *(static_cast(pPattern)); - UR_CHECK_ERROR(hipMemsetD32Async(Ptr, Value, Count32, Stream)); + // Use a continuous fill for the first word in the pattern because it's faster + // than a strided fill. Then, overwrite the other values in subsequent steps. + switch (BackendWordSize) { + case 4u: { + auto *pPatternWords = static_cast(pPattern); + checkIfFirstWordRepeats(pPatternWords); + UR_CHECK_ERROR( + hipMemsetD32Async(Ptr, *pPatternWords, Size / BackendWordSize, Stream)); + break; + } + case 2u: { + auto *pPatternWords = static_cast(pPattern); + checkIfFirstWordRepeats(pPatternWords); + UR_CHECK_ERROR( + hipMemsetD16Async(Ptr, *pPatternWords, Size / BackendWordSize, Stream)); + break; + } + default: { + auto *pPatternWords = static_cast(pPattern); + checkIfFirstWordRepeats(pPatternWords); + UR_CHECK_ERROR( + hipMemsetD8Async(Ptr, *pPatternWords, Size / BackendWordSize, Stream)); + break; + } + } + + if (UseOnlyFirstValue) { + return UR_RESULT_SUCCESS; + } // There is a bug in ROCm prior to 6.0.0 version which causes hipMemset2D // to behave incorrectly when acting on host pinned memory. @@ -753,7 +802,7 @@ ur_result_t commonMemSetLargePattern(hipStream_t Stream, uint32_t PatternSize, // we need to check that isManaged attribute is false. 
if (ptrAttribs.hostPointer && !ptrAttribs.isManaged) { const auto NumOfCopySteps = Size / PatternSize; - const auto Offset = sizeof(uint32_t); + const auto Offset = BackendWordSize; const auto LeftPatternSize = PatternSize - Offset; const auto OffsetPatternPtr = reinterpret_cast( reinterpret_cast(pPattern) + Offset); @@ -768,10 +817,12 @@ ur_result_t commonMemSetLargePattern(hipStream_t Stream, uint32_t PatternSize, Stream)); } } else { - memsetRemainPattern(Stream, PatternSize, Size, pPattern, Ptr); + memsetRemainPattern(Stream, PatternSize, Size, pPattern, Ptr, + BackendWordSize); } #else - memsetRemainPattern(Stream, PatternSize, Size, pPattern, Ptr); + memsetRemainPattern(Stream, PatternSize, Size, pPattern, Ptr, + BackendWordSize); #endif return UR_RESULT_SUCCESS; } @@ -1561,8 +1612,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( // which makes the HIP runtime not correctly derive the copy kind // (direction) for the copies since ROCm 5.6.0+. See: // https://github.com/ROCm/clr/issues/40 - // TODO: Add maximum HIP_VERSION when bug has been fixed. -#if HIP_VERSION >= 50600000 + // Fixed by commit + // https://github.com/ROCm/clr/commit/d3bfb55d7a934355257a72fab538a0a634b43cad + // included in releases starting from ROCm 6.1.0. +#if HIP_VERSION >= 50600000 && HIP_VERSION < 60100000 hipPointerAttribute_t srcAttribs{}; hipPointerAttribute_t dstAttribs{}; diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index aa7b5f4040..93d8450862 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -231,10 +231,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, size_t propSize, void *pMemInfo, size_t *pPropSizeRet) { - - UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT, - UR_RESULT_ERROR_INVALID_ENUMERATION); - // FIXME: Only getting info for the first device in the context. 
This // should be fine in general auto Device = hMemory->getContext()->getDevices()[0]; @@ -286,6 +282,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, case UR_MEM_INFO_CONTEXT: { return ReturnValue(hMemory->getContext()); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(hMemory->getReferenceCount()); + } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -316,14 +315,18 @@ urMemGetNativeHandle(ur_mem_handle_t hMem, ur_device_handle_t Device, return UR_RESULT_ERROR_INVALID_MEM_OBJECT; } } - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).getPtr(Device)); -#elif defined(__HIP_PLATFORM_AMD__) - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).getPtr(Device)); -#else +#elif !defined(__HIP_PLATFORM_AMD__) #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif + if (std::holds_alternative(hMem->Mem)) { + *phNativeMem = reinterpret_cast( + std::get(hMem->Mem).getPtr(Device)); + } else if (std::holds_alternative(hMem->Mem)) { + *phNativeMem = reinterpret_cast( + std::get(hMem->Mem).getSurface(Device)); + } else { + return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/platform.cpp b/source/adapters/hip/platform.cpp index ebfd422a3b..007889f138 100644 --- a/source/adapters/hip/platform.cpp +++ b/source/adapters/hip/platform.cpp @@ -77,6 +77,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, for (auto i = 0u; i < static_cast(NumDevices); ++i) { hipDevice_t Device; UR_CHECK_ERROR(hipDeviceGet(&Device, i)); + UR_CHECK_ERROR(hipSetDevice(i)); hipEvent_t EvBase; UR_CHECK_ERROR(hipEventCreate(&EvBase)); @@ -153,5 +154,10 @@ urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, *ppPlatformOption = ""; return UR_RESULT_SUCCESS; } + if (pFrontendOption == "-foffload-fp32-prec-div"sv || + pFrontendOption == "-foffload-fp32-prec-sqrt"sv) { + *ppPlatformOption = ""; + return 
UR_RESULT_SUCCESS; + } return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index d700fbb2c3..6465ebaa51 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -100,7 +100,7 @@ if(UR_BUILD_ADAPTER_L0) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE @@ -194,7 +194,7 @@ if(UR_BUILD_ADAPTER_L0_V2) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero_v2 PRIVATE diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 9dd2a31268..8995a5e25b 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -76,19 +76,92 @@ ur_result_t getZesDeviceHandle(zes_uuid_t coreDeviceUuid, return UR_RESULT_ERROR_INVALID_ARGUMENT; } +/** + * @brief Initializes the platforms by querying Level Zero drivers and devices. + * + * This function initializes the platforms by querying the available Level Zero + * drivers and devices. It handles different behaviors based on the presence of + * drivers obtained through `zeDriverGet` and initialized drivers through + * `zeInitDrivers`. + * + * @param platforms A vector to store the initialized platform handles. + * @param ZesResult The result of a previous ZES (Level Zero System) operation. + * @return ur_result_t The result of the initialization process. + * + * The function performs the following steps: + * 1. 
Queries the number of Level Zero drivers using `zeDriverGet`. + * 2. If drivers are found, it retrieves their handles. + * 3. If no drivers are found in either `zeInitDrivers` or `zeDriverGet`, + * it logs a message and returns success. + * 4. If `zeInitDrivers` is supported by the global adapter, it retrieves + * their handles and properties. + * 5. It compares the drivers obtained from `zeDriverGet` and `zeInitDrivers`, + * adding unique drivers to the list. + * 6. If `zeInitDrivers` is not supported, it uses the drivers obtained + * from `zeDriverGet`. + * 7. For each driver, it queries the devices and checks if they are GPU + * devices. + * 8. If a GPU device is found, it initializes a platform for the driver and + * adds it to the platforms vector. + * 9. If ZES operations are successful, it populates the ZES/ZE device mapping + * for the devices into the platform. + * 10. The function handles exceptions and returns the appropriate result. + */ ur_result_t initPlatforms(PlatformVec &platforms, ze_result_t ZesResult) noexcept try { + std::vector ZeDrivers; + std::vector ZeDriverGetHandles; + std::vector ZeInitDriversHandles; + std::vector ZeDevices; uint32_t ZeDriverCount = 0; - ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr)); - if (ZeDriverCount == 0) { + uint32_t ZeDriverGetCount = 0; + + auto ZeDriverGetResult = + ZE_CALL_NOCHECK(zeDriverGet, (&ZeDriverGetCount, nullptr)); + if (ZeDriverGetCount > 0 && ZeDriverGetResult == ZE_RESULT_SUCCESS) { + ZeDriverGetHandles.resize(ZeDriverGetCount); + ZE2UR_CALL(zeDriverGet, (&ZeDriverGetCount, ZeDriverGetHandles.data())); + } + if (ZeDriverGetCount == 0 && GlobalAdapter->ZeInitDriversCount == 0) { + logger::debug("\nNo Valid L0 Drivers found.\n"); return UR_RESULT_SUCCESS; } - std::vector ZeDrivers; - std::vector ZeDevices; - ZeDrivers.resize(ZeDriverCount); - - ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data())); + if (GlobalAdapter->InitDriversSupported) { + 
ZeInitDriversHandles.resize(GlobalAdapter->ZeInitDriversCount); + ZeDrivers.resize(GlobalAdapter->ZeInitDriversCount); + ZE2UR_CALL(GlobalAdapter->initDriversFunctionPtr, + (&GlobalAdapter->ZeInitDriversCount, ZeInitDriversHandles.data(), + &GlobalAdapter->InitDriversDesc)); + ZeDrivers.assign(ZeInitDriversHandles.begin(), ZeInitDriversHandles.end()); + if (ZeDriverGetCount > 0 && GlobalAdapter->ZeInitDriversCount > 0) { + for (uint32_t X = 0; X < GlobalAdapter->ZeInitDriversCount; ++X) { + for (uint32_t Y = 0; Y < ZeDriverGetCount; ++Y) { + ZeStruct ZeDriverGetProperties; + ZeStruct ZeInitDriverProperties; + ZE2UR_CALL(zeDriverGetProperties, + (ZeDriverGetHandles[Y], &ZeDriverGetProperties)); + ZE2UR_CALL(zeDriverGetProperties, + (ZeInitDriversHandles[X], &ZeInitDriverProperties)); + // If zeDriverGet driver is different from zeInitDriver driver, add it + // to the list. This allows for older drivers to be used alongside + // newer drivers. + if (ZeDriverGetProperties.driverVersion != + ZeInitDriverProperties.driverVersion) { + logger::debug("\nzeDriverHandle {} added to the zeInitDrivers list " + "of possible handles.\n", + ZeDriverGetHandles[Y]); + ZeDrivers.push_back(ZeDriverGetHandles[Y]); + } + } + } + } + } else { + ZeDrivers.resize(ZeDriverGetCount); + ZeDrivers.assign(ZeDriverGetHandles.begin(), ZeDriverGetHandles.end()); + } + ZeDriverCount = ZeDrivers.size(); + logger::debug("\n{} L0 Drivers found.\n", ZeDriverCount); for (uint32_t I = 0; I < ZeDriverCount; ++I) { // Keep track of the first platform init for this Driver bool DriverPlatformInit = false; @@ -214,6 +287,15 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() return std::atoi(UrRet); }(); + // Dynamically load the new L0 apis separately. + // This must be done to avoid attempting to use symbols that do + // not exist in older loader runtimes. +#ifdef _WIN32 + HMODULE processHandle = GetModuleHandle(NULL); +#else + HMODULE processHandle = nullptr; +#endif + // initialize level zero only once. 
if (GlobalAdapter->ZeResult == std::nullopt) { // Setting these environment variables before running zeInit will enable @@ -235,20 +317,80 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() // called multiple times. Declaring the return value as "static" ensures // it's only called once. + // Set ZES_ENABLE_SYSMAN by default if the user has not set it. + if (UrSysManEnvInitEnabled) { + setEnvVar("ZES_ENABLE_SYSMAN", "1"); + } + // Init with all flags set to enable for all driver types to be init in // the application. ze_init_flags_t L0InitFlags = ZE_INIT_FLAG_GPU_ONLY; if (UrL0InitAllDrivers) { L0InitFlags |= ZE_INIT_FLAG_VPU_ONLY; } - - // Set ZES_ENABLE_SYSMAN by default if the user has not set it. - if (UrSysManEnvInitEnabled) { - setEnvVar("ZES_ENABLE_SYSMAN", "1"); - } logger::debug("\nzeInit with flags value of {}\n", static_cast(L0InitFlags)); - GlobalAdapter->ZeResult = ZE_CALL_NOCHECK(zeInit, (L0InitFlags)); + GlobalAdapter->ZeInitResult = ZE_CALL_NOCHECK(zeInit, (L0InitFlags)); + if (*GlobalAdapter->ZeInitResult != ZE_RESULT_SUCCESS) { + logger::debug("\nzeInit failed with {}\n", + *GlobalAdapter->ZeInitResult); + } + + bool useInitDrivers = false; + zel_version_t loader_version = {}; + size_t num_components; + auto result = zelLoaderGetVersions(&num_components, nullptr); + if (result == ZE_RESULT_SUCCESS) { + zel_component_version_t *versions = + new zel_component_version_t[num_components]; + result = zelLoaderGetVersions(&num_components, versions); + if (result == ZE_RESULT_SUCCESS) { + for (size_t i = 0; i < num_components; ++i) { + if (strncmp(versions[i].component_name, "loader", + strlen("loader")) == 0) { + loader_version = versions[i].component_lib_version; + logger::debug("\nLevel Zero Loader Version: {}.{}.{}\n", + loader_version.major, loader_version.minor, + loader_version.patch); + break; + } + } + } + delete[] versions; + if (loader_version.major > 1 || + (loader_version.major == 1 && loader_version.minor > 19) || + (loader_version.major == 1 
&& loader_version.minor == 19 && + loader_version.patch >= 2)) { + useInitDrivers = true; + } + } + + if (useInitDrivers) { + GlobalAdapter->initDriversFunctionPtr = + (ze_pfnInitDrivers_t)ur_loader::LibLoader::getFunctionPtr( + processHandle, "zeInitDrivers"); + if (GlobalAdapter->initDriversFunctionPtr) { + logger::debug("\nzeInitDrivers with flags value of {}\n", + static_cast(GlobalAdapter->InitDriversDesc.flags)); + GlobalAdapter->ZeInitDriversResult = + ZE_CALL_NOCHECK(GlobalAdapter->initDriversFunctionPtr, + (&GlobalAdapter->ZeInitDriversCount, nullptr, + &GlobalAdapter->InitDriversDesc)); + if (*GlobalAdapter->ZeInitDriversResult == ZE_RESULT_SUCCESS) { + GlobalAdapter->InitDriversSupported = true; + } else { + logger::debug("\nzeInitDrivers failed with {}\n", + *GlobalAdapter->ZeInitDriversResult); + } + } + } + + if (*GlobalAdapter->ZeInitResult == ZE_RESULT_SUCCESS || + *GlobalAdapter->ZeInitDriversResult == ZE_RESULT_SUCCESS) { + GlobalAdapter->ZeResult = ZE_RESULT_SUCCESS; + } else { + GlobalAdapter->ZeResult = ZE_RESULT_ERROR_UNINITIALIZED; + } } assert(GlobalAdapter->ZeResult != std::nullopt); // verify that level-zero is initialized @@ -260,7 +402,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() return; } if (*GlobalAdapter->ZeResult != ZE_RESULT_SUCCESS) { - logger::error("zeInit: Level Zero initialization failure\n"); + logger::error("Level Zero initialization failure\n"); result = ze2urResult(*GlobalAdapter->ZeResult); return; @@ -269,9 +411,9 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() // separately. This must be done to avoid attempting to use symbols that do // not exist in older loader runtimes. #ifdef _WIN32 - HMODULE processHandle = GetModuleHandle(NULL); + GlobalAdapter->processHandle = GetModuleHandle(NULL); #else - HMODULE processHandle = nullptr; + GlobalAdapter->processHandle = nullptr; #endif // Check if the user has enabled the default L0 SysMan initialization. 
@@ -288,13 +430,13 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() GlobalAdapter->getDeviceByUUIdFunctionPtr = (zes_pfnDriverGetDeviceByUuidExp_t) ur_loader::LibLoader::getFunctionPtr( - processHandle, "zesDriverGetDeviceByUuidExp"); + GlobalAdapter->processHandle, "zesDriverGetDeviceByUuidExp"); GlobalAdapter->getSysManDriversFunctionPtr = (zes_pfnDriverGet_t)ur_loader::LibLoader::getFunctionPtr( - processHandle, "zesDriverGet"); + GlobalAdapter->processHandle, "zesDriverGet"); GlobalAdapter->sysManInitFunctionPtr = - (zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(processHandle, - "zesInit"); + (zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr( + GlobalAdapter->processHandle, "zesInit"); } if (GlobalAdapter->getDeviceByUUIdFunctionPtr && GlobalAdapter->getSysManDriversFunctionPtr && diff --git a/source/adapters/level_zero/adapter.hpp b/source/adapters/level_zero/adapter.hpp index 53a58793e5..277d2334b9 100644 --- a/source/adapters/level_zero/adapter.hpp +++ b/source/adapters/level_zero/adapter.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include using PlatformVec = std::vector>; @@ -31,11 +32,20 @@ struct ur_adapter_handle_t_ { zes_pfnDriverGetDeviceByUuidExp_t getDeviceByUUIdFunctionPtr = nullptr; zes_pfnDriverGet_t getSysManDriversFunctionPtr = nullptr; zes_pfnInit_t sysManInitFunctionPtr = nullptr; + ze_pfnInitDrivers_t initDriversFunctionPtr = nullptr; + ze_init_driver_type_desc_t InitDriversDesc = { + ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC, nullptr, + ZE_INIT_DRIVER_TYPE_FLAG_GPU}; + uint32_t ZeInitDriversCount = 0; + bool InitDriversSupported = false; + std::optional ZeInitDriversResult; + std::optional ZeInitResult; std::optional ZeResult; std::optional ZesResult; ZeCache> PlatformCache; logger::Logger &logger; + HMODULE processHandle = nullptr; }; extern ur_adapter_handle_t_ *GlobalAdapter; diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 206787a68e..56c53b5331 100644 
--- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -22,6 +22,71 @@ namespace { +// Checks whether zeCommandListImmediateAppendCommandListsExp can be used for a +// given Context and Device. +bool checkImmediateAppendSupport(ur_context_handle_t Context, + ur_device_handle_t Device) { + // TODO The L0 driver is not reporting this extension yet. Once it does, + // switch to using the variable zeDriverImmediateCommandListAppendFound. + + // Minimum version that supports zeCommandListImmediateAppendCommandListsExp. + constexpr uint32_t MinDriverVersion = 30898; + bool DriverSupportsImmediateAppend = + Context->getPlatform()->isDriverVersionNewerOrSimilar(1, 3, + MinDriverVersion); + + // If this environment variable is: + // - Set to 1: the immediate append path will always be enabled as long as the + // pre-requisites are met. + // - Set to 0: the immediate append path will always be disabled. + // - Not Defined: The default behaviour will be used which enables the + // immediate append path only for some devices when the pre-requisites are + // met. + const char *AppendEnvVarName = "UR_L0_CMD_BUFFER_USE_IMMEDIATE_APPEND_PATH"; + const char *UrRet = std::getenv(AppendEnvVarName); + + if (UrRet) { + const bool EnableAppendPath = std::atoi(UrRet) == 1; + + if (EnableAppendPath && !Device->ImmCommandListUsed) { + logger::error("{} is set but immediate command-lists are currently " + "disabled. Immediate command-lists are " + "required to use the immediate append path.", + AppendEnvVarName); + std::abort(); + } + if (EnableAppendPath && !DriverSupportsImmediateAppend) { + logger::error("{} is set but " + "the current driver does not support the " + "zeCommandListImmediateAppendCommandListsExp entrypoint.
A " + "driver version of at least {} is required to use the " + "immediate append path.", + AppendEnvVarName, MinDriverVersion); + std::abort(); + } + + return EnableAppendPath; + } + + return Device->isPVC() && Device->ImmCommandListUsed && + DriverSupportsImmediateAppend; +} + +// Checks whether counter based events are supported for a given Device. +bool checkCounterBasedEventsSupport(ur_device_handle_t Device) { + static const bool useDriverCounterBasedEvents = [] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS"); + if (!UrRet) { + return true; + } + return std::atoi(UrRet) != 0; + }(); + + return Device->ImmCommandListUsed && Device->useDriverInOrderLists() && + useDriverCounterBasedEvents && + Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound; +} + // Gets a C pointer from a vector. If the vector is empty returns nullptr // instead. This is different from the behaviour of the data() member function // of the vector class which might not return nullptr when the vector is empty. 
@@ -288,18 +353,24 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ze_command_list_handle_t CommandList, ze_command_list_handle_t CommandListTranslated, ze_command_list_handle_t CommandListResetEvents, - ze_command_list_handle_t CopyCommandList, ur_event_handle_t SignalEvent, - ur_event_handle_t WaitEvent, ur_event_handle_t AllResetEvent, - const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList) + ze_command_list_handle_t CopyCommandList, + ur_event_handle_t ExecutionFinishedEvent, ur_event_handle_t WaitEvent, + ur_event_handle_t AllResetEvent, ur_event_handle_t CopyFinishedEvent, + ur_event_handle_t ComputeFinishedEvent, + const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList, + const bool UseImmediateAppendPath) : Context(Context), Device(Device), ZeComputeCommandList(CommandList), ZeComputeCommandListTranslated(CommandListTranslated), ZeCommandListResetEvents(CommandListResetEvents), - ZeCopyCommandList(CopyCommandList), SignalEvent(SignalEvent), - WaitEvent(WaitEvent), AllResetEvent(AllResetEvent), ZeFencesMap(), + ZeCopyCommandList(CopyCommandList), + ExecutionFinishedEvent(ExecutionFinishedEvent), WaitEvent(WaitEvent), + AllResetEvent(AllResetEvent), CopyFinishedEvent(CopyFinishedEvent), + ComputeFinishedEvent(ComputeFinishedEvent), ZeFencesMap(), ZeActiveFence(nullptr), SyncPoints(), NextSyncPoint(0), IsUpdatable(Desc ? Desc->isUpdatable : false), IsProfilingEnabled(Desc ? 
Desc->enableProfiling : false), - IsInOrderCmdList(IsInOrderCmdList) { + IsInOrderCmdList(IsInOrderCmdList), + UseImmediateAppendPath(UseImmediateAppendPath) { ur::level_zero::urContextRetain(Context); ur::level_zero::urDeviceRetain(Device); } @@ -326,11 +397,11 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListResetEvents)); } - // Release additional signal and wait events used by command_buffer - if (SignalEvent) { - CleanupCompletedEvent(SignalEvent, false /*QueueLocked*/, + // Release additional events used by the command_buffer. + if (ExecutionFinishedEvent) { + CleanupCompletedEvent(ExecutionFinishedEvent, false /*QueueLocked*/, false /*SetEventCompleted*/); - urEventReleaseInternal(SignalEvent); + urEventReleaseInternal(ExecutionFinishedEvent); } if (WaitEvent) { CleanupCompletedEvent(WaitEvent, false /*QueueLocked*/, @@ -343,6 +414,22 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { urEventReleaseInternal(AllResetEvent); } + if (CopyFinishedEvent) { + CleanupCompletedEvent(CopyFinishedEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); + urEventReleaseInternal(CopyFinishedEvent); + } + + if (ComputeFinishedEvent) { + CleanupCompletedEvent(ComputeFinishedEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); + urEventReleaseInternal(ComputeFinishedEvent); + } + + if (CurrentSubmissionEvent) { + urEventReleaseInternal(CurrentSubmissionEvent); + } + // Release events added to the command_buffer for (auto &Sync : SyncPoints) { auto &Event = Sync.second; @@ -501,78 +588,132 @@ bool canBeInOrder(ur_context_handle_t Context, : false; } +/** + * Append the initial barriers to the Compute and Copy command-lists. + * @param CommandBuffer The CommandBuffer + * @return UR_RESULT_SUCCESS or an error code on failure. 
+ */ +ur_result_t appendExecutionWaits(ur_exp_command_buffer_handle_t CommandBuffer) { + + std::vector PrecondEvents; + if (CommandBuffer->ZeCommandListResetEvents) { + PrecondEvents.push_back(CommandBuffer->AllResetEvent->ZeEvent); + } + if (!CommandBuffer->UseImmediateAppendPath) { + PrecondEvents.push_back(CommandBuffer->WaitEvent->ZeEvent); + } + + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandBuffer->ZeComputeCommandList, nullptr, + PrecondEvents.size(), PrecondEvents.data())); + + if (CommandBuffer->ZeCopyCommandList) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandBuffer->ZeCopyCommandList, nullptr, PrecondEvents.size(), + PrecondEvents.data())); + } + + return UR_RESULT_SUCCESS; +} + ur_result_t urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, ur_exp_command_buffer_handle_t *CommandBuffer) { - bool IsInOrder = canBeInOrder(Context, CommandBufferDesc); bool EnableProfiling = - CommandBufferDesc && CommandBufferDesc->enableProfiling; + CommandBufferDesc && CommandBufferDesc->enableProfiling && !IsInOrder; bool IsUpdatable = CommandBufferDesc && CommandBufferDesc->isUpdatable; + bool ImmediateAppendPath = checkImmediateAppendSupport(Context, Device); + const bool WaitEventPath = !ImmediateAppendPath; + bool UseCounterBasedEvents = checkCounterBasedEventsSupport(Device) && + IsInOrder && ImmediateAppendPath; if (IsUpdatable) { UR_ASSERT(Context->getPlatform()->ZeMutableCmdListExt.Supported, UR_RESULT_ERROR_UNSUPPORTED_FEATURE); } - ur_event_handle_t SignalEvent; - ur_event_handle_t WaitEvent; - ur_event_handle_t AllResetEvent; - - UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, - false /*HostVisible*/, &SignalEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); - UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, - false /*HostVisible*/, &WaitEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); - 
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, - false /*HostVisible*/, &AllResetEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); - std::vector PrecondEvents = {WaitEvent->ZeEvent, - AllResetEvent->ZeEvent}; - ze_command_list_handle_t ZeComputeCommandList = nullptr; + ze_command_list_handle_t ZeCopyCommandList = nullptr; + ze_command_list_handle_t ZeCommandListResetEvents = nullptr; + ze_command_list_handle_t ZeComputeCommandListTranslated = nullptr; + UR_CALL(createMainCommandList(Context, Device, IsInOrder, IsUpdatable, false, ZeComputeCommandList)); - ZE2UR_CALL(zeCommandListAppendBarrier, - (ZeComputeCommandList, nullptr, PrecondEvents.size(), - PrecondEvents.data())); - - ze_command_list_handle_t ZeCommandListResetEvents = nullptr; - UR_CALL(createMainCommandList(Context, Device, false, false, false, - ZeCommandListResetEvents)); - ZE2UR_CALL(zeCommandListAppendEventReset, - (ZeCommandListResetEvents, SignalEvent->ZeEvent)); // Create a list for copy commands. Note that to simplify the implementation, // the current implementation only uses the main copy engine and does not use // the link engine even if available. - ze_command_list_handle_t ZeCopyCommandList = nullptr; if (Device->hasMainCopyEngine()) { UR_CALL(createMainCommandList(Context, Device, false, false, true, ZeCopyCommandList)); - ZE2UR_CALL(zeCommandListAppendBarrier, - (ZeCopyCommandList, nullptr, PrecondEvents.size(), - PrecondEvents.data())); } - ze_command_list_handle_t ZeComputeCommandListTranslated = nullptr; ZE2UR_CALL(zelLoaderTranslateHandle, (ZEL_HANDLE_COMMAND_LIST, ZeComputeCommandList, (void **)&ZeComputeCommandListTranslated)); + // The CopyFinishedEvent and ComputeFinishedEvent are needed only when using + // the ImmediateAppend Path. 
+ ur_event_handle_t CopyFinishedEvent = nullptr; + ur_event_handle_t ComputeFinishedEvent = nullptr; + if (ImmediateAppendPath) { + if (Device->hasMainCopyEngine()) { + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false, false, + &CopyFinishedEvent, UseCounterBasedEvents, + !EnableProfiling)); + } + + if (EnableProfiling) { + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &ComputeFinishedEvent, + UseCounterBasedEvents, !EnableProfiling)); + } + } + + // The WaitEvent is needed only when using WaitEvent Path. + ur_event_handle_t WaitEvent = nullptr; + if (WaitEventPath) { + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &WaitEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + } + + // Create ZeCommandListResetEvents only if counter-based events are not being + // used. Using counter-based events means that there is no need to reset any + // events between executions. Counter-based events can only be enabled on the + // ImmediateAppend Path. + ur_event_handle_t AllResetEvent = nullptr; + ur_event_handle_t ExecutionFinishedEvent = nullptr; + if (!UseCounterBasedEvents) { + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &AllResetEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + + UR_CALL(createMainCommandList(Context, Device, false, false, false, + ZeCommandListResetEvents)); + + // The ExecutionFinishedEvent is only waited on by ZeCommandListResetEvents. 
+ UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &ExecutionFinishedEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + } + try { *CommandBuffer = new ur_exp_command_buffer_handle_t_( Context, Device, ZeComputeCommandList, ZeComputeCommandListTranslated, - ZeCommandListResetEvents, ZeCopyCommandList, SignalEvent, WaitEvent, - AllResetEvent, CommandBufferDesc, IsInOrder); + ZeCommandListResetEvents, ZeCopyCommandList, ExecutionFinishedEvent, + WaitEvent, AllResetEvent, CopyFinishedEvent, ComputeFinishedEvent, + CommandBufferDesc, IsInOrder, ImmediateAppendPath); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { return UR_RESULT_ERROR_UNKNOWN; } + UR_CALL(appendExecutionWaits(*CommandBuffer)); + return UR_RESULT_SUCCESS; } @@ -592,16 +733,84 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } +/* Finalizes the command-buffer so that it can later be enqueued using + * enqueueImmediateAppendPath() which uses the + * zeCommandListImmediateAppendCommandListsExp API. */ ur_result_t -urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { - UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); - // It is not allowed to append to command list from multiple threads. - std::scoped_lock Guard(CommandBuffer->Mutex); +finalizeImmediateAppendPath(ur_exp_command_buffer_handle_t CommandBuffer) { + + // Wait for the Copy Queue to finish at the end of the compute command list. 
+ if (!CommandBuffer->MCopyCommandListEmpty) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandBuffer->ZeCopyCommandList, + CommandBuffer->CopyFinishedEvent->ZeEvent, 0, nullptr)); + + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandBuffer->ZeComputeCommandList, nullptr, 1, + &CommandBuffer->CopyFinishedEvent->ZeEvent)); + } + + if (CommandBuffer->ZeCommandListResetEvents) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandBuffer->ZeCommandListResetEvents, nullptr, 1, + &CommandBuffer->ExecutionFinishedEvent->ZeEvent)); + + // Reset the L0 events we use for command-buffer sync-points to the + // non-signaled state. This is required for multiple submissions. + for (auto &Event : CommandBuffer->ZeEventsList) { + ZE2UR_CALL(zeCommandListAppendEventReset, + (CommandBuffer->ZeCommandListResetEvents, Event)); + } + + if (!CommandBuffer->MCopyCommandListEmpty) { + ZE2UR_CALL(zeCommandListAppendEventReset, + (CommandBuffer->ZeCommandListResetEvents, + CommandBuffer->CopyFinishedEvent->ZeEvent)); + } + + // Only the profiling command-list has a wait on the ComputeFinishedEvent + if (CommandBuffer->IsProfilingEnabled) { + ZE2UR_CALL(zeCommandListAppendEventReset, + (CommandBuffer->ZeCommandListResetEvents, + CommandBuffer->ComputeFinishedEvent->ZeEvent)); + } + + ZE2UR_CALL(zeCommandListAppendEventReset, + (CommandBuffer->ZeCommandListResetEvents, + CommandBuffer->ExecutionFinishedEvent->ZeEvent)); + + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandBuffer->ZeCommandListResetEvents, + CommandBuffer->AllResetEvent->ZeEvent, 0, nullptr)); + + // Reset the all-reset-event for the UR command-buffer that is signaled + // when all events of the main command-list have been reset. + ZE2UR_CALL(zeCommandListAppendEventReset, + (CommandBuffer->ZeComputeCommandList, + CommandBuffer->AllResetEvent->ZeEvent)); + + // All the events are reset by default.
So signal the all reset event for + // the first run of the command buffer + ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->AllResetEvent->ZeEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/* Finalizes the command-buffer so that it can later be enqueued using + * enqueueWaitEventPath() which uses the zeCommandQueueExecuteCommandLists API. + */ +ur_result_t +finalizeWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer) { + + ZE2UR_CALL(zeCommandListAppendEventReset, + (CommandBuffer->ZeCommandListResetEvents, + CommandBuffer->ExecutionFinishedEvent->ZeEvent)); if (CommandBuffer->IsInOrderCmdList) { ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandBuffer->ZeComputeCommandList, - CommandBuffer->SignalEvent->ZeEvent)); + CommandBuffer->ExecutionFinishedEvent->ZeEvent)); } else { // Reset the L0 events we use for command-buffer sync-points to the // non-signaled state. This is required for multiple submissions. @@ -614,7 +823,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { // command-buffer signal-event when they are done. ZE2UR_CALL(zeCommandListAppendBarrier, (CommandBuffer->ZeComputeCommandList, - CommandBuffer->SignalEvent->ZeEvent, + CommandBuffer->ExecutionFinishedEvent->ZeEvent, CommandBuffer->ZeEventsList.size(), CommandBuffer->ZeEventsList.data())); } @@ -623,9 +832,28 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { (CommandBuffer->ZeCommandListResetEvents, CommandBuffer->AllResetEvent->ZeEvent)); + return UR_RESULT_SUCCESS; +} + +ur_result_t +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { + UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + // It is not allowed to append to command list from multiple threads. 
+ std::scoped_lock Guard(CommandBuffer->Mutex); + + if (CommandBuffer->UseImmediateAppendPath) { + UR_CALL(finalizeImmediateAppendPath(CommandBuffer)); + } else { + UR_CALL(finalizeWaitEventPath(CommandBuffer)); + } + // Close the command lists and have them ready for dispatch. ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeComputeCommandList)); - ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandListResetEvents)); + + if (CommandBuffer->ZeCommandListResetEvents) { + ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandListResetEvents)); + } if (CommandBuffer->useCopyEngine()) { ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCopyCommandList)); @@ -1218,67 +1446,129 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, } /** - * Creates a host visible event and appends a barrier to signal it when the - * command buffer finishes executing. - * @param[in] CommandBuffer The command buffer. - * @param[in] Queue The UR queue used to submit the command buffer. - * @param[in] SignalCommandList The command-list to append the barrier to. - * @param[out][optional] Event The host visible event which will be returned - * to the user, if user passed an output parameter to the UR API. - * @return UR_RESULT_SUCCESS or an error code on failure + * Appends a QueryKernelTimestamps command that does profiling for all the + * sync-point events. + * @param CommandBuffer The command-buffer that is being enqueued. + * @param CommandList The command-list to append the QueryKernelTimestamps + * command to. + * @param SignalEvent The event that must be signaled after the profiling is + * finished. This event will contain the profiling information. + * @param WaitEvent The event that must be waited on before starting the + * profiling. + * @return UR_RESULT_SUCCESS or an error code on failure. 
*/ -ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, - ur_queue_handle_t Queue, - ur_command_list_ptr_t SignalCommandList, - ur_event_handle_t *Event) { - // Execution event for this enqueue of the UR command-buffer - ur_event_handle_t RetEvent{}; - - UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent, - UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, - SignalCommandList, false, false, true)); - - if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) && - (!CommandBuffer->IsInOrderCmdList) && - (CommandBuffer->IsProfilingEnabled)) { - // Multiple submissions of a command buffer implies that we need to save - // the event timestamps before resubmiting the command buffer. We - // therefore copy the these timestamps in a dedicated USM memory section - // before completing the command buffer execution, and then attach this - // memory to the event returned to users to allow to allow the profiling - // engine to recover these timestamps. - command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); - - Profiling->NumEvents = CommandBuffer->ZeEventsList.size(); - Profiling->Timestamps = - new ze_kernel_timestamp_result_t[Profiling->NumEvents]; - - ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, - (SignalCommandList->first, CommandBuffer->ZeEventsList.size(), - CommandBuffer->ZeEventsList.data(), - (void *)Profiling->Timestamps, 0, RetEvent->ZeEvent, 1, - &(CommandBuffer->SignalEvent->ZeEvent))); - - RetEvent->CommandData = static_cast(Profiling); - } else { +ur_result_t appendProfilingQueries(ur_exp_command_buffer_handle_t CommandBuffer, + ze_command_list_handle_t CommandList, + ur_event_handle_t SignalEvent, + ur_event_handle_t WaitEvent) { + // Multiple submissions of a command buffer imply that we need to save + // the event timestamps before resubmitting the command buffer.
We + // therefore copy these timestamps in a dedicated USM memory section + // before completing the command buffer execution, and then attach this + // memory to the event returned to users to allow the profiling + // engine to recover these timestamps. + command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); + + Profiling->NumEvents = CommandBuffer->ZeEventsList.size(); + Profiling->Timestamps = + new ze_kernel_timestamp_result_t[Profiling->NumEvents]; + + ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, + (CommandList, CommandBuffer->ZeEventsList.size(), + CommandBuffer->ZeEventsList.data(), (void *)Profiling->Timestamps, + 0, SignalEvent->ZeEvent, 1, &(WaitEvent->ZeEvent))); + + SignalEvent->CommandData = static_cast(Profiling); + + return UR_RESULT_SUCCESS; +} + +/* Enqueues the command-buffer using the + * zeCommandListImmediateAppendCommandListsExp API. */ +ur_result_t enqueueImmediateAppendPath( + ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *Event, ur_command_list_ptr_t CommandListHelper, + bool DoProfiling) { + + assert(CommandListHelper->second.IsImmediate); + + _ur_ze_event_list_t UrZeEventList; + if (NumEventsInWaitList) { + UR_CALL(UrZeEventList.createAndRetainUrZeEventList( + NumEventsInWaitList, EventWaitList, Queue, false)); + } + (*Event)->WaitList = UrZeEventList; + const auto &WaitList = (*Event)->WaitList; + + if (!CommandBuffer->MCopyCommandListEmpty) { + ur_command_list_ptr_t ZeCopyEngineImmediateListHelper{}; + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, ZeCopyEngineImmediateListHelper, true /*UseCopyEngine*/, + NumEventsInWaitList, EventWaitList, false /*AllowBatching*/, + nullptr /*ForcedCmdQueue*/)); + assert(ZeCopyEngineImmediateListHelper->second.IsImmediate); + + ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp, + (ZeCopyEngineImmediateListHelper->first, 1, + 
&CommandBuffer->ZeCopyCommandList, nullptr, + UrZeEventList.Length, UrZeEventList.ZeEventList)); + + UR_CALL(Queue->executeCommandList(ZeCopyEngineImmediateListHelper, false, + false)); + } + + ze_event_handle_t &EventToSignal = + DoProfiling ? CommandBuffer->ComputeFinishedEvent->ZeEvent + : (*Event)->ZeEvent; + ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp, + (CommandListHelper->first, 1, &CommandBuffer->ZeComputeCommandList, + EventToSignal, WaitList.Length, WaitList.ZeEventList)); + + if (DoProfiling) { + UR_CALL(appendProfilingQueries(CommandBuffer, CommandListHelper->first, + *Event, + CommandBuffer->ComputeFinishedEvent)); + } + + // When the current execution is finished, signal ExecutionFinishedEvent to + // reset all the events and prepare for the next execution. + if (CommandBuffer->ZeCommandListResetEvents) { ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(CommandBuffer->SignalEvent->ZeEvent))); + (CommandListHelper->first, + CommandBuffer->ExecutionFinishedEvent->ZeEvent, 0, nullptr)); + + ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp, + (CommandListHelper->first, 1, + &CommandBuffer->ZeCommandListResetEvents, nullptr, 0, nullptr)); } - if (Event) { - *Event = RetEvent; + /* The event needs to be retained since it will be used later by the + command-buffer. If not retained, it might be released when + ZeImmediateListHelper is reset. If there is an existing event from a + previous submission of the command-buffer, release it since it is no longer + needed. 
*/ + if (CommandBuffer->CurrentSubmissionEvent) { + UR_CALL(urEventReleaseInternal(CommandBuffer->CurrentSubmissionEvent)); } + (*Event)->RefCount.increment(); + CommandBuffer->CurrentSubmissionEvent = *Event; + + UR_CALL(Queue->executeCommandList(CommandListHelper, false, false)); return UR_RESULT_SUCCESS; } -ur_result_t -urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, - ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *Event) { - std::scoped_lock Lock(Queue->Mutex); +/* Enqueue the command-buffer using zeCommandQueueExecuteCommandLists. + * Also uses separate command-lists to wait for the dependencies and to + * signal the execution finished event. */ +ur_result_t enqueueWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer, + ur_queue_handle_t Queue, + uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *Event, + ur_command_list_ptr_t SignalCommandList, + bool DoProfiling) { ze_command_queue_handle_t ZeCommandQueue; getZeCommandQueue(Queue, false, ZeCommandQueue); @@ -1291,16 +1581,16 @@ urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, // Submit reset events command-list. This command-list is of a batch // command-list type, regardless of the UR Queue type. We therefore need to - // submit the list directly using the Level-Zero API to avoid type mismatches - // if using UR functions. + // submit the list directly using the Level-Zero API to avoid type + // mismatches if using UR functions. ZE2UR_CALL( zeCommandQueueExecuteCommandLists, (ZeCommandQueue, 1, &CommandBuffer->ZeCommandListResetEvents, nullptr)); // Submit main command-list. This command-list is of a batch command-list - // type, regardless of the UR Queue type. We therefore need to submit the list - // directly using the Level-Zero API to avoid type mismatches if using UR - // functions. + // type, regardless of the UR Queue type. 
We therefore need to submit the + // list directly using the Level-Zero API to avoid type mismatches if using + // UR functions. ZE2UR_CALL( zeCommandQueueExecuteCommandLists, (ZeCommandQueue, 1, &CommandBuffer->ZeComputeCommandList, ZeFence)); @@ -1315,12 +1605,6 @@ urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, (ZeCopyCommandQueue, 1, &CommandBuffer->ZeCopyCommandList, nullptr)); } - // Create a command-list to signal the Event on completion - ur_command_list_ptr_t SignalCommandList{}; - UR_CALL(Queue->Context->getAvailableCommandList( - Queue, SignalCommandList, false /*UseCopyEngine*/, NumEventsInWaitList, - EventWaitList, false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); - // Reset the wait-event for the UR command-buffer that is signaled when its // submission dependencies have been satisfied. ZE2UR_CALL(zeCommandListAppendEventReset, @@ -1330,9 +1614,15 @@ urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, ZE2UR_CALL(zeCommandListAppendEventReset, (SignalCommandList->first, CommandBuffer->AllResetEvent->ZeEvent)); - // Appends a wait on the main command-list signal and registers output Event - // parameter with signal command-list completing. 
- UR_CALL(createUserEvent(CommandBuffer, Queue, SignalCommandList, Event)); + if (DoProfiling) { + UR_CALL(appendProfilingQueries(CommandBuffer, SignalCommandList->first, + *Event, + CommandBuffer->ExecutionFinishedEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendBarrier, + (SignalCommandList->first, (*Event)->ZeEvent, 1, + &(CommandBuffer->ExecutionFinishedEvent->ZeEvent))); + } UR_CALL(Queue->executeCommandList(SignalCommandList, false /*IsBlocking*/, false /*OKToBatchCommand*/)); @@ -1340,6 +1630,44 @@ urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } +ur_result_t urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *Event) { + + std::scoped_lock Lock(UrQueue->Mutex); + + const bool IsInternal = (Event == nullptr); + const bool DoProfiling = + (UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) && + (!CommandBuffer->IsInOrderCmdList) && + (CommandBuffer->IsProfilingEnabled) && Event; + ur_event_handle_t InternalEvent; + ur_event_handle_t *OutEvent = Event ? 
Event : &InternalEvent; + + ur_command_list_ptr_t ZeCommandListHelper{}; + UR_CALL(UrQueue->Context->getAvailableCommandList( + UrQueue, ZeCommandListHelper, false /*UseCopyEngine*/, + NumEventsInWaitList, EventWaitList, false /*AllowBatching*/, + nullptr /*ForcedCmdQueue*/)); + + UR_CALL(createEventAndAssociateQueue( + UrQueue, OutEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, + ZeCommandListHelper, IsInternal, false, std::nullopt)); + + if (CommandBuffer->UseImmediateAppendPath) { + UR_CALL(enqueueImmediateAppendPath( + CommandBuffer, UrQueue, NumEventsInWaitList, EventWaitList, OutEvent, + ZeCommandListHelper, DoProfiling)); + } else { + UR_CALL(enqueueWaitEventPath(CommandBuffer, UrQueue, NumEventsInWaitList, + EventWaitList, OutEvent, ZeCommandListHelper, + DoProfiling)); + } + + return UR_RESULT_SUCCESS; +} + ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t Command) { Command->RefCount.increment(); @@ -1372,7 +1700,7 @@ ur_result_t validateCommandDesc( logger::debug("Mutable features supported by device {}", SupportedFeatures); // Kernel handle updates are not yet supported. - if (CommandDesc->hNewKernel != Command->Kernel) { + if (CommandDesc->hNewKernel && CommandDesc->hNewKernel != Command->Kernel) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -1653,6 +1981,29 @@ ur_result_t updateKernelCommand( return UR_RESULT_SUCCESS; } +/** + * Waits for any ongoing executions of the command-buffer to finish before + * updating. + * @param CommandBuffer The command-buffer to wait for. 
+ * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) { + if (CommandBuffer->UseImmediateAppendPath) { + if (ur_event_handle_t &CurrentSubmissionEvent = + CommandBuffer->CurrentSubmissionEvent) { + ZE2UR_CALL(zeEventHostSynchronize, + (CurrentSubmissionEvent->ZeEvent, UINT64_MAX)); + UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent)); + CurrentSubmissionEvent = nullptr; + } + } else if (ze_fence_handle_t &ZeFence = CommandBuffer->ZeActiveFence) { + ZE2UR_CALL(zeFenceHostSynchronize, (ZeFence, UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; +} + ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { @@ -1668,12 +2019,7 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp( UR_RESULT_ERROR_INVALID_OPERATION); UR_CALL(validateCommandDesc(Command, CommandDesc)); - - // We must synchronize mutable command list execution before mutating. 
- if (ze_fence_handle_t &ZeFence = Command->CommandBuffer->ZeActiveFence) { - ZE2UR_CALL(zeFenceHostSynchronize, (ZeFence, UINT64_MAX)); - } - + UR_CALL(waitForOngoingExecution(Command->CommandBuffer)); UR_CALL(updateKernelCommand(Command, CommandDesc)); ZE2UR_CALL(zeCommandListClose, diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index c86c6f5ba7..156e0e5c24 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -31,9 +31,12 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { ze_command_list_handle_t CommandList, ze_command_list_handle_t CommandListTranslated, ze_command_list_handle_t CommandListResetEvents, - ze_command_list_handle_t CopyCommandList, ur_event_handle_t SignalEvent, - ur_event_handle_t WaitEvent, ur_event_handle_t AllResetEvent, - const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList); + ze_command_list_handle_t CopyCommandList, + ur_event_handle_t ExecutionFinishedEvent, ur_event_handle_t WaitEvent, + ur_event_handle_t AllResetEvent, ur_event_handle_t CopyFinishedEvent, + ur_event_handle_t ComputeFinishedEvent, + const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList, + const bool UseImmediateAppendPath); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, ur_event_handle_t Event); @@ -72,33 +75,48 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { ur_context_handle_t Context; // Device associated with this command buffer ur_device_handle_t Device; - // Level Zero command list handle + // Level Zero command list handle that has the compute engine commands for + // this command-buffer. ze_command_list_handle_t ZeComputeCommandList; // Given a multi driver scenario, the driver handle must be translated to the // internal driver handle to allow calls to driver experimental apis. 
ze_command_list_handle_t ZeComputeCommandListTranslated; - // Level Zero command list handle + // Level Zero command list handle that is responsible for resetting + // the events after the compute and copy command-lists execute. ze_command_list_handle_t ZeCommandListResetEvents; - // Level Zero Copy command list handle + // Level Zero command list handle that has the copy engine commands for this + // command-buffer. ze_command_list_handle_t ZeCopyCommandList; // Event which will signals the most recent execution of the command-buffer - // has finished - ur_event_handle_t SignalEvent = nullptr; - // Event which a command-buffer waits on until the wait-list dependencies - // passed to a command-buffer enqueue have been satisfied. + // has finished. + ur_event_handle_t ExecutionFinishedEvent = nullptr; + // [WaitEvent Path Only] Event which a command-buffer waits on until the + // wait-list dependencies passed to a command-buffer enqueue have been + // satisfied. ur_event_handle_t WaitEvent = nullptr; - // Event which a command-buffer waits on until the main command-list event + // Event which a command-buffer waits on until the main command-list events // have been reset. ur_event_handle_t AllResetEvent = nullptr; - // This flag is must be set to false if at least one copy command has been + // [ImmediateAppend Path Only] Event that is signalled after the copy engine + // command-list finishes executing. + ur_event_handle_t CopyFinishedEvent = nullptr; + // [ImmediateAppend Path Only] Event that is signalled after the compute + // engine command-list finishes executing. + ur_event_handle_t ComputeFinishedEvent = nullptr; + // [ImmediateAppend Path Only] Event that is signaled after the current + // submission of this command-buffer finishes executing (i.e. after + // ZeComputeCommandList finishes executing). 
+ ur_event_handle_t CurrentSubmissionEvent = nullptr; + // This flag must be set to false if at least one copy command has been // added to `ZeCopyCommandList` bool MCopyCommandListEmpty = true; - // Level Zero fences for each queue the command-buffer has been enqueued to. - // These should be destroyed when the command-buffer is released. + // [WaitEvent Path only] Level Zero fences for each queue the command-buffer + // has been enqueued to. These should be destroyed when the command-buffer is + // released. std::unordered_map ZeFencesMap; - // The Level Zero fence from the most recent enqueue of the command-buffer. - // Must be an element in ZeFencesMap, so is not required to be destroyed - // itself. + // [WaitEvent Path only] The Level Zero fence from the most recent enqueue of + // the command-buffer. Must be an element in ZeFencesMap, so is not required + // to be destroyed itself. ze_fence_handle_t ZeActiveFence; // Map of sync_points to ur_events std::unordered_map @@ -117,6 +135,9 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { bool IsProfilingEnabled = false; // Command-buffer can be submitted to an in-order command-list. bool IsInOrderCmdList = false; + // Whether this command-buffer should use the code path that uses + // zeCommandListImmediateAppendCommandListsExp during enqueue. + bool UseImmediateAppendPath = false; // This list is needed to release all kernels retained by the // command_buffer. 
std::vector KernelsList; diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 41c7593237..7c1c412ee4 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -126,13 +126,16 @@ ur_result_t urContextGetInfo( UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; return ReturnValue(Capabilities); } + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } default: // TODO: implement other parameters - die("urGetContextInfo: unsuppported ParamName."); + return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - return UR_RESULT_SUCCESS; } ur_result_t urContextGetNativeHandle( diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 94dad86070..865edebc08 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1151,6 +1151,8 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_USM_POOL_SUPPORT: return ReturnValue(true); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 408580dd80..96da4be0fd 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -419,6 +419,25 @@ ur_result_t urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t + *, ///< [in][optional] pointer to the extended enqueue properties + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + 
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating that + ///< all previously enqueued commands must be complete. + ur_event_handle_t + *OutEvent ///< [in,out][optional] return an event object that identifies + ///< this particular command instance. +) { + return ur::level_zero::urEnqueueEventsWaitWithBarrier( + Queue, NumEventsInWaitList, EventWaitList, OutEvent); +} + ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query @@ -530,7 +549,8 @@ ur_result_t urEventGetProfilingInfo( // For timestamped events we have the timestamps ready directly on the event // handle, so we short-circuit the return. - if (isTimestampedEvent) { + // We don't support user events with timestamps due to requiring the UrQueue. + if (isTimestampedEvent && Event->UrQueue) { uint64_t ContextStartTime = Event->RecordEventStartTimestamp; switch (PropName) { case UR_PROFILING_INFO_COMMAND_QUEUED: diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 706a3f1364..5283ea4da3 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -1746,7 +1746,7 @@ ur_result_t urMemBufferCreateWithNativeHandle( ur_mem_handle_t *Mem ///< [out] pointer to handle of buffer memory object created. ) { - bool OwnNativeHandle = Properties->isNativeHandleOwned; + bool OwnNativeHandle = Properties ? Properties->isNativeHandleOwned : false; std::shared_lock Lock(Context->Mutex); @@ -1844,9 +1844,6 @@ ur_result_t urMemGetInfo( size_t *PropSizeRet ///< [out][optional] pointer to the actual size in ///< bytes of data queried by pMemInfo. 
) { - UR_ASSERT(MemInfoType == UR_MEM_INFO_CONTEXT || !Memory->isImage(), - UR_RESULT_ERROR_INVALID_VALUE); - auto Buffer = reinterpret_cast<_ur_buffer *>(Memory); std::shared_lock Lock(Buffer->Mutex); UrReturnHelper ReturnValue(PropSize, MemInfo, PropSizeRet); @@ -1859,8 +1856,11 @@ ur_result_t urMemGetInfo( // Get size of the allocation return ReturnValue(size_t{Buffer->Size}); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(Buffer->RefCount.load()); + } default: { - die("urMemGetInfo: Parameter is not implemented"); + return UR_RESULT_ERROR_INVALID_ENUMERATION; } } @@ -2240,7 +2240,7 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, nullptr, 0u, nullptr)); } if (waitlist.ZeEventList) { - delete waitlist.ZeEventList; + delete[] waitlist.ZeEventList; } } Allocation.Valid = true; diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 721db3c359..520b52a1c8 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -196,6 +196,11 @@ ur_result_t urPlatformGetBackendOption( *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; return UR_RESULT_SUCCESS; } + if (FrontendOption == "-foffload-fp32-prec-div"sv || + FrontendOption == "-foffload-fp32-prec-sqrt"sv) { + *PlatformOption = "-ze-fp32-correctly-rounded-divide-sqrt"; + return UR_RESULT_SUCCESS; + } return UR_RESULT_ERROR_INVALID_VALUE; } @@ -215,6 +220,7 @@ ur_result_t ur_platform_handle_t_::initialize() { ZE2UR_CALL(zeDriverGetExtensionProperties, (ZeDriver, &Count, ZeExtensions.data())); + bool MutableCommandListSpecExtensionSupported = false; for (auto &extension : ZeExtensions) { // Check if global offset extension is available if (strncmp(extension.name, ZE_GLOBAL_OFFSET_EXP_NAME, @@ -239,6 +245,13 @@ ur_result_t ur_platform_handle_t_::initialize() { ZeDriverEventPoolCountingEventsExtensionFound = true; } } + // Check if extension is available for Mutable 
Command List v1.1. + if (strncmp(extension.name, ZE_MUTABLE_COMMAND_LIST_EXP_NAME, + strlen(ZE_MUTABLE_COMMAND_LIST_EXP_NAME) + 1) == 0) { + if (extension.version == ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1) { + MutableCommandListSpecExtensionSupported = true; + } + } zeDriverExtensionMap[extension.name] = extension.version; } @@ -275,37 +288,72 @@ ur_result_t ur_platform_handle_t_::initialize() { // Check if mutable command list extension is supported and initialize // function pointers. - ZeMutableCmdListExt.Supported |= - (ZE_CALL_NOCHECK( - zeDriverGetExtensionFunctionAddress, - (ZeDriver, "zeCommandListGetNextCommandIdExp", - reinterpret_cast( - &ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp))) == 0); - - ZeMutableCmdListExt.Supported &= - (ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress, - (ZeDriver, "zeCommandListUpdateMutableCommandsExp", - reinterpret_cast( - &ZeMutableCmdListExt - .zexCommandListUpdateMutableCommandsExp))) == - 0); - - ZeMutableCmdListExt.Supported &= - (ZE_CALL_NOCHECK( - zeDriverGetExtensionFunctionAddress, - (ZeDriver, "zeCommandListUpdateMutableCommandSignalEventExp", - reinterpret_cast( - &ZeMutableCmdListExt - .zexCommandListUpdateMutableCommandSignalEventExp))) == 0); - - ZeMutableCmdListExt.Supported &= - (ZE_CALL_NOCHECK( - zeDriverGetExtensionFunctionAddress, - (ZeDriver, "zeCommandListUpdateMutableCommandWaitEventsExp", - reinterpret_cast( - &ZeMutableCmdListExt - .zexCommandListUpdateMutableCommandWaitEventsExp))) == 0); - + if (MutableCommandListSpecExtensionSupported) { + ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp = + (ze_pfnCommandListGetNextCommandIdExp_t) + ur_loader::LibLoader::getFunctionPtr( + GlobalAdapter->processHandle, + "zeCommandListGetNextCommandIdExp"); + ZeMutableCmdListExt.Supported |= + ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp != nullptr; + ZeMutableCmdListExt.zexCommandListUpdateMutableCommandsExp = + (ze_pfnCommandListUpdateMutableCommandsExp_t) + 
ur_loader::LibLoader::getFunctionPtr( + GlobalAdapter->processHandle, + "zeCommandListUpdateMutableCommandsExp"); + ZeMutableCmdListExt.Supported |= + ZeMutableCmdListExt.zexCommandListUpdateMutableCommandsExp != nullptr; + ZeMutableCmdListExt.zexCommandListUpdateMutableCommandSignalEventExp = + (ze_pfnCommandListUpdateMutableCommandSignalEventExp_t) + ur_loader::LibLoader::getFunctionPtr( + GlobalAdapter->processHandle, + "zeCommandListUpdateMutableCommandSignalEventExp"); + ZeMutableCmdListExt.Supported |= + ZeMutableCmdListExt.zexCommandListUpdateMutableCommandSignalEventExp != + nullptr; + ZeMutableCmdListExt.zexCommandListUpdateMutableCommandWaitEventsExp = + (ze_pfnCommandListUpdateMutableCommandWaitEventsExp_t) + ur_loader::LibLoader::getFunctionPtr( + GlobalAdapter->processHandle, + "zeCommandListUpdateMutableCommandWaitEventsExp"); + ZeMutableCmdListExt.Supported |= + ZeMutableCmdListExt.zexCommandListUpdateMutableCommandWaitEventsExp != + nullptr; + } else { + ZeMutableCmdListExt.Supported |= + (ZE_CALL_NOCHECK( + zeDriverGetExtensionFunctionAddress, + (ZeDriver, "zeCommandListGetNextCommandIdExp", + reinterpret_cast( + &ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp))) == + 0); + + ZeMutableCmdListExt.Supported &= + (ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress, + (ZeDriver, "zeCommandListUpdateMutableCommandsExp", + reinterpret_cast( + &ZeMutableCmdListExt + .zexCommandListUpdateMutableCommandsExp))) == + 0); + + ZeMutableCmdListExt.Supported &= + (ZE_CALL_NOCHECK( + zeDriverGetExtensionFunctionAddress, + (ZeDriver, "zeCommandListUpdateMutableCommandSignalEventExp", + reinterpret_cast( + &ZeMutableCmdListExt + .zexCommandListUpdateMutableCommandSignalEventExp))) == + 0); + + ZeMutableCmdListExt.Supported &= + (ZE_CALL_NOCHECK( + zeDriverGetExtensionFunctionAddress, + (ZeDriver, "zeCommandListUpdateMutableCommandWaitEventsExp", + reinterpret_cast( + &ZeMutableCmdListExt + .zexCommandListUpdateMutableCommandWaitEventsExp))) == + 0); + } 
return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index b53b55bb23..413bb5c48a 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -12,6 +12,7 @@ #include "common.hpp" #include "ur_api.h" #include "ze_api.h" +#include "ze_ddi.h" #include "zes_api.h" struct ur_device_handle_t_; @@ -59,6 +60,7 @@ struct ur_platform_handle_t_ : public _ur_platform { bool ZeDriverGlobalOffsetExtensionFound{false}; bool ZeDriverModuleProgramExtensionFound{false}; bool ZeDriverEventPoolCountingEventsExtensionFound{false}; + bool zeDriverImmediateCommandListAppendFound{false}; // Cache UR devices for reuse std::vector> URDevicesCache; diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index d7adc5eb37..b5a64c3eda 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -618,6 +618,9 @@ ur_result_t urProgramGetGlobalVariablePointer( ///< variable if it is found in the program. ) { std::scoped_lock lock(Program->Mutex); + if (Program->getState(Device->ZeDevice) != ur_program_handle_t_::Exe) { + return UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE; + } ze_module_handle_t ZeModuleEntry{}; ZeModuleEntry = Program->getZeModuleHandle(Device->ZeDevice); @@ -668,17 +671,16 @@ ur_result_t urProgramGetInfo( binarySizes.push_back(Program->getCodeSize(Device->ZeDevice)); continue; } - auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); - if (!ZeModule) - return UR_RESULT_ERROR_INVALID_PROGRAM; - if (State == ur_program_handle_t_::IL || State == ur_program_handle_t_::Object) { - // We don't have a binary for this device, so return size of the spirv - // code. This is an array of 1 element, initialized as if it were - // scalar. - return ReturnValue(size_t{Program->getCodeSize()}); + // We don't have a binary for this device, so return 0. 
+ binarySizes.push_back(0); + continue; } else if (State == ur_program_handle_t_::Exe) { + auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + if (!ZeModule) + return UR_RESULT_ERROR_INVALID_PROGRAM; + size_t binarySize = 0; ZE2UR_CALL(zeModuleGetNativeBinary, (ZeModule, &binarySize, nullptr)); binarySizes.push_back(binarySize); @@ -718,27 +720,17 @@ ur_result_t urProgramGetInfo( SzBinary += Program->getCodeSize(ZeDevice); continue; } - auto ZeModule = Program->getZeModuleHandle(ZeDevice); - if (!ZeModule) { - return UR_RESULT_ERROR_INVALID_PROGRAM; - } - // If the caller is using a Program which is IL or an object, then - // the program has not been built for multiple devices so a single IL is - // returned. - // TODO: currently if program is not compiled for any of the associated - // devices, we just return spirv code, assuming that we either have the - // program built for all associated devices or for none. It is possible - // that program is compiled for subset of associated devices, so that case - // probably should be explicitely specified and handled better. if (State == ur_program_handle_t_::IL || State == ur_program_handle_t_::Object) { + // We don't have a binary for this device, so don't update the output + // pointer to the binary, only set return size to 0. 
if (PropSizeRet) - *PropSizeRet = Program->getCodeSize(); - if (PBinary) { - std::memcpy(PBinary[0], Program->getCode(), Program->getCodeSize()); - } - break; + *PropSizeRet = 0; } else if (State == ur_program_handle_t_::Exe) { + auto ZeModule = Program->getZeModuleHandle(ZeDevice); + if (!ZeModule) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } size_t binarySize = 0; if (PBinary) { NativeBinaryPtr = PBinary[deviceIndex]; diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 1c2f68c07c..0a36b3ecad 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -200,6 +200,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur::level_zero::urEnqueueDeviceGlobalVariableRead; pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe; pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur::level_zero::urEnqueueEventsWaitWithBarrierExt; return result; } diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 1207f7776b..1215d6449e 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -723,6 +723,11 @@ ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet); +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, + const ur_exp_enqueue_ext_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, diff --git a/source/adapters/level_zero/v2/command_list_cache.cpp 
b/source/adapters/level_zero/v2/command_list_cache.cpp index 4379ec1d6a..9e585b80af 100644 --- a/source/adapters/level_zero/v2/command_list_cache.cpp +++ b/source/adapters/level_zero/v2/command_list_cache.cpp @@ -13,6 +13,21 @@ #include "../device.hpp" +typedef struct _zex_intel_queue_copy_operations_offload_hint_exp_desc_t { + ze_structure_type_t stype; + const void *pNext; + ze_bool_t copyOffloadEnabled; +} zex_intel_queue_copy_operations_offload_hint_exp_desc_t; + +#define ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES \ + (ze_structure_type_t)0x0003001B + +template <> +ze_structure_type_t +getZeStructureType() { + return ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES; +} + bool v2::immediate_command_list_descriptor_t::operator==( const immediate_command_list_descriptor_t &rhs) const { return ZeDevice == rhs.ZeDevice && IsInOrder == rhs.IsInOrder && @@ -45,6 +60,10 @@ command_list_cache_t::command_list_cache_t(ze_context_handle_t ZeContext) raii::ze_command_list_handle_t command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { + ZeStruct offloadDesc; + offloadDesc.copyOffloadEnabled = + std::visit([](auto &&arg) { return arg.CopyOffloadEnabled; }, desc); + if (auto ImmCmdDesc = std::get_if(&desc)) { ze_command_list_handle_t ZeCommandList; @@ -58,6 +77,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { QueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; QueueDesc.index = ImmCmdDesc->Index.value(); } + QueueDesc.pNext = &offloadDesc; ZE2UR_CALL_THROWS( zeCommandListCreateImmediate, (ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList)); @@ -68,6 +88,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { CmdListDesc.flags = RegCmdDesc.IsInOrder ? 
ZE_COMMAND_LIST_FLAG_IN_ORDER : 0; CmdListDesc.commandQueueGroupOrdinal = RegCmdDesc.Ordinal; + CmdListDesc.pNext = &offloadDesc; ze_command_list_handle_t ZeCommandList; ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice, @@ -78,13 +99,14 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { raii::command_list_unique_handle command_list_cache_t::getImmediateCommandList( ze_device_handle_t ZeDevice, bool IsInOrder, uint32_t Ordinal, - ze_command_queue_mode_t Mode, ze_command_queue_priority_t Priority, - std::optional Index) { + bool CopyOffloadEnable, ze_command_queue_mode_t Mode, + ze_command_queue_priority_t Priority, std::optional Index) { TRACK_SCOPE_LATENCY("command_list_cache_t::getImmediateCommandList"); immediate_command_list_descriptor_t Desc; Desc.ZeDevice = ZeDevice; Desc.Ordinal = Ordinal; + Desc.CopyOffloadEnabled = CopyOffloadEnable; Desc.IsInOrder = IsInOrder; Desc.Mode = Mode; Desc.Priority = Priority; @@ -99,13 +121,15 @@ raii::command_list_unique_handle command_list_cache_t::getImmediateCommandList( raii::command_list_unique_handle command_list_cache_t::getRegularCommandList(ze_device_handle_t ZeDevice, - bool IsInOrder, uint32_t Ordinal) { + bool IsInOrder, uint32_t Ordinal, + bool CopyOffloadEnable) { TRACK_SCOPE_LATENCY("command_list_cache_t::getRegularCommandList"); regular_command_list_descriptor_t Desc; Desc.ZeDevice = ZeDevice; Desc.IsInOrder = IsInOrder; Desc.Ordinal = Ordinal; + Desc.CopyOffloadEnabled = CopyOffloadEnable; auto [CommandList, _] = getCommandList(Desc).release(); diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp index 0f2a212eab..c2462cce5e 100644 --- a/source/adapters/level_zero/v2/command_list_cache.hpp +++ b/source/adapters/level_zero/v2/command_list_cache.hpp @@ -30,6 +30,7 @@ struct immediate_command_list_descriptor_t { ze_device_handle_t ZeDevice; bool IsInOrder; uint32_t Ordinal; + bool 
CopyOffloadEnabled; ze_command_queue_mode_t Mode; ze_command_queue_priority_t Priority; std::optional Index; @@ -40,6 +41,7 @@ struct regular_command_list_descriptor_t { ze_device_handle_t ZeDevice; bool IsInOrder; uint32_t Ordinal; + bool CopyOffloadEnabled; bool operator==(const regular_command_list_descriptor_t &rhs) const; }; @@ -56,12 +58,13 @@ struct command_list_cache_t { raii::command_list_unique_handle getImmediateCommandList(ze_device_handle_t ZeDevice, bool IsInOrder, - uint32_t Ordinal, ze_command_queue_mode_t Mode, + uint32_t Ordinal, bool CopyOffloadEnable, + ze_command_queue_mode_t Mode, ze_command_queue_priority_t Priority, std::optional Index = std::nullopt); raii::command_list_unique_handle getRegularCommandList(ze_device_handle_t ZeDevice, bool IsInOrder, - uint32_t Ordinal); + uint32_t Ordinal, bool CopyOffloadEnable); // For testing purposes size_t getNumImmediateCommandLists(); diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp index 71360dd057..89e77c98e9 100644 --- a/source/adapters/level_zero/v2/context.cpp +++ b/source/adapters/level_zero/v2/context.cpp @@ -106,7 +106,7 @@ namespace ur::level_zero { ur_result_t urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *pProperties, - ur_context_handle_t *phContext) { + ur_context_handle_t *phContext) try { std::ignore = pProperties; ur_platform_handle_t hPlatform = phDevices[0]->Platform; @@ -118,20 +118,24 @@ ur_result_t urContextCreate(uint32_t deviceCount, *phContext = new ur_context_handle_t_(zeContext, deviceCount, phDevices, true); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, - ur_native_handle_t *phNativeContext) { + ur_native_handle_t *phNativeContext) try { *phNativeContext = reinterpret_cast(hContext->getZeHandle()); return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t hNativeContext, ur_adapter_handle_t, uint32_t numDevices, const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, - ur_context_handle_t *phContext) { + ur_context_handle_t *phContext) try { auto zeContext = reinterpret_cast(hNativeContext); auto ownZeHandle = pProperties ? pProperties->isNativeHandleOwned : false; @@ -139,22 +143,25 @@ ur_result_t urContextCreateWithNativeHandle( *phContext = new ur_context_handle_t_(zeContext, numDevices, phDevices, ownZeHandle); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urContextRetain(ur_context_handle_t hContext) { +ur_result_t urContextRetain(ur_context_handle_t hContext) try { return hContext->retain(); +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urContextRelease(ur_context_handle_t hContext) { +ur_result_t urContextRelease(ur_context_handle_t hContext) try { return hContext->release(); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t contextInfoType, size_t propSize, - - void *pContextInfo, - - size_t *pPropSizeRet) { + void *pContextInfo, size_t *pPropSizeRet) try { // No locking needed here, we only read const members UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); @@ -173,8 +180,16 @@ ur_result_t urContextGetInfo(ur_context_handle_t hContext, case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: // 2D USM fill is not supported. 
return ReturnValue(uint8_t{false}); + case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } default: - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } +} catch (...) { + return exceptionToResult(std::current_exception()); } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 1197ca7d82..f3319351e3 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -13,6 +13,7 @@ #include "event.hpp" #include "event_pool.hpp" #include "event_provider.hpp" +#include "queue_api.hpp" #include "../ur_interface_loader.hpp" @@ -24,15 +25,23 @@ ur_event_handle_t_::ur_event_handle_t_( zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution), timestampMaxValue(getDevice()->getTimestampMask()) {} +void ur_event_handle_t_::resetQueueAndCommand(ur_queue_handle_t hQueue, + ur_command_t commandType) { + this->hQueue = hQueue; + this->commandType = commandType; +} + void ur_event_handle_t_::reset() { - // consider make an abstraction for regular/counter based + // consider making an abstraction for regular/counter based // events if there's more of this type of conditions - if (pool->getFlags() & v2::EVENT_FLAGS_COUNTER) { + if (!(pool->getFlags() & v2::EVENT_FLAGS_COUNTER)) { zeEventHostReset(zeEvent.get()); } } ze_event_handle_t ur_event_handle_t_::getZeEvent() const { + assert(hQueue); + assert(commandType != UR_COMMAND_FORCE_UINT32); return zeEvent.get(); } @@ -41,14 +50,27 @@ ur_result_t ur_event_handle_t_::retain() { return UR_RESULT_SUCCESS; } +ur_result_t ur_event_handle_t_::releaseDeferred() { + assert(zeEventQueryStatus(zeEvent.get()) == ZE_RESULT_SUCCESS); + assert(RefCount.load() == 
0); + + pool->free(this); + return UR_RESULT_SUCCESS; +} + ur_result_t ur_event_handle_t_::release() { if (!RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; + // Need to take a lock before checking if the event is timestamped. + std::unique_lock lock(Mutex); + if (isTimestamped() && adjustedEventEndTimestamp == 0) { // L0 will write end timestamp to this event some time in the future, // so we can't release it yet. - // TODO: delay releasing until the end timestamp is written. + + assert(hQueue); + hQueue->deferEventFree(this); return UR_RESULT_SUCCESS; } @@ -99,17 +121,16 @@ uint64_t ur_event_handle_t_::getEventEndTimestamp() { if (adjustedEventEndTimestamp) return adjustedEventEndTimestamp; - // If the result is 0, we have not yet gotten results back and so we just - // return it. - if (recordEventEndTimestamp == 0) - return recordEventEndTimestamp; + auto status = zeEventQueryStatus(zeEvent.get()); + if (status != ZE_RESULT_SUCCESS) { + // profiling info not ready + return 0; + } - // Now that we have the result, there is no need to keep it in the queue - // anymore, so we cache it on the event and evict the record from the - // queue. 
adjustedEventEndTimestamp = adjustEndEventTimestamp(getEventStartTimestmap(), recordEventEndTimestamp, timestampMaxValue, zeTimerResolution); + return adjustedEventEndTimestamp; } @@ -118,32 +139,46 @@ void ur_event_handle_t_::recordStartTimestamp() { UR_CALL_THROWS(ur::level_zero::urDeviceGetGlobalTimestamps( getDevice(), &deviceStartTimestamp, nullptr)); + assert(adjustedEventStartTimestamp == 0); adjustedEventStartTimestamp = deviceStartTimestamp; } -uint64_t *ur_event_handle_t_::getEventEndTimestampPtr() { - return &recordEventEndTimestamp; +std::pair +ur_event_handle_t_::getEventEndTimestampAndHandle() { + return {&recordEventEndTimestamp, zeEvent.get()}; } +ur_queue_handle_t ur_event_handle_t_::getQueue() const { return hQueue; } + +ur_command_t ur_event_handle_t_::getCommandType() const { return commandType; } + namespace ur::level_zero { -ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); } +ur_result_t urEventRetain(ur_event_handle_t hEvent) try { + return hEvent->retain(); +} catch (...) { + return exceptionToResult(std::current_exception()); +} -ur_result_t urEventRelease(ur_event_handle_t hEvent) { +ur_result_t urEventRelease(ur_event_handle_t hEvent) try { return hEvent->release(); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEventWait(uint32_t numEvents, - const ur_event_handle_t *phEventWaitList) { + const ur_event_handle_t *phEventWaitList) try { for (uint32_t i = 0; i < numEvents; ++i) { ZE2UR_CALL(zeEventHostSynchronize, (phEventWaitList[i]->getZeEvent(), UINT64_MAX)); } return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, size_t propValueSize, void *pPropValue, - size_t *pPropValueSizeRet) { + size_t *pPropValueSizeRet) try { UrReturnHelper returnValue(propValueSize, pPropValue, pPropValueSizeRet); switch (propName) { @@ -159,6 +194,19 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, case UR_EVENT_INFO_REFERENCE_COUNT: { return returnValue(hEvent->RefCount.load()); } + case UR_EVENT_INFO_COMMAND_QUEUE: { + return returnValue(ur_queue_handle_t{hEvent->getQueue()}); + } + case UR_EVENT_INFO_CONTEXT: { + ur_context_handle_t hContext; + UR_CALL(::ur::level_zero::urQueueGetInfo( + hEvent->getQueue(), UR_QUEUE_INFO_CONTEXT, sizeof(hContext), + reinterpret_cast(&hContext), nullptr)); + return returnValue(hContext); + } + case UR_EVENT_INFO_COMMAND_TYPE: { + return returnValue(hEvent->getCommandType()); + } default: logger::error( "Unsupported ParamName in urEventGetInfo: ParamName=ParamName={}(0x{})", @@ -167,6 +215,8 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, } return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEventGetProfilingInfo( @@ -178,7 +228,7 @@ ur_result_t urEventGetProfilingInfo( void *pPropValue, ///< [out][optional] value of the profiling property size_t *pPropValueSizeRet ///< [out][optional] pointer to the actual size in ///< bytes returned in propValue -) { + ) try { std::scoped_lock lock(hEvent->Mutex); // The event must either have profiling enabled or be recording timestamps. @@ -247,5 +297,7 @@ ur_result_t urEventGetProfilingInfo( } return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event.hpp b/source/adapters/level_zero/v2/event.hpp index 394f139b30..3c22ef1337 100644 --- a/source/adapters/level_zero/v2/event.hpp +++ b/source/adapters/level_zero/v2/event.hpp @@ -27,12 +27,19 @@ struct ur_event_handle_t_ : _ur_object { ur_event_handle_t_(v2::raii::cache_borrowed_event eventAllocation, v2::event_pool *pool); + // Set the queue and command that this event is associated with + void resetQueueAndCommand(ur_queue_handle_t hQueue, ur_command_t commandType); + void reset(); ze_event_handle_t getZeEvent() const; ur_result_t retain(); ur_result_t release(); + // releases a signaled and no longer in-use event, that's on the + // deffered events list in the queue + ur_result_t releaseDeferred(); + // Tells if this event was created as a timestamp event, allowing profiling // info even if profiling is not enabled. bool isTimestamped() const; @@ -43,13 +50,24 @@ struct ur_event_handle_t_ : _ur_object { // Device associated with this event ur_device_handle_t getDevice() const; + // Queue associated with this event + ur_queue_handle_t getQueue() const; + + // Get the type of the command that this event is associated with + ur_command_t getCommandType() const; + void recordStartTimestamp(); - uint64_t *getEventEndTimestampPtr(); + + // Get pointer to the end timestamp, and ze event handle. + // Caller is responsible for signaling the event once the timestamp is ready. 
+ std::pair getEventEndTimestampAndHandle(); uint64_t getEventStartTimestmap() const; uint64_t getEventEndTimestamp(); private: + ur_queue_handle_t hQueue = nullptr; + ur_command_t commandType = UR_COMMAND_FORCE_UINT32; v2::raii::cache_borrowed_event zeEvent; v2::event_pool *pool; diff --git a/source/adapters/level_zero/v2/event_pool.cpp b/source/adapters/level_zero/v2/event_pool.cpp index fe63681764..523aaf7fb9 100644 --- a/source/adapters/level_zero/v2/event_pool.cpp +++ b/source/adapters/level_zero/v2/event_pool.cpp @@ -15,7 +15,8 @@ namespace v2 { static constexpr size_t EVENTS_BURST = 64; -ur_event_handle_t_ *event_pool::allocate() { +ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue, + ur_command_t commandType) { TRACK_SCOPE_LATENCY("event_pool::allocate"); std::unique_lock lock(*mutex); @@ -32,6 +33,8 @@ ur_event_handle_t_ *event_pool::allocate() { auto event = freelist.back(); freelist.pop_back(); + event->resetQueueAndCommand(hQueue, commandType); + return event; } diff --git a/source/adapters/level_zero/v2/event_pool.hpp b/source/adapters/level_zero/v2/event_pool.hpp index 924d29b907..e9ad4051e6 100644 --- a/source/adapters/level_zero/v2/event_pool.hpp +++ b/source/adapters/level_zero/v2/event_pool.hpp @@ -41,7 +41,8 @@ class event_pool { DeviceId Id() { return provider->device()->Id.value(); }; // Allocate an event from the pool. Thread safe. - ur_event_handle_t_ *allocate(); + ur_event_handle_t_ *allocate(ur_queue_handle_t hQueue, + ur_command_t commandType); // Free an event back to the pool. Thread safe. 
void free(ur_event_handle_t_ *event); diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index de2e37e9bb..6f348bd5af 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -70,8 +70,10 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, ur_kernel_handle_t_::ur_kernel_handle_t_( ur_native_handle_t hNativeKernel, ur_program_handle_t hProgram, + ur_context_handle_t context, const ur_kernel_native_properties_t *pProperties) - : hProgram(hProgram), deviceKernels(1) { + : hProgram(hProgram), + deviceKernels(context ? context->getPlatform()->getNumDevices() : 0) { ur::level_zero::urProgramRetain(hProgram); auto ownZeHandle = pProperties ? pProperties->isNativeHandleOwned : false; @@ -82,7 +84,12 @@ ur_kernel_handle_t_::ur_kernel_handle_t_( throw UR_RESULT_ERROR_INVALID_KERNEL; } - deviceKernels.back().emplace(nullptr, zeKernel, ownZeHandle); + for (auto &Dev : context->getDevices()) { + deviceKernels[*Dev->Id].emplace(Dev, zeKernel, ownZeHandle); + + // owned only by the first entry + ownZeHandle = false; + } completeInitialization(); } @@ -128,20 +135,6 @@ size_t ur_kernel_handle_t_::deviceIndex(ur_device_handle_t hDevice) const { hDevice = hDevice->RootDevice; } - // supports kernels created from native handle - if (deviceKernels.size() == 1) { - assert(deviceKernels[0].has_value()); - assert(deviceKernels[0].value().hKernel.get()); - - auto &kernel = deviceKernels[0].value(); - - if (kernel.hDevice != hDevice) { - throw UR_RESULT_ERROR_INVALID_DEVICE; - } - - return 0; - } - if (!deviceKernels[hDevice->Id.value()].has_value()) { throw UR_RESULT_ERROR_INVALID_DEVICE; } @@ -322,17 +315,21 @@ std::vector ur_kernel_handle_t_::getSourceAttributes() const { namespace ur::level_zero { ur_result_t urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, - ur_kernel_handle_t *phKernel) { + ur_kernel_handle_t *phKernel) try { *phKernel = new 
ur_kernel_handle_t_(hProgram, pKernelName); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, - ur_native_handle_t *phNativeKernel) { + ur_native_handle_t *phNativeKernel) try { // Return the handle of the kernel for the first device *phNativeKernel = reinterpret_cast(hKernel->getNativeZeHandle()); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t @@ -340,22 +337,30 @@ urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, ur_program_handle_t hProgram, const ur_kernel_native_properties_t *pProperties, - ur_kernel_handle_t *phKernel) { - std::ignore = hContext; - *phKernel = new ur_kernel_handle_t_(hNativeKernel, hProgram, pProperties); + ur_kernel_handle_t *phKernel) try { + if (!hProgram) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + *phKernel = + new ur_kernel_handle_t_(hNativeKernel, hProgram, hContext, pProperties); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelRetain( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to retain -) { + ) try { hKernel->RefCount.increment(); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelRelease( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release -) { + ) try { if (!hKernel->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -363,6 +368,8 @@ ur_result_t urKernelRelease( delete hKernel; return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelSetArgValue( @@ -373,11 +380,13 @@ ur_result_t urKernelSetArgValue( *pProperties, ///< [in][optional] argument properties const void *pArgValue ///< [in] argument value represented as matching arg type. 
-) { + ) try { TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgValue"); std::scoped_lock guard(hKernel->Mutex); return hKernel->setArgValue(argIndex, argSize, pProperties, pArgValue); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelSetArgPointer( @@ -387,11 +396,13 @@ ur_result_t urKernelSetArgPointer( *pProperties, ///< [in][optional] argument properties const void *pArgValue ///< [in] argument value represented as matching arg type. -) { + ) try { TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgPointer"); std::scoped_lock guard(hKernel->Mutex); return hKernel->setArgPointer(argIndex, pProperties, pArgValue); +} catch (...) { + return exceptionToResult(std::current_exception()); } static ur_mem_handle_t_::device_access_mode_t memAccessFromKernelProperties( @@ -414,7 +425,7 @@ static ur_mem_handle_t_::device_access_mode_t memAccessFromKernelProperties( ur_result_t urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *pProperties, - ur_mem_handle_t hArgValue) { + ur_mem_handle_t hArgValue) try { TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgMemObj"); std::scoped_lock guard(hKernel->Mutex); @@ -423,12 +434,14 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, {hArgValue, memAccessFromKernelProperties(pProperties), argIndex})); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_local_properties_t *pProperties) { + const ur_kernel_arg_local_properties_t *pProperties) try { TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgLocal"); std::scoped_lock guard(hKernel->Mutex); @@ -436,6 +449,8 @@ urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, std::ignore = pProperties; return hKernel->setArgValue(argIndex, argSize, nullptr, nullptr); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urKernelSetExecInfo( @@ -446,13 +461,15 @@ ur_result_t urKernelSetExecInfo( *pProperties, ///< [in][optional] pointer to execution info properties const void *pPropValue ///< [in][range(0, propSize)] pointer to memory ///< location holding the property value. -) { + ) try { std::ignore = propSize; std::ignore = pProperties; std::scoped_lock guard(hKernel->Mutex); return hKernel->setExecInfo(propName, pPropValue); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelGetGroupInfo( @@ -466,7 +483,7 @@ ur_result_t urKernelGetGroupInfo( ///< Kernel Work Group property. size_t *pParamValueSizeRet ///< [out][optional] pointer to the actual size ///< in bytes of data being queried by propName. -) { + ) try { UrReturnHelper returnValue(paramValueSize, pParamValue, pParamValueSizeRet); // No locking needed here, we only read const members @@ -532,6 +549,8 @@ ur_result_t urKernelGetGroupInfo( } } return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urKernelGetSubGroupInfo( @@ -544,7 +563,7 @@ ur_result_t urKernelGetSubGroupInfo( ///< Kernel SubGroup property. size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in ///< bytes of data being queried by propName. -) { + ) try { UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); auto props = hKernel->getProperties(hDevice); @@ -563,11 +582,13 @@ ur_result_t urKernelGetSubGroupInfo( return {}; } return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, ur_kernel_info_t paramName, size_t propSize, - void *pKernelInfo, size_t *pPropSizeRet) { + void *pKernelInfo, size_t *pPropSizeRet) try { UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); @@ -599,5 +620,7 @@ ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, } return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/kernel.hpp b/source/adapters/level_zero/v2/kernel.hpp index 735150838b..798c9f18df 100644 --- a/source/adapters/level_zero/v2/kernel.hpp +++ b/source/adapters/level_zero/v2/kernel.hpp @@ -39,7 +39,7 @@ struct ur_kernel_handle_t_ : _ur_object { // From native handle ur_kernel_handle_t_(ur_native_handle_t hNativeKernel, - ur_program_handle_t hProgram, + ur_program_handle_t hProgram, ur_context_handle_t context, const ur_kernel_native_properties_t *pProperties); // Get L0 kernel handle for a given device diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index a70792969b..65972f3aff 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -157,7 +157,7 @@ static ur_result_t synchronousZeCopy(ur_context_handle_t hContext, hDevice ->QueueGroup[ur_device_handle_t_::queue_group_info_t::type::Compute] .ZeOrdinal, - ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + true, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, std::nullopt); ZE2UR_CALL(zeCommandListAppendMemoryCopy, @@ -402,7 +402,7 @@ namespace ur::level_zero { ur_result_t urMemBufferCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, - ur_mem_handle_t *phBuffer) { + ur_mem_handle_t *phBuffer) try { if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { // TODO: // Having 
PI_MEM_FLAGS_HOST_PTR_ALLOC for buffer requires allocation of @@ -430,12 +430,14 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext, } return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, - ur_mem_handle_t *phMem) { + ur_mem_handle_t *phMem) try { UR_ASSERT(bufferCreateType == UR_BUFFER_CREATE_TYPE_REGION, UR_RESULT_ERROR_INVALID_ENUMERATION); UR_ASSERT((pRegion->origin < hBuffer->getSize() && @@ -451,12 +453,13 @@ ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, accessMode); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, - const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) try { auto ptr = reinterpret_cast(hNativeMem); bool ownNativeHandle = pProperties ? pProperties->isNativeHandleOwned : false; @@ -505,11 +508,13 @@ ur_result_t urMemBufferCreateWithNativeHandle( } return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { + size_t *pPropSizeRet) try { // No locking needed here, we only read const members UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); @@ -522,29 +527,38 @@ ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, // Get size of the allocation return returnValue(size_t{hMemory->getSize()}); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return returnValue(hMemory->getRefCount().load()); + } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } } return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urMemRetain(ur_mem_handle_t hMem) { +ur_result_t urMemRetain(ur_mem_handle_t hMem) try { hMem->getRefCount().increment(); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urMemRelease(ur_mem_handle_t hMem) { +ur_result_t urMemRelease(ur_mem_handle_t hMem) try { if (hMem->getRefCount().decrementAndTest()) { delete hMem; } return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, ur_device_handle_t hDevice, - ur_native_handle_t *phNativeMem) { + ur_native_handle_t *phNativeMem) try { std::ignore = hDevice; std::scoped_lock lock(hMem->getMutex()); @@ -554,5 +568,7 @@ ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, hMem->getSize(), nullptr); *phNativeMem = reinterpret_cast(ptr); return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index ea2e931bfe..b7b45625a2 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -11,71 +11,94 @@ */ #include "queue_api.hpp" +#include "ur_util.hpp" ur_queue_handle_t_::~ur_queue_handle_t_() {} namespace ur::level_zero { ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { + size_t *pPropSizeRet) try { return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urQueueRetain(ur_queue_handle_t hQueue) { +ur_result_t urQueueRetain(ur_queue_handle_t hQueue) try { return hQueue->queueRetain(); +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urQueueRelease(ur_queue_handle_t hQueue) { +ur_result_t urQueueRelease(ur_queue_handle_t hQueue) try { return hQueue->queueRelease(); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { + ur_native_handle_t *phNativeQueue) try { return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urQueueFinish(ur_queue_handle_t hQueue) { +ur_result_t urQueueFinish(ur_queue_handle_t hQueue) try { return hQueue->queueFinish(); +} catch (...) { + return exceptionToResult(std::current_exception()); } -ur_result_t urQueueFlush(ur_queue_handle_t hQueue) { +ur_result_t urQueueFlush(ur_queue_handle_t hQueue) try { return hQueue->queueFlush(); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueKernelLaunch( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferWrite(hBuffer, blockingWrite, offset, size, pSrc, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, @@ -83,11 +106,13 @@ ur_result_t urEnqueueMemBufferReadRect( ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferReadRect( hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, @@ -95,11 +120,13 @@ ur_result_t urEnqueueMemBufferWriteRect( ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferWriteRect( hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, @@ -107,10 +134,12 @@ ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, @@ -118,11 +147,13 @@ ur_result_t urEnqueueMemBufferCopyRect( ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferCopyRect( hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, @@ -130,28 +161,34 @@ ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemImageRead( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueMemImageRead( hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueMemImageWrite( hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, @@ -159,95 +196,118 @@ urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, ur_rect_offset_t dstOrigin, ur_rect_region_t region, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, region, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { + ur_event_handle_t *phEvent, + void **ppRetMap) try { return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, size, numEventsInWaitList, phEventWaitList, phEvent, ppRetMap); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_advice_flags_t advice, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, const void *pPattern, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, const void *pSrc, size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueDeviceGlobalVariableWrite( hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueDeviceGlobalVariableRead( hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, ur_program_handle_t hProgram, @@ -255,10 +315,12 @@ ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, void *pDst, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, size, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, ur_program_handle_t hProgram, @@ -266,10 +328,12 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, @@ -278,42 +342,52 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urBindlessImagesWaitExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->bindlessImagesWaitExternalSemaphoreExp( hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->bindlessImagesSignalExternalSemaphoreExp( hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueCooperativeKernelLaunchExp( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueKernelLaunchCustomExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, @@ -321,11 +395,23 @@ ur_result_t urEnqueueKernelLaunchCustomExp( uint32_t numPropsInLaunchPropList, const ur_exp_launch_property_t *launchPropList, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueKernelLaunchCustomExp( hKernel, workDim, pGlobalWorkSize, pLocalWorkSize, numPropsInLaunchPropList, launchPropList, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, + const ur_exp_enqueue_ext_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) try { + return hQueue->enqueueEventsWaitWithBarrierExt( + pProperties, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, @@ -333,9 +419,11 @@ ur_result_t urEnqueueNativeCommandExp( uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, const ur_exp_enqueue_native_command_properties_t *pProperties, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { + ur_event_handle_t *phEvent) try { return hQueue->enqueueNativeCommandExp( pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); } } // namespace ur::level_zero \ No newline at end of file diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index bc01596d2b..7cb039ccdd 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ b/source/adapters/level_zero/v2/queue_api.hpp @@ -16,6 +16,9 @@ struct ur_queue_handle_t_ { virtual ~ur_queue_handle_t_(); + + virtual void deferEventFree(ur_event_handle_t hEvent) = 0; + virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *, size_t *) = 0; virtual ur_result_t queueRetain() = 0; @@ -145,6 +148,10 @@ struct ur_queue_handle_t_ { const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; virtual ur_result_t + enqueueEventsWaitWithBarrierExt(const ur_exp_enqueue_ext_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, uint32_t, const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, diff --git a/source/adapters/level_zero/v2/queue_create.cpp b/source/adapters/level_zero/v2/queue_create.cpp index ce0e6c7168..f397cd8747 100644 --- a/source/adapters/level_zero/v2/queue_create.cpp +++ b/source/adapters/level_zero/v2/queue_create.cpp @@ -21,7 +21,7 @@ 
namespace ur::level_zero { ur_result_t urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, - ur_queue_handle_t *phQueue) { + ur_queue_handle_t *phQueue) try { if (!hContext->isValidDevice(hDevice)) { return UR_RESULT_ERROR_INVALID_DEVICE; } @@ -30,12 +30,14 @@ ur_result_t urQueueCreate(ur_context_handle_t hContext, *phQueue = new v2::ur_queue_immediate_in_order_t(hContext, hDevice, pProperties); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, - ur_queue_handle_t *phQueue) { + ur_queue_handle_t *phQueue) try { // TODO: For now, always assume it's immediate, in-order bool ownNativeHandle = pProperties ? pProperties->isNativeHandleOwned : false; @@ -59,5 +61,7 @@ ur_result_t urQueueCreateWithNativeHandle( hContext, hDevice, hNativeQueue, flags, ownNativeHandle); return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index b4f61adbba..519b0ffc1e 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -23,30 +23,17 @@ namespace v2 { std::pair ur_queue_immediate_in_order_t::getWaitListView( - const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, - ur_command_list_handler_t *pHandler) { - auto extraWaitEvent = (lastHandler && pHandler != lastHandler) - ? 
lastHandler->lastEvent->getZeEvent() - : nullptr; - - auto totalEvents = numWaitEvents + (extraWaitEvent != nullptr); - waitList.reserve(totalEvents); + const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) { + waitList.resize(numWaitEvents); for (uint32_t i = 0; i < numWaitEvents; i++) { waitList[i] = phWaitEvents[i]->getZeEvent(); } - if (extraWaitEvent) { - waitList[numWaitEvents] = extraWaitEvent; - } - - return {waitList.data(), static_cast(totalEvents)}; + return {waitList.data(), static_cast(numWaitEvents)}; } -static int32_t getZeOrdinal(ur_device_handle_t hDevice, queue_group_type type) { - if (type == queue_group_type::MainCopy && hDevice->hasMainCopyEngine()) { - return hDevice->QueueGroup[queue_group_type::MainCopy].ZeOrdinal; - } +static int32_t getZeOrdinal(ur_device_handle_t hDevice) { return hDevice->QueueGroup[queue_group_type::Compute].ZeOrdinal; } @@ -73,29 +60,22 @@ static ze_command_queue_priority_t getZePriority(ur_queue_flags_t flags) { ur_command_list_handler_t::ur_command_list_handler_t( ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProps, queue_group_type type, - event_pool *eventPool) + const ur_queue_properties_t *pProps) : commandList(hContext->commandListCache.getImmediateCommandList( - hDevice->ZeDevice, true, getZeOrdinal(hDevice, type), + hDevice->ZeDevice, true, getZeOrdinal(hDevice), + true /* always enable copy offload */, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, getZePriority(pProps ? 
pProps->flags : ur_queue_flags_t{}), - getZeIndex(pProps))), - internalEvent(eventPool->allocate(), [=](ur_event_handle_t event) { - ur::level_zero::urEventRelease(event); - }) {} + getZeIndex(pProps))) {} ur_command_list_handler_t::ur_command_list_handler_t( - ze_command_list_handle_t hZeCommandList, event_pool *eventPool, - bool ownZeHandle) + ze_command_list_handle_t hZeCommandList, bool ownZeHandle) : commandList(hZeCommandList, [ownZeHandle](ze_command_list_handle_t hZeCommandList) { if (ownZeHandle) { zeCommandListDestroy(hZeCommandList); } - }), - internalEvent(eventPool->allocate(), [=](ur_event_handle_t event) { - ur::level_zero::urEventRelease(event); - }) {} + }) {} static event_flags_t eventFlagsFromQueueFlags(ur_queue_flags_t flags) { event_flags_t eventFlags = EVENT_FLAGS_COUNTER; @@ -110,10 +90,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( : hContext(hContext), hDevice(hDevice), flags(pProps ? pProps->flags : 0), eventPool(hContext->eventPoolCache.borrow( hDevice->Id.value(), eventFlagsFromQueueFlags(flags))), - copyHandler(hContext, hDevice, pProps, queue_group_type::MainCopy, - eventPool.get()), - computeHandler(hContext, hDevice, pProps, queue_group_type::Compute, - eventPool.get()) {} + handler(hContext, hDevice, pProps) {} ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( ur_context_handle_t hContext, ur_device_handle_t hDevice, @@ -121,42 +98,18 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( : hContext(hContext), hDevice(hDevice), flags(flags), eventPool(hContext->eventPoolCache.borrow( hDevice->Id.value(), eventFlagsFromQueueFlags(flags))), - copyHandler( - reinterpret_cast(hNativeHandle), eventPool.get(), false /* we're using a single command list for both handlers, only own it by one of them */), - computeHandler(reinterpret_cast(hNativeHandle), - eventPool.get(), ownZeQueue) {} - -ur_command_list_handler_t * -ur_queue_immediate_in_order_t::getCommandListHandlerForCompute() { - return 
&computeHandler; -} - -ur_command_list_handler_t * -ur_queue_immediate_in_order_t::getCommandListHandlerForCopy() { - // TODO: optimize for specific devices, see ../memory.cpp - return ©Handler; -} - -ur_command_list_handler_t * -ur_queue_immediate_in_order_t::getCommandListHandlerForFill( - size_t patternSize) { - if (patternSize <= hDevice->QueueGroup[queue_group_type::MainCopy] - .ZeProperties.maxMemoryFillPatternSize) - return ©Handler; - else - return &computeHandler; -} - -ur_event_handle_t ur_queue_immediate_in_order_t::getSignalEvent( - ur_command_list_handler_t *handler, ur_event_handle_t *hUserEvent) { - if (!hUserEvent) { - handler->lastEvent = handler->internalEvent.get(); + handler(reinterpret_cast(hNativeHandle), + ownZeQueue) {} + +ur_event_handle_t +ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent, + ur_command_t commandType) { + if (hUserEvent) { + *hUserEvent = eventPool->allocate(this, commandType); + return *hUserEvent; } else { - *hUserEvent = eventPool->allocate(); - handler->lastEvent = *hUserEvent; + return nullptr; } - - return handler->lastEvent; } ur_result_t @@ -178,11 +131,8 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, case UR_QUEUE_INFO_DEVICE_DEFAULT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_QUEUE_INFO_EMPTY: { - // We can exit early if we have in-order queue. 
- if (!lastHandler) - return ReturnValue(true); - else - return ReturnValue(false); + // We can't tell if the queue is empty as we don't hold to any events + return ReturnValue(false); } default: logger::error("Unsupported ParamName in urQueueGetInfo: " @@ -203,34 +153,22 @@ ur_result_t ur_queue_immediate_in_order_t::queueRelease() { if (!RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; + UR_CALL(queueFinish()); + delete this; return UR_RESULT_SUCCESS; } +void ur_queue_immediate_in_order_t::deferEventFree(ur_event_handle_t hEvent) { + std::unique_lock lock(this->Mutex); + deferredEvents.push_back(hEvent); +} + ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle( ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) { std::ignore = pDesc; - *phNativeQueue = reinterpret_cast( - this->computeHandler.commandList.get()); - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_queue_immediate_in_order_t::finalizeHandler( - ur_command_list_handler_t *handler) { - lastHandler = handler; - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_queue_immediate_in_order_t::finalizeHandler( - ur_command_list_handler_t *handler, bool blocking) { - if (blocking) { - ZE2UR_CALL(zeCommandListHostSynchronize, - (handler->commandList.get(), UINT64_MAX)); - lastHandler = nullptr; - } else { - finalizeHandler(handler); - } - + *phNativeQueue = + reinterpret_cast(this->handler.commandList.get()); return UR_RESULT_SUCCESS; } @@ -239,18 +177,17 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() { std::unique_lock lock(this->Mutex); - if (!lastHandler) { - return UR_RESULT_SUCCESS; - } - - auto lastCmdList = lastHandler->commandList.get(); - lastHandler = nullptr; - lock.unlock(); - // TODO: use zeEventHostSynchronize instead? 
TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::zeCommandListHostSynchronize"); - ZE2UR_CALL(zeCommandListHostSynchronize, (lastCmdList, UINT64_MAX)); + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + + // Free deferred events + for (auto &hEvent : deferredEvents) { + hEvent->releaseDeferred(); + } + deferredEvents.clear(); return UR_RESULT_SUCCESS; } @@ -283,16 +220,14 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( zeThreadGroupDimensions, WG, workDim, pGlobalWorkSize, pLocalWorkSize)); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto memoryMigrate = [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; }; @@ -310,11 +245,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::zeCommandListAppendLaunchKernel"); + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (handler->commandList.get(), hZeKernel, &zeThreadGroupDimensions, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + (handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions, + zeSignalEvent, waitList.second, waitList.first)); - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( @@ -324,17 +260,26 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + if (!numEventsInWaitList && !phEvent) { + // nop + return UR_RESULT_SUCCESS; + } + + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT); auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler->commandList.get(), numWaitEvents, pWaitEvents)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + if (numWaitEvents > 0) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (handler.commandList.get(), numWaitEvents, pWaitEvents)); + } - return finalizeHandler(handler); + if (signalEvent) { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler.commandList.get(), signalEvent->getZeEvent())); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( @@ -347,22 +292,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( return enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } +ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierExt( + const ur_exp_enqueue_ext_properties_t *, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t 
*phEvent) { + return enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, + phEvent); +} + ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { + auto signalEvent = getSignalEvent(phEvent, commandType); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pSrc = ur_cast(src->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, srcOffset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -371,7 +322,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, dstOffset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -383,11 +334,17 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( waitList.second = 0; } + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), pDst, pSrc, size, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + (handler.commandList.get(), pDst, pSrc, size, zeSignalEvent, + waitList.second, waitList.first)); + + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } - return finalizeHandler(handler, blocking); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( @@ -403,9 +360,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - return enqueueGenericCopyUnlocked(hBuffer, &dstHandle, blockingRead, offset, - 0, size, numEventsInWaitList, - phEventWaitList, phEvent); + return enqueueGenericCopyUnlocked( + hBuffer, &dstHandle, blockingRead, offset, 0, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( @@ -421,9 +378,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - return enqueueGenericCopyUnlocked(&srcHandle, hBuffer, blockingWrite, 0, - offset, size, numEventsInWaitList, - phEventWaitList, phEvent); + return enqueueGenericCopyUnlocked( + &srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); } ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( @@ -431,22 +388,21 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, 
ur_event_handle_t *phEvent, + ur_command_t commandType) { auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch, dstRowPitch, srcSlicePitch, dstSlicePitch); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, commandType); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pSrc = ur_cast(src->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, 0, src->getSize(), [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -454,7 +410,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, 0, dst->getSize(), [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -466,13 +422,19 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( waitList.second = 0; } + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (handler->commandList.get(), pDst, &zeParams.dstRegion, + (handler.commandList.get(), pDst, &zeParams.dstRegion, zeParams.dstPitch, zeParams.dstSlicePitch, pSrc, &zeParams.srcRegion, zeParams.srcPitch, zeParams.srcSlicePitch, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + zeSignalEvent, waitList.second, waitList.first)); - return finalizeHandler(handler, blocking); + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( @@ -492,7 +454,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( return enqueueRegionCopyUnlocked( hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - numEventsInWaitList, phEventWaitList, phEvent); + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_READ_RECT); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( @@ -512,7 +475,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( return enqueueRegionCopyUnlocked( &srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region, hostRowPitch, hostSlicePitch, bufferRowPitch, bufferSlicePitch, - numEventsInWaitList, phEventWaitList, phEvent); + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_WRITE_RECT); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( @@ -531,7 +495,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( return enqueueGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset, dstOffset, size, numEventsInWaitList, - phEventWaitList, phEvent); + phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_COPY); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( @@ -549,7 +514,7 @@ ur_result_t 
ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( return enqueueRegionCopyUnlocked( hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitList, phEvent); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( @@ -565,7 +530,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( return enqueueGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, - phEvent); + phEvent, UR_COMMAND_MEM_BUFFER_FILL); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageRead( @@ -630,17 +595,15 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_BUFFER_MAP); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pDst = ur_cast(hBuffer->mapHostPtr( mapFlags, offset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -649,14 +612,19 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( if (!memoryMigrated && waitList.second) { // If memory was not migrated, we need to wait on the events here. 
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler->commandList.get(), waitList.second, waitList.first)); + (handler.commandList.get(), waitList.second, waitList.first)); if (signalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + (handler.commandList.get(), signalEvent->getZeEvent())); } } - return finalizeHandler(handler, blockingMap); + if (blockingMap) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( @@ -666,51 +634,48 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_UNMAP); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); // TODO: currently unmapHostPtr deallocates memory immediately, // since the memory might be used by the user, we need to make sure // all dependencies are completed. 
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler->commandList.get(), waitList.second, waitList.first)); + (handler.commandList.get(), waitList.second, waitList.first)); bool memoryMigrated = false; hMem->unmapHostPtr(pMappedPtr, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; }); if (signalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + (handler.commandList.get(), signalEvent->getZeEvent())); } - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( ur_mem_handle_t dst, size_t offset, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { - auto handler = getCommandListHandlerForFill(patternSize); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, commandType); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pDst = ur_cast(dst->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, offset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -726,11 +691,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( // PatternSize must be a power of two for 
zeCommandListAppendMemoryFill. // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy. + auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryFill, - (handler->commandList.get(), pDst, pPattern, patternSize, size, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + (handler.commandList.get(), pDst, pPattern, patternSize, size, + zeSignalEvent, waitList.second, waitList.first)); - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill( @@ -744,7 +710,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill( ur_usm_handle_t_ dstHandle(hContext, size, pMem); return enqueueGenericFillUnlocked(&dstHandle, 0, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, - phEvent); + phEvent, UR_COMMAND_USM_FILL); } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( @@ -756,17 +722,22 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), pDst, pSrc, size, - signalEvent->getZeEvent(), numWaitEvents, pWaitEvents)); + (handler.commandList.get(), pDst, pSrc, size, zeSignalEvent, + numWaitEvents, pWaitEvents)); - return finalizeHandler(handler, blocking); + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( @@ -779,23 +750,25 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH); auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); if (pWaitEvents) { - ZE2UR_CALL(zeCommandListAppendBarrier, (handler->commandList.get(), nullptr, + ZE2UR_CALL(zeCommandListAppendBarrier, (handler.commandList.get(), nullptr, numWaitEvents, pWaitEvents)); } // TODO: figure out how to translate "flags" ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, - (handler->commandList.get(), pMem, size)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + (handler.commandList.get(), pMem, size)); + + if (signalEvent) { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler.commandList.get(), signalEvent->getZeEvent())); + } - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t @@ -810,24 +783,26 @@ ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size, auto zeAdvice = ur_cast(advice); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE); 
- auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0, handler); + auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0); if (pWaitEvents) { - ZE2UR_CALL(zeCommandListAppendBarrier, (handler->commandList.get(), nullptr, + ZE2UR_CALL(zeCommandListAppendBarrier, (handler.commandList.get(), nullptr, numWaitEvents, pWaitEvents)); } // TODO: figure out how to translate "flags" ZE2UR_CALL(zeCommandListAppendMemAdvise, - (handler->commandList.get(), this->hDevice->ZeDevice, pMem, size, + (handler.commandList.get(), this->hDevice->ZeDevice, pMem, size, zeAdvice)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); - return finalizeHandler(handler); + if (signalEvent) { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler.commandList.get(), signalEvent->getZeEvent())); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill2D( @@ -1005,15 +980,57 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hKernel; - std::ignore = workDim; - std::ignore = pGlobalWorkOffset; - std::ignore = pGlobalWorkSize; - std::ignore = pLocalWorkSize; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + TRACK_SCOPE_LATENCY( + "ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp"); + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hKernel->getProgramHandle(), UR_RESULT_ERROR_INVALID_NULL_POINTER); + + UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + ze_kernel_handle_t hZeKernel = hKernel->getZeHandle(hDevice); + + std::scoped_lock 
Lock(this->Mutex, + hKernel->Mutex); + + ze_group_count_t zeThreadGroupDimensions{1, 1, 1}; + uint32_t WG[3]{}; + UR_CALL(calculateKernelWorkDimensions(hZeKernel, hDevice, + zeThreadGroupDimensions, WG, workDim, + pGlobalWorkSize, pLocalWorkSize)); + + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); + + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); + + bool memoryMigrated = false; + auto memoryMigrate = [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (handler.commandList.get(), dst, src, size, nullptr, + waitList.second, waitList.first)); + memoryMigrated = true; + }; + + UR_CALL(hKernel->prepareForSubmission(hContext, hDevice, pGlobalWorkOffset, + workDim, WG[0], WG[1], WG[2], + memoryMigrate)); + + if (memoryMigrated) { + // If memory was migrated, we don't need to pass the wait list to + // the copy command again. + waitList.first = nullptr; + waitList.second = 0; + } + + TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::" + "zeCommandListAppendLaunchCooperativeKernel"); + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; + ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel, + (handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions, + zeSignalEvent, waitList.second, waitList.first)); + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( @@ -1024,24 +1041,31 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = + getSignalEvent(phEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP); if (!signalEvent) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); signalEvent->recordStartTimestamp(); + auto [timestampPtr, zeSignalEvent] = + signalEvent->getEventEndTimestampAndHandle(); + ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, - (handler->commandList.get(), - signalEvent->getEventEndTimestampPtr(), signalEvent->getZeEvent(), + (handler.commandList.get(), timestampPtr, zeSignalEvent, numWaitEvents, pWaitEvents)); - return finalizeHandler(handler, blocking); + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp( diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 5b320fe2e8..33e060ded3 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -26,20 +26,12 @@ using queue_group_type = ur_device_handle_t_::queue_group_info_t::type; struct ur_command_list_handler_t { ur_command_list_handler_t(ur_context_handle_t hContext, ur_device_handle_t 
hDevice, - const ur_queue_properties_t *pProps, - queue_group_type type, event_pool *eventPool); + const ur_queue_properties_t *pProps); ur_command_list_handler_t(ze_command_list_handle_t hZeCommandList, - event_pool *eventPool, bool ownZeHandle); + bool ownZeHandle); raii::command_list_unique_handle commandList; - std::unique_ptr> - internalEvent; - - // TODO: do we need to keep ref count of this for user events? - // For counter based events, we can reuse them safely and l0 event pool - // cannot be destroyed before the queue is released. - ur_event_handle_t lastEvent = nullptr; }; struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { @@ -50,43 +42,40 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { raii::cache_borrowed_event_pool eventPool; - ur_command_list_handler_t copyHandler; - ur_command_list_handler_t computeHandler; - ur_command_list_handler_t *lastHandler = nullptr; + ur_command_list_handler_t handler; std::vector waitList; - std::pair - getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, - ur_command_list_handler_t *pHandler); + std::vector deferredEvents; - ur_command_list_handler_t *getCommandListHandlerForCompute(); - ur_command_list_handler_t *getCommandListHandlerForCopy(); - ur_command_list_handler_t *getCommandListHandlerForFill(size_t patternSize); + std::pair + getWaitListView(const ur_event_handle_t *phWaitEvents, + uint32_t numWaitEvents); - ur_event_handle_t getSignalEvent(ur_command_list_handler_t *handler, - ur_event_handle_t *hUserEvent); + ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent, + ur_command_t commandType); - ur_result_t finalizeHandler(ur_command_list_handler_t *handler); - ur_result_t finalizeHandler(ur_command_list_handler_t *handler, - bool blocking); + void deferEventFree(ur_event_handle_t hEvent) override; ur_result_t enqueueRegionCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, ur_rect_offset_t 
srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); ur_result_t enqueueGenericCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); ur_result_t enqueueGenericFillUnlocked( ur_mem_handle_t hBuffer, size_t offset, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); public: ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t, @@ -119,6 +108,10 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override; + ur_result_t enqueueEventsWaitWithBarrierExt( + const ur_exp_enqueue_ext_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index f23a6c6fe8..f31a2b5202 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -22,8 +22,12 @@ namespace umf { ur_result_t 
getProviderNativeError(const char *providerName, int32_t nativeError) { - if (strcmp(providerName, "Level Zero") == 0) { - return ze2urResult(static_cast(nativeError)); + if (strcmp(providerName, "LEVEL_ZERO") == 0) { + auto zeResult = static_cast(nativeError); + if (zeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(zeResult); } return UR_RESULT_ERROR_UNKNOWN; diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index dea28a4658..42c342444d 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -10592,6 +10592,70 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_events_wait_with_barrier_ext_params_t params = { + &hQueue, &pProperties, &numEventsInWaitList, &phEventWaitList, + &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback( + "urEnqueueEventsWaitWithBarrierExt")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urEnqueueEventsWaitWithBarrierExt")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback( + "urEnqueueEventsWaitWithBarrierExt")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -10996,6 +11060,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( pDdiTable->pfnWriteHostPipe = driver::urEnqueueWriteHostPipe; + pDdiTable->pfnEventsWaitWithBarrierExt = + driver::urEnqueueEventsWaitWithBarrierExt; + return result; } catch (...) 
{ return exceptionToResult(std::current_exception()); diff --git a/source/adapters/native_cpu/common.hpp b/source/adapters/native_cpu/common.hpp index 2b4aabfbad..af0d11c5af 100644 --- a/source/adapters/native_cpu/common.hpp +++ b/source/adapters/native_cpu/common.hpp @@ -12,6 +12,7 @@ #include "logger/ur_logger.hpp" #include "ur/ur.hpp" +#include constexpr size_t MaxMessageSize = 256; @@ -70,3 +71,31 @@ template inline void decrementOrDelete(T *refC) { if (refC->decrementReferenceCount() == 0) delete refC; } + +inline uint64_t get_timestamp() { + return std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()) + .count(); +} + +namespace native_cpu { + +inline void *aligned_malloc(size_t alignment, size_t size) { + void *ptr = nullptr; +#ifdef _MSC_VER + ptr = _aligned_malloc(size, alignment); +#else + ptr = std::aligned_alloc(alignment, size); +#endif + return ptr; +} + +inline void aligned_free(void *ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +} // namespace native_cpu diff --git a/source/adapters/native_cpu/context.cpp b/source/adapters/native_cpu/context.cpp index 8efc61a024..7c178f951a 100644 --- a/source/adapters/native_cpu/context.cpp +++ b/source/adapters/native_cpu/context.cpp @@ -64,7 +64,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/native_cpu/context.hpp b/source/adapters/native_cpu/context.hpp index c59ab4eafb..b9d2d22dd1 100644 --- a/source/adapters/native_cpu/context.hpp +++ b/source/adapters/native_cpu/context.hpp @@ -64,17 +64,10 @@ static size_t get_padding(uint32_t alignment) { // allocation so that the 
pointer returned to the user // always satisfies (ptr % align) == 0. static inline void *malloc_impl(uint32_t alignment, size_t size) { - void *ptr = nullptr; assert(alignment >= alignof(usm_alloc_info) && "memory not aligned to usm_alloc_info"); -#ifdef _MSC_VER - ptr = _aligned_malloc(alloc_header_size + get_padding(alignment) + size, - alignment); - -#else - ptr = std::aligned_alloc(alignment, - alloc_header_size + get_padding(alignment) + size); -#endif + void *ptr = native_cpu::aligned_malloc( + alignment, alloc_header_size + get_padding(alignment) + size); return ptr; } @@ -100,11 +93,8 @@ struct ur_context_handle_t_ : RefCounted { const native_cpu::usm_alloc_info &info = native_cpu::get_alloc_info(ptr); UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN, UR_RESULT_ERROR_INVALID_MEM_OBJECT); -#ifdef _MSC_VER - _aligned_free(info.base_alloc_ptr); -#else - free(info.base_alloc_ptr); -#endif + + native_cpu::aligned_free(info.base_alloc_ptr); allocations.erase(ptr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index d744d6290b..b7c454315f 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -10,6 +10,7 @@ #include +#include "common.hpp" #include "platform.hpp" #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) @@ -247,7 +248,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(uint32_t{4}); case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: return ReturnValue(uint32_t{16}); - // Imported from level_zero case UR_DEVICE_INFO_USM_HOST_SUPPORT: case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: @@ -417,6 +417,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_USM_POOL_SUPPORT: return ReturnValue(false); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: DIE_NO_IMPLEMENTATION; 
} @@ -472,19 +474,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { - std::ignore = hDevice; // todo + std::ignore = hDevice; if (pHostTimestamp) { - using namespace std::chrono; - *pHostTimestamp = - duration_cast(steady_clock::now().time_since_epoch()) - .count(); + *pHostTimestamp = get_timestamp(); } if (pDeviceTimestamp) { - // todo: calculate elapsed time properly - using namespace std::chrono; - *pDeviceTimestamp = - duration_cast(steady_clock::now().time_since_epoch()) - .count(); + *pDeviceTimestamp = get_timestamp(); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index 33d8c35c36..6e4094ddef 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -13,6 +13,7 @@ #include "ur_api.h" #include "common.hpp" +#include "event.hpp" #include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" @@ -67,10 +68,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + urEventWait(numEventsInWaitList, phEventWaitList); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pGlobalWorkOffset, UR_RESULT_ERROR_INVALID_NULL_POINTER); @@ -103,10 +102,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( } // TODO: add proper error checking - // TODO: add proper event dep management native_cpu::NDRDescT ndr(workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize); - auto &tp = hQueue->device->tp; + auto &tp = 
hQueue->getDevice()->tp; const size_t numParallelThreads = tp.num_threads(); hKernel->updateMemPool(numParallelThreads); std::vector> futures; @@ -118,6 +116,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1], ndr.LocalSize[2], ndr.GlobalOffset[0], ndr.GlobalOffset[1], ndr.GlobalOffset[2]); + auto event = new ur_event_handle_t_(hQueue, UR_COMMAND_KERNEL_LAUNCH); + event->tick_start(); + #ifndef NATIVECPU_USE_OCK hKernel->handleLocalArgs(1, 0); for (unsigned g2 = 0; g2 < numWG2; g2++) { @@ -127,7 +128,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned local1 = 0; local1 < ndr.LocalSize[1]; local1++) { for (unsigned local0 = 0; local0 < ndr.LocalSize[0]; local0++) { state.update(g0, g1, g2, local0, local1, local2); - hKernel->_subhandler(hKernel->_args.data(), &state); + hKernel->_subhandler(hKernel->getArgs().data(), &state); } } } @@ -158,13 +159,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned g2 = 0; g2 < numWG2; g2++) { for (unsigned g1 = 0; g1 < numWG1; g1++) { for (unsigned g0 = 0; g0 < new_num_work_groups_0; g0 += 1) { - futures.emplace_back( - tp.schedule_task([&ndr = std::as_const(ndr), itemsPerThread, - hKernel, g0, g1, g2](size_t) { + futures.emplace_back(tp.schedule_task( + [ndr, itemsPerThread, kernel = *hKernel, g0, g1, g2](size_t) { native_cpu::state resized_state = getResizedState(ndr, itemsPerThread); resized_state.update(g0, g1, g2); - hKernel->_subhandler(hKernel->_args.data(), &resized_state); + kernel._subhandler(kernel.getArgs().data(), &resized_state); })); } // Peel the remaining work items. 
Since the local size is 1, we iterate @@ -172,7 +172,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned g0 = new_num_work_groups_0 * itemsPerThread; g0 < numWG0; g0++) { state.update(g0, g1, g2); - hKernel->_subhandler(hKernel->_args.data(), &state); + hKernel->_subhandler(hKernel->getArgs().data(), &state); } } } @@ -190,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned g0 = 0; g0 < numWG0; g0++) { kernel.handleLocalArgs(numParallelThreads, threadId); state.update(g0, g1, g2); - kernel._subhandler(kernel._args.data(), &state); + kernel._subhandler(kernel.getArgs().data(), &state); } })); } @@ -207,7 +207,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( size_t threadId, ur_kernel_handle_t_ kernel) mutable { kernel.handleLocalArgs(numParallelThreads, threadId); state.update(g0, g1, g2); - kernel._subhandler(kernel._args.data(), &state); + kernel._subhandler(kernel.getArgs().data(), &state); }); } } @@ -216,11 +216,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( auto groupsPerThread = numGroups / numParallelThreads; auto remainder = numGroups % numParallelThreads; for (unsigned thread = 0; thread < numParallelThreads; thread++) { - futures.emplace_back(tp.schedule_task( - [&groups, thread, groupsPerThread, hKernel](size_t threadId) { + futures.emplace_back( + tp.schedule_task([groups, thread, groupsPerThread, + kernel = *hKernel](size_t threadId) { for (unsigned i = 0; i < groupsPerThread; i++) { auto index = thread * groupsPerThread + i; - groups[index](threadId, *hKernel); + groups[index](threadId, kernel); } })); } @@ -228,100 +229,141 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // schedule the remaining tasks if (remainder) { futures.emplace_back( - tp.schedule_task([&groups, remainder, + tp.schedule_task([groups, remainder, scheduled = numParallelThreads * groupsPerThread, - hKernel](size_t threadId) { + kernel = *hKernel](size_t threadId) { for 
(unsigned i = 0; i < remainder; i++) { auto index = scheduled + i; - groups[index](threadId, *hKernel); + groups[index](threadId, kernel); } })); } } } - for (auto &f : futures) - f.get(); #endif // NATIVECPU_USE_OCK - // TODO: we should avoid calling clear here by avoiding using push_back - // in setKernelArgs. - hKernel->_args.clear(); - hKernel->_localArgInfo.clear(); + event->set_futures(futures); + + *phEvent = event; + event->set_callback([hKernel, event]() { + event->tick_end(); + // TODO: avoid calling clear() here. + hKernel->_localArgInfo.clear(); + }); + + if (hQueue->isInOrder()) { + urEventWait(1, phEvent); + } + return UR_RESULT_SUCCESS; } +ur_result_t withTimingEvent(ur_command_t command_type, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + const std::function &f) { + urEventWait(numEventsInWaitList, phEventWaitList); + ur_event_handle_t event; + if (phEvent) { + event = new ur_event_handle_t_(hQueue, command_type); + event->tick_start(); + } + + ur_result_t result = f(); + + if (phEvent) { + event->tick_end(); + *phEvent = event; + } + return result; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - DIE_NO_IMPLEMENTATION; + // TODO: the wait here should be async + return withTimingEvent(UR_COMMAND_EVENTS_WAIT, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, + [&]() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = 
phEvent; + return withTimingEvent(UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, hQueue, + numEventsInWaitList, phEventWaitList, phEvent, + [&]() { return UR_RESULT_SUCCESS; }); +} - DIE_NO_IMPLEMENTATION; +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); } template static inline ur_result_t enqueueMemBufferReadWriteRect_impl( - ur_queue_handle_t, ur_mem_handle_t Buff, bool, + ur_queue_handle_t hQueue, ur_mem_handle_t Buff, bool, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, typename std::conditional::type DstMem, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - // TODO: events, blocking, check other constraints, performance optimizations - // More sharing with level_zero where possible - - if (BufferRowPitch == 0) - BufferRowPitch = region.width; - if (BufferSlicePitch == 0) - BufferSlicePitch = BufferRowPitch * region.height; - if (HostRowPitch == 0) - HostRowPitch = region.width; - if (HostSlicePitch == 0) - HostSlicePitch = HostRowPitch * region.height; - for (size_t w = 0; w < region.width; w++) - for (size_t h = 0; h < region.height; h++) - for (size_t d = 0; d < region.depth; d++) { - size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch + - (h + BufferOffset.y) * BufferRowPitch + w + - BufferOffset.x; - size_t host_origin = (d + HostOffset.z) * HostSlicePitch + - (h + HostOffset.y) * HostRowPitch + w + - HostOffset.x; - int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; - if constexpr (IsRead) - ur_cast(DstMem)[host_origin] = buff_mem; - else - buff_mem = ur_cast(DstMem)[host_origin]; - } - return UR_RESULT_SUCCESS; + uint32_t 
NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + ur_command_t command_t; + if constexpr (IsRead) + command_t = UR_COMMAND_MEM_BUFFER_READ_RECT; + else + command_t = UR_COMMAND_MEM_BUFFER_WRITE_RECT; + return withTimingEvent( + command_t, hQueue, NumEventsInWaitList, phEventWaitList, phEvent, [&]() { + // TODO: blocking, check other constraints, performance optimizations + // More sharing with level_zero where possible + + if (BufferRowPitch == 0) + BufferRowPitch = region.width; + if (BufferSlicePitch == 0) + BufferSlicePitch = BufferRowPitch * region.height; + if (HostRowPitch == 0) + HostRowPitch = region.width; + if (HostSlicePitch == 0) + HostSlicePitch = HostRowPitch * region.height; + for (size_t w = 0; w < region.width; w++) + for (size_t h = 0; h < region.height; h++) + for (size_t d = 0; d < region.depth; d++) { + size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch + + (h + BufferOffset.y) * BufferRowPitch + w + + BufferOffset.x; + size_t host_origin = (d + HostOffset.z) * HostSlicePitch + + (h + HostOffset.y) * HostRowPitch + w + + HostOffset.x; + int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; + if constexpr (IsRead) + ur_cast(DstMem)[host_origin] = buff_mem; + else + buff_mem = ur_cast(DstMem)[host_origin]; + } + + return UR_RESULT_SUCCESS; + }); } static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr, const void *SrcPtr, size_t Size, uint32_t numEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *Event) { - // todo: non-blocking, events, UR integration - std::ignore = EventWaitList; - std::ignore = Event; - std::ignore = hQueue; - std::ignore = numEventsInWaitList; - if (SrcPtr != DstPtr && Size) - memmove(DstPtr, SrcPtr, Size); - return UR_RESULT_SUCCESS; + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + ur_command_t command_type) { + return withTimingEvent(command_type, hQueue, numEventsInWaitList, + 
phEventWaitList, phEvent, [&]() { + if (SrcPtr != DstPtr && Size) + memmove(DstPtr, SrcPtr, Size); + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( @@ -331,8 +373,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( std::ignore = blockingRead; void *FromPtr = /*Src*/ hBuffer->_mem + offset; - return doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList, - phEventWaitList, phEvent); + auto res = doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ); + return res; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -342,8 +385,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( std::ignore = blockingWrite; void *ToPtr = hBuffer->_mem + offset; - return doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList, - phEventWaitList, phEvent); + auto res = doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); + return res; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( @@ -377,10 +421,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + urEventWait(numEventsInWaitList, phEventWaitList); const void *SrcPtr = hBufferSrc->_mem + srcOffset; void *DstPtr = hBufferDst->_mem + dstOffset; return doCopy_impl(hQueue, DstPtr, SrcPtr, size, numEventsInWaitList, - phEventWaitList, phEvent); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -402,22 +447,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( size_t patternSize, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = 
numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - - // TODO: error checking - // TODO: handle async - void *startingPtr = hBuffer->_mem + offset; - unsigned steps = size / patternSize; - for (unsigned i = 0; i < steps; i++) { - memcpy(static_cast(startingPtr) + i * patternSize, pPattern, - patternSize); - } + return withTimingEvent( + UR_COMMAND_MEM_BUFFER_FILL, hQueue, numEventsInWaitList, phEventWaitList, + phEvent, [&]() { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + // TODO: error checking + // TODO: handle async + void *startingPtr = hBuffer->_mem + offset; + unsigned steps = size / patternSize; + for (unsigned i = 0; i < steps; i++) { + memcpy(static_cast(startingPtr) + i * patternSize, pPattern, + patternSize); + } - return UR_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( @@ -484,106 +530,100 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - std::ignore = hQueue; std::ignore = blockingMap; std::ignore = mapFlags; std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - *ppRetMap = hBuffer->_mem + offset; - - return UR_RESULT_SUCCESS; + return withTimingEvent(UR_COMMAND_MEM_BUFFER_MAP, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, [&]() { + *ppRetMap = hBuffer->_mem + offset; + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = hMem; std::ignore = pMappedPtr; - std::ignore = numEventsInWaitList; - 
std::ignore = phEventWaitList; - std::ignore = phEvent; - - return UR_RESULT_SUCCESS; + return withTimingEvent(UR_COMMAND_MEM_UNMAP, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, + [&]() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, void *ptr, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - - UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE) - UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_SIZE) - UR_ASSERT(patternSize < size, UR_RESULT_ERROR_INVALID_SIZE) - UR_ASSERT(size % patternSize == 0, UR_RESULT_ERROR_INVALID_SIZE) - // TODO: add check for allocation size once the query is supported - - switch (patternSize) { - case 1: - memset(ptr, *static_cast(pPattern), size * patternSize); - break; - case 2: { - const auto pattern = *static_cast(pPattern); - auto *start = reinterpret_cast(ptr); - auto *end = - reinterpret_cast(reinterpret_cast(ptr) + size); - std::fill(start, end, pattern); - break; - } - case 4: { - const auto pattern = *static_cast(pPattern); - auto *start = reinterpret_cast(ptr); - auto *end = - reinterpret_cast(reinterpret_cast(ptr) + size); - std::fill(start, end, pattern); - break; - } - case 8: { - const auto pattern = *static_cast(pPattern); - auto *start = reinterpret_cast(ptr); - auto *end = - reinterpret_cast(reinterpret_cast(ptr) + size); - std::fill(start, end, pattern); - break; - } - default: { - for (unsigned int step{0}; step < size; step += patternSize) { - auto *dest = - reinterpret_cast(reinterpret_cast(ptr) + step); - memcpy(dest, pPattern, patternSize); - } - } - } - return UR_RESULT_SUCCESS; + return 
withTimingEvent( + UR_COMMAND_USM_FILL, hQueue, numEventsInWaitList, phEventWaitList, + phEvent, [&]() { + UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE) + UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_SIZE) + UR_ASSERT(patternSize < size, UR_RESULT_ERROR_INVALID_SIZE) + UR_ASSERT(size % patternSize == 0, UR_RESULT_ERROR_INVALID_SIZE) + // TODO: add check for allocation size once the query is supported + + switch (patternSize) { + case 1: + memset(ptr, *static_cast(pPattern), + size * patternSize); + break; + case 2: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = reinterpret_cast( + reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + case 4: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = reinterpret_cast( + reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + case 8: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = reinterpret_cast( + reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + default: { + for (unsigned int step{0}; step < size; step += patternSize) { + auto *dest = reinterpret_cast( + reinterpret_cast(ptr) + step); + memcpy(dest, pPattern, patternSize); + } + } + } + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = blocking; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + return withTimingEvent( + UR_COMMAND_USM_MEMCPY, hQueue, numEventsInWaitList, phEventWaitList, + phEvent, [&]() 
{ + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); + UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); + memcpy(pDst, pSrc, size); - memcpy(pDst, pSrc, size); - - return UR_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( diff --git a/source/adapters/native_cpu/event.cpp b/source/adapters/native_cpu/event.cpp index 9049e3c1b6..37eaf1f6d1 100644 --- a/source/adapters/native_cpu/event.cpp +++ b/source/adapters/native_cpu/event.cpp @@ -11,50 +11,70 @@ #include "ur_api.h" #include "common.hpp" +#include "event.hpp" +#include "queue.hpp" +#include +#include UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - std::ignore = hEvent; - std::ignore = propName; - std::ignore = propSize; - std::ignore = pPropValue; - std::ignore = pPropSizeRet; - - DIE_NO_IMPLEMENTATION; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_EVENT_INFO_COMMAND_QUEUE: + return ReturnValue(hEvent->getQueue()); + case UR_EVENT_INFO_COMMAND_TYPE: + return ReturnValue(hEvent->getCommandType()); + case UR_EVENT_INFO_REFERENCE_COUNT: + return ReturnValue(hEvent->getReferenceCount()); + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: + return ReturnValue(hEvent->getExecutionStatus()); + case UR_EVENT_INFO_CONTEXT: + return ReturnValue(hEvent->getContext()); + default: + break; + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - std::ignore = hEvent; - std::ignore = propName; - std::ignore 
= propSize; - std::ignore = pPropValue; - std::ignore = pPropSizeRet; - - DIE_NO_IMPLEMENTATION; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_PROFILING_INFO_COMMAND_START: + return ReturnValue(hEvent->get_start_timestamp()); + case UR_PROFILING_INFO_COMMAND_END: + return ReturnValue(hEvent->get_end_timestamp()); + case UR_PROFILING_INFO_COMMAND_QUEUED: + case UR_PROFILING_INFO_COMMAND_SUBMIT: + case UR_PROFILING_INFO_COMMAND_COMPLETE: + default: + break; + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - std::ignore = numEvents; - std::ignore = phEventWaitList; - // TODO: currently we do everything synchronously so this is a no-op + for (uint32_t i = 0; i < numEvents; i++) { + phEventWaitList[i]->wait(); + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - std::ignore = hEvent; - - DIE_NO_IMPLEMENTATION; + hEvent->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - std::ignore = hEvent; - DIE_NO_IMPLEMENTATION; + decrementOrDelete(hEvent); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( @@ -99,3 +119,47 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( DIE_NO_IMPLEMENTATION; } + +ur_event_handle_t_::ur_event_handle_t_(ur_queue_handle_t queue, + ur_command_t command_type) + : queue(queue), context(queue->getContext()), command_type(command_type), + done(false) { + this->queue->addEvent(this); +} + +ur_event_handle_t_::~ur_event_handle_t_() { + if (!done) { + wait(); + } +} + +void ur_event_handle_t_::wait() { + std::unique_lock lock(mutex); + if (done) { + return; + } + for (auto &f : futures) { + f.wait(); + } + queue->removeEvent(this); + done = true; + // The callback may need to acquire the 
lock, so we unlock it here + lock.unlock(); + + if (callback) + callback(); +} + +void ur_event_handle_t_::tick_start() { + if (!queue->isProfiling()) + return; + std::lock_guard lock(mutex); + timestamp_start = get_timestamp(); +} + +void ur_event_handle_t_::tick_end() { + if (!queue->isProfiling()) + return; + std::lock_guard lock(mutex); + timestamp_end = get_timestamp(); +} diff --git a/source/adapters/native_cpu/event.hpp b/source/adapters/native_cpu/event.hpp new file mode 100644 index 0000000000..60176a33a6 --- /dev/null +++ b/source/adapters/native_cpu/event.hpp @@ -0,0 +1,66 @@ +//===----------- event.hpp - Native CPU Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once +#include "common.hpp" +#include "ur_api.h" +#include +#include +#include +#include + +struct ur_event_handle_t_ : RefCounted { + + ur_event_handle_t_(ur_queue_handle_t queue, ur_command_t command_type); + + ~ur_event_handle_t_(); + + void set_callback(const std::function &cb) { callback = cb; } + + void wait(); + + uint32_t getExecutionStatus() { + // TODO: add support for UR_EVENT_STATUS_RUNNING + std::lock_guard lock(mutex); + if (done) { + return UR_EVENT_STATUS_COMPLETE; + } + return UR_EVENT_STATUS_SUBMITTED; + } + + ur_queue_handle_t getQueue() const { return queue; } + + ur_context_handle_t getContext() const { return context; } + + ur_command_t getCommandType() const { return command_type; } + + void set_futures(std::vector> &fs) { + std::lock_guard lock(mutex); + futures = std::move(fs); + } + + void tick_start(); + + void tick_end(); + + uint64_t get_start_timestamp() const { return timestamp_start; } + + uint64_t get_end_timestamp() const { return timestamp_end; } + 
+private: + ur_queue_handle_t queue; + ur_context_handle_t context; + ur_command_t command_type; + bool done; + std::mutex mutex; + std::vector> futures; + std::function callback; + uint64_t timestamp_start = 0; + uint64_t timestamp_end = 0; +}; diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index af8906245c..596a3ffdf1 100644 --- a/source/adapters/native_cpu/kernel.cpp +++ b/source/adapters/native_cpu/kernel.cpp @@ -59,18 +59,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue) { - // Todo: error checking - // Todo: I think that the opencl spec (and therefore the pi spec mandates that - // arg is copied (this is why it is defined as const void*, I guess we should - // do it - // TODO: can args arrive out of order? + // TODO: error checking std::ignore = argIndex; std::ignore = pProperties; UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); - hKernel->_args.emplace_back(const_cast(pArgValue)); + hKernel->addArg(pArgValue, argIndex, argSize); return UR_RESULT_SUCCESS; } @@ -81,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( std::ignore = pProperties; // emplace a placeholder kernel arg, gets replaced with a pointer to the // memory pool before enqueueing the kernel. - hKernel->_args.emplace_back(nullptr); + hKernel->addPtrArg(nullptr, argIndex); hKernel->_localArgInfo.emplace_back(argIndex, argSize); return UR_RESULT_SUCCESS; } @@ -221,14 +217,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_pointer_properties_t *pProperties, const void *pArgValue) { - // TODO: out_of_order args? 
std::ignore = argIndex; std::ignore = pProperties; UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pArgValue, UR_RESULT_ERROR_INVALID_NULL_POINTER); - hKernel->_args.push_back(const_cast(pArgValue)); + hKernel->addPtrArg(const_cast(pArgValue), argIndex); return UR_RESULT_SUCCESS; } @@ -262,7 +257,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *pProperties, ur_mem_handle_t hArgValue) { - // TODO: out_of_order args? std::ignore = argIndex; std::ignore = pProperties; @@ -271,11 +265,11 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, // Taken from ur/adapters/cuda/kernel.cpp // zero-sized buffers are expected to be null. if (hArgValue == nullptr) { - hKernel->_args.emplace_back(nullptr); + hKernel->addPtrArg(nullptr, argIndex); return UR_RESULT_SUCCESS; } - hKernel->_args.emplace_back(hArgValue->_mem); + hKernel->addPtrArg(hArgValue->_mem, argIndex); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/kernel.hpp b/source/adapters/native_cpu/kernel.hpp index 084a0ee695..e2df672d05 100644 --- a/source/adapters/native_cpu/kernel.hpp +++ b/source/adapters/native_cpu/kernel.hpp @@ -11,22 +11,11 @@ #include "common.hpp" #include "nativecpu_state.hpp" #include "program.hpp" -#include +#include #include #include -namespace native_cpu { - -struct NativeCPUArgDesc { - void *MPtr; - - NativeCPUArgDesc(void *Ptr) : MPtr(Ptr){}; -}; - -} // namespace native_cpu - -using nativecpu_kernel_t = void(const native_cpu::NativeCPUArgDesc *, - native_cpu::state *); +using nativecpu_kernel_t = void(void *const *, native_cpu::state *); using nativecpu_ptr_t = nativecpu_kernel_t *; using nativecpu_task_t = std::function; @@ -44,9 +33,9 @@ struct ur_kernel_handle_t_ : RefCounted { : hProgram(hProgram), _name{name}, _subhandler{std::move(subhandler)} {} ur_kernel_handle_t_(const ur_kernel_handle_t_ &other) - : hProgram(other.hProgram), 
_name(other._name), - _subhandler(other._subhandler), _args(other._args), - _localArgInfo(other._localArgInfo), _localMemPool(other._localMemPool), + : Args(other.Args), hProgram(other.hProgram), _name(other._name), + _subhandler(other._subhandler), _localArgInfo(other._localArgInfo), + _localMemPool(other._localMemPool), _localMemPoolSize(other._localMemPoolSize), ReqdWGSize(other.ReqdWGSize) { incrementReferenceCount(); @@ -55,8 +44,10 @@ struct ur_kernel_handle_t_ : RefCounted { ~ur_kernel_handle_t_() { if (decrementReferenceCount() == 0) { free(_localMemPool); + Args.deallocate(); } } + ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name, nativecpu_task_t subhandler, std::optional ReqdWGSize, @@ -66,10 +57,67 @@ struct ur_kernel_handle_t_ : RefCounted { ReqdWGSize(ReqdWGSize), MaxWGSize(MaxWGSize), MaxLinearWGSize(MaxLinearWGSize) {} + struct arguments { + using args_index_t = std::vector; + args_index_t Indices; + std::vector ParamSizes; + std::vector OwnsMem; + static constexpr size_t MaxAlign = 16 * sizeof(double); + + /// Add an argument to the kernel. + /// If the argument existed before, it is replaced. + /// Otherwise, it is added. + /// Gaps are filled with empty arguments. + /// Implicit offset argument is kept at the back of the indices collection. 
+ void addArg(size_t Index, size_t Size, const void *Arg) { + if (Index + 1 > Indices.size()) { + Indices.resize(Index + 1); + OwnsMem.resize(Index + 1); + ParamSizes.resize(Index + 1); + + // Update the stored value for the argument + Indices[Index] = native_cpu::aligned_malloc(MaxAlign, Size); + OwnsMem[Index] = true; + ParamSizes[Index] = Size; + } else { + if (ParamSizes[Index] != Size) { + Indices[Index] = realloc(Indices[Index], Size); + ParamSizes[Index] = Size; + } + } + std::memcpy(Indices[Index], Arg, Size); + } + + void addPtrArg(size_t Index, void *Arg) { + if (Index + 1 > Indices.size()) { + Indices.resize(Index + 1); + OwnsMem.resize(Index + 1); + ParamSizes.resize(Index + 1); + + OwnsMem[Index] = false; + ParamSizes[Index] = sizeof(uint8_t *); + } + Indices[Index] = Arg; + } + + // This is called by the destructor of ur_kernel_handle_t_, since + // ur_kernel_handle_t_ implements reference counting and we want + // to deallocate only when the reference count is 0. + void deallocate() { + assert(OwnsMem.size() == Indices.size() && "Size mismatch"); + for (size_t Index = 0; Index < Indices.size(); Index++) { + if (OwnsMem[Index]) + native_cpu::aligned_free(Indices[Index]); + } + } + + const args_index_t &getIndices() const noexcept { return Indices; } + + } Args; + ur_program_handle_t hProgram; std::string _name; nativecpu_task_t _subhandler; - std::vector _args; std::vector _localArgInfo; std::optional getReqdWGSize() const { @@ -99,13 +147,21 @@ struct ur_kernel_handle_t_ : RefCounted { // For each local argument we have size*numthreads size_t offset = 0; for (auto &entry : _localArgInfo) { - _args[entry.argIndex].MPtr = + Args.Indices[entry.argIndex] = _localMemPool + offset + (entry.argSize * threadId); // update offset in the memory pool offset += entry.argSize * numParallelThread; } } + const std::vector &getArgs() const { return Args.getIndices(); } + + void addArg(const void *Ptr, size_t Index, size_t Size) { + Args.addArg(Index, Size, Ptr); + } 
+ + void addPtrArg(void *Ptr, size_t Index) { Args.addPtrArg(Index, Ptr); } + private: char *_localMemPool = nullptr; size_t _localMemPoolSize = 0; diff --git a/source/adapters/native_cpu/queue.cpp b/source/adapters/native_cpu/queue.cpp index 7ee1fdf04c..e2dda24236 100644 --- a/source/adapters/native_cpu/queue.cpp +++ b/source/adapters/native_cpu/queue.cpp @@ -31,11 +31,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - std::ignore = hContext; - std::ignore = hDevice; - std::ignore = pProperties; + // TODO: UR_QUEUE_FLAG_PROFILING_ENABLE and other props - auto Queue = new ur_queue_handle_t_(hDevice); + auto Queue = new ur_queue_handle_t_(hDevice, hContext, pProperties); *phQueue = Queue; return UR_RESULT_SUCCESS; @@ -78,8 +76,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - std::ignore = hQueue; - // TODO: is this fine as no-op? + hQueue->finish(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/queue.hpp b/source/adapters/native_cpu/queue.hpp index 8c34af6327..05ff78d822 100644 --- a/source/adapters/native_cpu/queue.hpp +++ b/source/adapters/native_cpu/queue.hpp @@ -9,10 +9,48 @@ //===----------------------------------------------------------------------===// #pragma once #include "common.hpp" -#include "device.hpp" +#include "event.hpp" +#include "ur_api.h" +#include struct ur_queue_handle_t_ : RefCounted { - ur_device_handle_t_ *const device; + ur_queue_handle_t_(ur_device_handle_t device, ur_context_handle_t context, + const ur_queue_properties_t *pProps) + : device(device), context(context), + inOrder(pProps ? !(pProps->flags & + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) + : true), + profilingEnabled(pProps ? 
pProps->flags & UR_QUEUE_FLAG_PROFILING_ENABLE + : false) {} - ur_queue_handle_t_(ur_device_handle_t_ *device) : device(device) {} + ur_device_handle_t getDevice() const { return device; } + + ur_context_handle_t getContext() const { return context; } + + void addEvent(ur_event_handle_t event) { events.insert(event); } + + void removeEvent(ur_event_handle_t event) { events.erase(event); } + + void finish() { + while (!events.empty()) { + auto ev = *events.begin(); + // ur_event_handle_t_::wait removes itself from the events set in the + // queue + ev->wait(); + } + events.clear(); + } + + ~ur_queue_handle_t_() { finish(); } + + bool isInOrder() const { return inOrder; } + + bool isProfiling() const { return profilingEnabled; } + +private: + ur_device_handle_t device; + ur_context_handle_t context; + std::set events; + const bool inOrder; + const bool profilingEnabled; }; diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index e794c308f9..bf81f6bdaf 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -24,7 +24,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { auto handle = LoadLibraryA("OpenCL.dll"); #define CL_CORE_FUNCTION(FUNC) \ - FUNC = reinterpret_cast(GetProcAddress(handle, "FUNC")); + FUNC = reinterpret_cast(GetProcAddress(handle, #FUNC)); #include "core_functions.def" #undef CL_CORE_FUNCTION diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 15029d5e27..a161a5b32b 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -547,7 +547,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( *pUpdateKernelLaunch) { // Kernel handle updates are not yet supported. 
- if (pUpdateKernelLaunch->hNewKernel != hCommand->Kernel) { + if (pUpdateKernelLaunch->hNewKernel && + pUpdateKernelLaunch->hNewKernel != hCommand->Kernel) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index e21f78af6b..95fc57319d 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -218,6 +218,8 @@ CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; +CONSTFIX char CreateProgramWithILName[] = "clCreateProgramWithILKHR"; +CONSTFIX char GetKernelSubGroupInfoName[] = "clGetKernelSubGroupInfoKHR"; #undef CONSTFIX @@ -316,6 +318,13 @@ cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, cl_uint num_configs, const cl_command_buffer_update_type_khr *config_types, const void **configs); +using clCreateProgramWithILKHR_fn = CL_API_ENTRY +cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); + +using clGetKernelSubGroupInfoKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_kernel, cl_device_id, cl_kernel_sub_group_info, size_t, + const void *, size_t, void *, size_t *); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index c2c38aa753..34181eda3b 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -89,7 +89,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* These queries should be dealt with in context_impl.cpp by calling the * queries of each device separately and building the intersection set. 
*/ - return UR_RESULT_ERROR_INVALID_ARGUMENT; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } case UR_CONTEXT_INFO_NUM_DEVICES: case UR_CONTEXT_INFO_DEVICES: diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index e17211826f..70559eb52e 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -61,7 +61,8 @@ ur_result_t cl_adapter::checkDeviceExtensions( // doesn't report them. if (isIntelFPGAEmuDevice(Dev) && (Ext == "cl_intel_device_attribute_query" || - Ext == "cl_intel_required_subgroup_size")) { + Ext == "cl_intel_required_subgroup_size" || + Ext == "cl_khr_subgroups")) { Supported = true; continue; } @@ -1119,6 +1120,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -1143,17 +1146,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( switch (pProperties->pProperties->type) { case UR_DEVICE_PARTITION_EQUALLY: { CLProperty = static_cast( - pProperties->pProperties->value.equally); + pProperties->pProperties[i].value.equally); break; } case UR_DEVICE_PARTITION_BY_COUNTS: { CLProperty = static_cast( - pProperties->pProperties->value.count); + pProperties->pProperties[i].value.count); break; } case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { CLProperty = static_cast( - pProperties->pProperties->value.affinity_domain); + pProperties->pProperties[i].value.affinity_domain); break; } default: { diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 6596a01317..45e5fbb5c4 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -96,6 +96,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t 
urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, diff --git a/source/adapters/opencl/extension_functions.def b/source/adapters/opencl/extension_functions.def index 98359465ed..3f5e3ea917 100644 --- a/source/adapters/opencl/extension_functions.def +++ b/source/adapters/opencl/extension_functions.def @@ -24,3 +24,5 @@ CL_EXTENSION_FUNC(clCommandFillBufferKHR) CL_EXTENSION_FUNC(clEnqueueCommandBufferKHR) CL_EXTENSION_FUNC(clGetCommandBufferInfoKHR) CL_EXTENSION_FUNC(clUpdateMutableCommandsKHR) +CL_EXTENSION_FUNC(clCreateProgramWithILKHR) +CL_EXTENSION_FUNC(clGetKernelSubGroupInfoKHR) diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 617b6a9b2c..f60c8a2715 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// #include "common.hpp" +#include "device.hpp" #include #include @@ -189,11 +190,39 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - mapURKernelSubGroupInfoToCL(propName), - InputValueSize, InputValue.get(), - sizeof(size_t), &RetVal, pPropSizeRet); + // We need to allow for the possibility that this device runs an older CL and + // supports the original khr subgroup extension. 
+ cl_ext::clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo = nullptr; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + if (DevVer < oclv::V2_1) { + bool SubgroupExtSupported = false; + + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), {"cl_khr_subgroups"}, + SubgroupExtSupported)); + if (!SubgroupExtSupported) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + cl_context Context = nullptr; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(Context), + &Context, nullptr)); + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + Context, cl_ext::ExtFuncPtrCache->clGetKernelSubGroupInfoKHRCache, + cl_ext::GetKernelSubGroupInfoName, &GetKernelSubGroupInfo)); + } else { + GetKernelSubGroupInfo = clGetKernelSubGroupInfo; + } + + cl_int Ret = GetKernelSubGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index b2476fc420..201df1f678 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -186,6 +186,8 @@ cl_int mapURMemInfoToCL(ur_mem_info_t URPropName) { return CL_MEM_SIZE; case UR_MEM_INFO_CONTEXT: return CL_MEM_CONTEXT; + case UR_MEM_INFO_REFERENCE_COUNT: + return CL_MEM_REFERENCE_COUNT; default: return -1; } diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 218a5e7f00..b6d3a77cee 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -144,5 +144,10 @@ urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, *ppPlatformOption = "-igc_opts 
'PartitionUnit=1,SubroutineThreshold=50000'"; return UR_RESULT_SUCCESS; } + if (pFrontendOption == "-foffload-fp32-prec-div"sv || + pFrontendOption == "-foffload-fp32-prec-sqrt"sv) { + *ppPlatformOption = "-cl-fp32-correctly-rounded-divide-sqrt"; + return UR_RESULT_SUCCESS; + } return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 20aaa8fd3a..1682b8c0d0 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -99,16 +99,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - using ApiFuncT = - cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); - ApiFuncT FuncPtr = - reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - CurPlatform, "clCreateProgramWithILKHR")); + cl_ext::clCreateProgramWithILKHR_fn CreateProgramWithIL = nullptr; - assert(FuncPtr != nullptr); + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + cl_adapter::cast(hContext), + cl_ext::ExtFuncPtrCache->clCreateProgramWithILKHRCache, + cl_ext::CreateProgramWithILName, &CreateProgramWithIL)); - *phProgram = cl_adapter::cast( - FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + *phProgram = cl_adapter::cast(CreateProgramWithIL( + cl_adapter::cast(hContext), pIL, length, &Err)); } // INVALID_VALUE is only returned in three circumstances according to the cl diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index da0b659bec..cba90ee152 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -154,6 +154,7 @@ urGetMemProcAddrTable(ur_api_version_t Version, ur_mem_dditable_t *pDdiTable) { pDdiTable->pfnBufferPartition = urMemBufferPartition; pDdiTable->pfnBufferCreateWithNativeHandle = urMemBufferCreateWithNativeHandle; + pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; pDdiTable->pfnGetInfo = 
urMemGetInfo; pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; pDdiTable->pfnImageCreate = urMemImageCreate; diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index ada211f1d9..df3fb95262 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -7,6 +7,8 @@ if (UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_L0_V2) include(FetchLevelZero) set(UMF_BUILD_LEVEL_ZERO_PROVIDER ON CACHE INTERNAL "Build Level Zero Provider") set(UMF_LEVEL_ZERO_INCLUDE_DIR "${LEVEL_ZERO_INCLUDE_DIR}" CACHE INTERNAL "Level Zero headers") +else() + set(UMF_BUILD_LEVEL_ZERO_PROVIDER OFF CACHE INTERNAL "Build Level Zero Provider") endif() add_ur_library(ur_common STATIC @@ -30,8 +32,8 @@ if (NOT DEFINED UMF_REPO) endif() if (NOT DEFINED UMF_TAG) - # main 14.10.2024: Merge pull request #797... - set(UMF_TAG 673b844db8ddc2ddda393ccb77416eaebb5f9d92) + # main 28.10.2024: Merge pull request #832 ... + set(UMF_TAG 43e9af0f50b70ccb989f786243881035dd829203) endif() message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}") diff --git a/source/common/linux/ur_lib_loader.cpp b/source/common/linux/ur_lib_loader.cpp index 4da7f98bc1..3f9acc259b 100644 --- a/source/common/linux/ur_lib_loader.cpp +++ b/source/common/linux/ur_lib_loader.cpp @@ -8,6 +8,10 @@ * */ #include +#if __has_include() +#include +#define ADD_FULL_PATH_LOG +#endif #include "logger/ur_logger.hpp" #include "ur_lib_loader.hpp" @@ -50,7 +54,14 @@ LibLoader::loadAdapterLibrary(const char *name) { logger::info("failed to load adapter '{}' with error: {}", name, err ? 
err : "unknown error"); } else { - logger::info("loaded adapter 0x{} ({})", handle, name); +#if defined(ADD_FULL_PATH_LOG) + struct link_map *dlinfo_map; + if (dlinfo(handle, RTLD_DI_LINKMAP, &dlinfo_map) == 0) { + logger::info("loaded adapter 0x{} ({}) from {}", handle, name, + dlinfo_map->l_name); + } else +#endif + logger::info("loaded adapter 0x{} ({})", handle, name); } return std::unique_ptr(handle); } diff --git a/source/common/stype_map_helpers.def b/source/common/stype_map_helpers.def index c938ca6b95..ec2856e60d 100644 --- a/source/common/stype_map_helpers.def +++ b/source/common/stype_map_helpers.def @@ -99,4 +99,6 @@ template <> struct stype_map : stype_map_impl {}; template <> struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; diff --git a/source/common/windows/ur_lib_loader.cpp b/source/common/windows/ur_lib_loader.cpp index db6d5b3a17..6706c7f938 100644 --- a/source/common/windows/ur_lib_loader.cpp +++ b/source/common/windows/ur_lib_loader.cpp @@ -17,7 +17,7 @@ void LibLoader::freeAdapterLibrary(HMODULE handle) { BOOL res = FreeLibrary(handle); if (!res) { logger::error( - "Failed to unload the library with the handle at address {}", + "Failed to unload the library with the handle at address 0x{}", handle); } else { logger::info("unloaded adapter 0x{}", handle); @@ -27,10 +27,14 @@ void LibLoader::freeAdapterLibrary(HMODULE handle) { std::unique_ptr LibLoader::loadAdapterLibrary(const char *name) { - auto handle = std::unique_ptr( - LoadLibraryExA(name, nullptr, 0)); - logger::info("loaded adapter 0x{} ({})", handle, name); - return handle; + if (HMODULE handle = LoadLibraryExA(name, nullptr, 0)) { + logger::info("loaded adapter 0x{}: {}", handle, name); + return std::unique_ptr{handle}; + } else { + logger::debug("loading adapter failed with error {}: {}", + GetLastError(), name); + } + return nullptr; } void *LibLoader::getFunctionPtr(HMODULE handle, const char *func_name) { diff --git 
a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index 07dab17943..1837108645 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -28,7 +28,7 @@ if (MSVC) set(LOADER_VERSION_SCRIPT ${CMAKE_CURRENT_BINARY_DIR}/ur_loader.def) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/loader.def.in ${LOADER_VERSION_SCRIPT} @ONLY) set_target_properties(ur_loader PROPERTIES - LINK_FLAGS "/DEF:${LOADER_VERSION_SCRIPT}" + LINK_OPTIONS "LINKER:/DEF:${LOADER_VERSION_SCRIPT}" ) elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(TARGET_LIBNAME libur_loader_${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}) @@ -131,45 +131,43 @@ if(UR_ENABLE_SANITIZER) target_sources(ur_loader PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_libdevice.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_options.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_options.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_quarantine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_quarantine.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_validator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_validator.hpp - 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/common.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/stacktrace.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/stacktrace.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_allocator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_allocator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_buffer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_ddi.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_interceptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_interceptor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_libdevice.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_options.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_options.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_quarantine.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_quarantine.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_report.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_report.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_shadow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_shadow.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_statistics.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_statistics.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_common.hpp + 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_utils.hpp - ) - - target_sources(ur_loader - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/linux/backtrace.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/linux/sanitizer_utils.cpp ) if(UR_ENABLE_SYMBOLIZER) target_sources(ur_loader PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/linux/symbolizer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/symbolizer.cpp ) target_include_directories(ur_loader PRIVATE ${LLVM_INCLUDE_DIRS}) target_link_libraries(ur_loader PRIVATE LLVMSupport LLVMSymbolize) diff --git a/source/loader/layers/sanitizer/asan_allocator.cpp b/source/loader/layers/sanitizer/asan/asan_allocator.cpp similarity index 94% rename from source/loader/layers/sanitizer/asan_allocator.cpp rename to source/loader/layers/sanitizer/asan/asan_allocator.cpp index c95ed728b6..d51cb4438c 100644 --- a/source/loader/layers/sanitizer/asan_allocator.cpp +++ b/source/loader/layers/sanitizer/asan/asan_allocator.cpp @@ -14,6 +14,7 @@ #include "ur_sanitizer_layer.hpp" namespace ur_sanitizer_layer { +namespace asan { void AllocInfo::print() { getContext()->logger.info( @@ -22,4 +23,5 @@ void AllocInfo::print() { (void *)(UserEnd), AllocSize, ToString(Type)); } +} // namespace asan } // namespace ur_sanitizer_layer diff --git 
a/source/loader/layers/sanitizer/asan_allocator.hpp b/source/loader/layers/sanitizer/asan/asan_allocator.hpp similarity index 57% rename from source/loader/layers/sanitizer/asan_allocator.hpp rename to source/loader/layers/sanitizer/asan/asan_allocator.hpp index 249ef896d0..1eca00171b 100644 --- a/source/loader/layers/sanitizer/asan_allocator.hpp +++ b/source/loader/layers/sanitizer/asan/asan_allocator.hpp @@ -12,22 +12,12 @@ #pragma once -#include "common.hpp" -#include "stacktrace.hpp" - -#include -#include +#include "sanitizer_common/sanitizer_allocator.hpp" +#include "sanitizer_common/sanitizer_common.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" namespace ur_sanitizer_layer { - -enum class AllocType : uint32_t { - UNKNOWN, - DEVICE_USM, - SHARED_USM, - HOST_USM, - MEM_BUFFER, - DEVICE_GLOBAL -}; +namespace asan { struct AllocInfo { uptr AllocBegin = 0; @@ -51,21 +41,5 @@ struct AllocInfo { using AllocationMap = std::map>; using AllocationIterator = AllocationMap::iterator; -inline const char *ToString(AllocType Type) { - switch (Type) { - case AllocType::DEVICE_USM: - return "Device USM"; - case AllocType::HOST_USM: - return "Host USM"; - case AllocType::SHARED_USM: - return "Shared USM"; - case AllocType::MEM_BUFFER: - return "Memory Buffer"; - case AllocType::DEVICE_GLOBAL: - return "Device Global"; - default: - return "Unknown Type"; - } -} - +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_buffer.cpp b/source/loader/layers/sanitizer/asan/asan_buffer.cpp similarity index 96% rename from source/loader/layers/sanitizer/asan_buffer.cpp rename to source/loader/layers/sanitizer/asan/asan_buffer.cpp index 9316d68bf4..8dd93a4679 100644 --- a/source/loader/layers/sanitizer/asan_buffer.cpp +++ b/source/loader/layers/sanitizer/asan/asan_buffer.cpp @@ -12,10 +12,11 @@ #include "asan_buffer.hpp" #include "asan_interceptor.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" #include 
"ur_sanitizer_layer.hpp" -#include "ur_sanitizer_utils.hpp" namespace ur_sanitizer_layer { +namespace asan { ur_result_t EnqueueMemCopyRectHelper( ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset, @@ -91,7 +92,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - URes = getContext()->interceptor->allocateMemory( + URes = getAsanInterceptor()->allocateMemory( Context, Device, &USMDesc, Pool, Size, AllocType::MEM_BUFFER, ur_cast(&Allocation)); if (URes != UR_RESULT_SUCCESS) { @@ -129,7 +130,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - URes = getContext()->interceptor->allocateMemory( + URes = getAsanInterceptor()->allocateMemory( Context, nullptr, &USMDesc, Pool, Size, AllocType::HOST_USM, ur_cast(&HostAllocation)); if (URes != UR_RESULT_SUCCESS) { @@ -174,8 +175,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_result_t MemBuffer::free() { for (const auto &[_, Ptr] : Allocations) { - ur_result_t URes = - getContext()->interceptor->releaseMemory(Context, Ptr); + ur_result_t URes = getAsanInterceptor()->releaseMemory(Context, Ptr); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("Failed to free buffer handle {}", Ptr); return URes; @@ -200,4 +200,5 @@ size_t MemBuffer::getAlignment() { return Alignment; } +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_buffer.hpp b/source/loader/layers/sanitizer/asan/asan_buffer.hpp similarity index 97% rename from source/loader/layers/sanitizer/asan_buffer.hpp rename to source/loader/layers/sanitizer/asan/asan_buffer.hpp index 989ef4249f..46201d9f8d 100644 --- a/source/loader/layers/sanitizer/asan_buffer.hpp +++ b/source/loader/layers/sanitizer/asan/asan_buffer.hpp @@ 
-16,9 +16,10 @@ #include #include -#include "common.hpp" +#include "ur/ur.hpp" namespace ur_sanitizer_layer { +namespace asan { struct MemBuffer { // Buffer constructor @@ -77,4 +78,5 @@ ur_result_t EnqueueMemCopyRectHelper( bool Blocking, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event); +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp similarity index 89% rename from source/loader/layers/sanitizer/ur_sanddi.cpp rename to source/loader/layers/sanitizer/asan/asan_ddi.cpp index 95b1649691..53e86ef49f 100644 --- a/source/loader/layers/sanitizer/ur_sanddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -1,35 +1,37 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file ur_sanddi.cpp + * @file asan_ddi.cpp * */ +#include "asan_ddi.hpp" #include "asan_interceptor.hpp" #include "asan_options.hpp" -#include "stacktrace.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" #include "ur_sanitizer_layer.hpp" -#include "ur_sanitizer_utils.hpp" #include namespace ur_sanitizer_layer { +namespace asan { namespace { ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, const ur_device_handle_t *phDevices) { std::shared_ptr CI; - UR_CALL(getContext()->interceptor->insertContext(Context, CI)); + UR_CALL(getAsanInterceptor()->insertContext(Context, CI)); for (uint32_t i = 0; i < numDevices; ++i) { auto hDevice = phDevices[i]; std::shared_ptr DI; - UR_CALL(getContext()->interceptor->insertDevice(hDevice, DI)); + UR_CALL(getAsanInterceptor()->insertDevice(hDevice, DI)); DI->Type = GetDeviceType(Context, hDevice); if (DI->Type 
== DeviceType::UNKNOWN) { getContext()->logger.error("Unsupport device"); @@ -77,7 +79,7 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( if (result == UR_RESULT_SUCCESS && phAdapters) { const uint32_t NumAdapters = pNumAdapters ? *pNumAdapters : NumEntries; for (uint32_t i = 0; i < NumAdapters; ++i) { - UR_CALL(getContext()->interceptor->holdAdapter(phAdapters[i])); + UR_CALL(getAsanInterceptor()->holdAdapter(phAdapters[i])); } } @@ -104,7 +106,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( getContext()->logger.debug("==== urUSMHostAlloc"); - return getContext()->interceptor->allocateMemory( + return getAsanInterceptor()->allocateMemory( hContext, nullptr, pUSMDesc, pool, size, AllocType::HOST_USM, ppMem); } @@ -129,7 +131,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( getContext()->logger.debug("==== urUSMDeviceAlloc"); - return getContext()->interceptor->allocateMemory( + return getAsanInterceptor()->allocateMemory( hContext, hDevice, pUSMDesc, pool, size, AllocType::DEVICE_USM, ppMem); } @@ -154,7 +156,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( getContext()->logger.debug("==== urUSMSharedAlloc"); - return getContext()->interceptor->allocateMemory( + return getAsanInterceptor()->allocateMemory( hContext, hDevice, pUSMDesc, pool, size, AllocType::SHARED_USM, ppMem); } @@ -172,7 +174,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMFree( getContext()->logger.debug("==== urUSMFree"); - return getContext()->interceptor->releaseMemory(hContext, pMem); + return getAsanInterceptor()->releaseMemory(hContext, pMem); } /////////////////////////////////////////////////////////////////////////////// @@ -197,7 +199,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( UR_CALL( pfnProgramCreateWithIL(hContext, pIL, length, pProperties, phProgram)); - UR_CALL(getContext()->interceptor->insertProgram(*phProgram)); + UR_CALL(getAsanInterceptor()->insertProgram(*phProgram)); return UR_RESULT_SUCCESS; } @@ -233,7 +235,7 @@ 
__urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( UR_CALL(pfnProgramCreateWithBinary(hContext, numDevices, phDevices, pLengths, ppBinaries, pProperties, phProgram)); - UR_CALL(getContext()->interceptor->insertProgram(*phProgram)); + UR_CALL(getAsanInterceptor()->insertProgram(*phProgram)); return UR_RESULT_SUCCESS; } @@ -260,7 +262,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( UR_CALL(pfnProgramCreateWithNativeHandle(hNativeProgram, hContext, pProperties, phProgram)); - UR_CALL(getContext()->interceptor->insertProgram(*phProgram)); + UR_CALL(getAsanInterceptor()->insertProgram(*phProgram)); return UR_RESULT_SUCCESS; } @@ -281,7 +283,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramRetain( UR_CALL(pfnRetain(hProgram)); - auto ProgramInfo = getContext()->interceptor->getProgramInfo(hProgram); + auto ProgramInfo = getAsanInterceptor()->getProgramInfo(hProgram); UR_ASSERT(ProgramInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); ProgramInfo->RefCount++; @@ -305,7 +307,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( UR_CALL(pfnProgramBuild(hContext, hProgram, pOptions)); - UR_CALL(getContext()->interceptor->registerProgram(hContext, hProgram)); + UR_CALL(getAsanInterceptor()->registerProgram(hContext, hProgram)); return UR_RESULT_SUCCESS; } @@ -329,8 +331,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( getContext()->logger.debug("==== urProgramBuildExp"); UR_CALL(pfnBuildExp(hProgram, numDevices, phDevices, pOptions)); - UR_CALL(getContext()->interceptor->registerProgram(GetContext(hProgram), - hProgram)); + UR_CALL( + getAsanInterceptor()->registerProgram(GetContext(hProgram), hProgram)); return UR_RESULT_SUCCESS; } @@ -357,7 +359,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink( UR_CALL(pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram)); - UR_CALL(getContext()->interceptor->registerProgram(hContext, *phProgram)); + UR_CALL(getAsanInterceptor()->registerProgram(hContext, *phProgram)); 
return UR_RESULT_SUCCESS; } @@ -388,7 +390,7 @@ ur_result_t UR_APICALL urProgramLinkExp( UR_CALL(pfnProgramLinkExp(hContext, numDevices, phDevices, count, phPrograms, pOptions, phProgram)); - UR_CALL(getContext()->interceptor->registerProgram(hContext, *phProgram)); + UR_CALL(getAsanInterceptor()->registerProgram(hContext, *phProgram)); return UR_RESULT_SUCCESS; } @@ -409,11 +411,11 @@ ur_result_t UR_APICALL urProgramRelease( UR_CALL(pfnProgramRelease(hProgram)); - auto ProgramInfo = getContext()->interceptor->getProgramInfo(hProgram); + auto ProgramInfo = getAsanInterceptor()->getProgramInfo(hProgram); UR_ASSERT(ProgramInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); if (--ProgramInfo->RefCount == 0) { - UR_CALL(getContext()->interceptor->unregisterProgram(hProgram)); - UR_CALL(getContext()->interceptor->eraseProgram(hProgram)); + UR_CALL(getAsanInterceptor()->unregisterProgram(hProgram)); + UR_CALL(getAsanInterceptor()->eraseProgram(hProgram)); } return UR_RESULT_SUCCESS; @@ -463,8 +465,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( workDim); UR_CALL(LaunchInfo.initialize()); - UR_CALL(getContext()->interceptor->preLaunchKernel(hKernel, hQueue, - LaunchInfo)); + UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo)); ur_event_handle_t hEvent{}; ur_result_t result = @@ -473,8 +474,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( numEventsInWaitList, phEventWaitList, &hEvent); if (result == UR_RESULT_SUCCESS) { - UR_CALL(getContext()->interceptor->postLaunchKernel(hKernel, hQueue, - LaunchInfo)); + UR_CALL(getAsanInterceptor()->postLaunchKernel(hKernel, hQueue, + LaunchInfo)); } if (phEvent) { @@ -563,7 +564,7 @@ __urdlllocal ur_result_t UR_APICALL urContextRetain( UR_CALL(pfnRetain(hContext)); - auto ContextInfo = getContext()->interceptor->getContextInfo(hContext); + auto ContextInfo = getAsanInterceptor()->getContextInfo(hContext); UR_ASSERT(ContextInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); 
ContextInfo->RefCount++; @@ -585,10 +586,10 @@ __urdlllocal ur_result_t UR_APICALL urContextRelease( UR_CALL(pfnRelease(hContext)); - auto ContextInfo = getContext()->interceptor->getContextInfo(hContext); + auto ContextInfo = getAsanInterceptor()->getContextInfo(hContext); UR_ASSERT(ContextInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); if (--ContextInfo->RefCount == 0) { - UR_CALL(getContext()->interceptor->eraseContext(hContext)); + UR_CALL(getAsanInterceptor()->eraseContext(hContext)); } return UR_RESULT_SUCCESS; @@ -631,7 +632,7 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate( if (Host && (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER)) { std::shared_ptr CtxInfo = - getContext()->interceptor->getContextInfo(hContext); + getAsanInterceptor()->getContextInfo(hContext); for (const auto &hDevice : CtxInfo->DeviceList) { ManagedQueue InternalQueue(hContext, hDevice); char *Handle = nullptr; @@ -641,7 +642,7 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate( } } - ur_result_t result = getContext()->interceptor->insertMemBuffer(pMemBuffer); + ur_result_t result = getAsanInterceptor()->insertMemBuffer(pMemBuffer); *phBuffer = ur_cast(pMemBuffer.get()); return result; @@ -672,7 +673,7 @@ __urdlllocal ur_result_t UR_APICALL urMemGetInfo( getContext()->logger.debug("==== urMemGetInfo"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hMemory)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hMemory)) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_MEM_INFO_CONTEXT: { @@ -706,7 +707,7 @@ __urdlllocal ur_result_t UR_APICALL urMemRetain( getContext()->logger.debug("==== urMemRetain"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hMem)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hMem)) { MemBuffer->RefCount++; } else { UR_CALL(pfnRetain(hMem)); @@ -728,12 +729,12 @@ __urdlllocal ur_result_t UR_APICALL urMemRelease( getContext()->logger.debug("==== 
urMemRelease"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hMem)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hMem)) { if (--MemBuffer->RefCount != 0) { return UR_RESULT_SUCCESS; } UR_CALL(MemBuffer->free()); - UR_CALL(getContext()->interceptor->eraseMemBuffer(hMem)); + UR_CALL(getAsanInterceptor()->eraseMemBuffer(hMem)); } else { UR_CALL(pfnRelease(hMem)); } @@ -761,13 +762,13 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferPartition( getContext()->logger.debug("==== urMemBufferPartition"); - if (auto ParentBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto ParentBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { if (ParentBuffer->Size < (pRegion->origin + pRegion->size)) { return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; } std::shared_ptr SubBuffer = std::make_shared( ParentBuffer, pRegion->origin, pRegion->size); - UR_CALL(getContext()->interceptor->insertMemBuffer(SubBuffer)); + UR_CALL(getAsanInterceptor()->insertMemBuffer(SubBuffer)); *phMem = reinterpret_cast(SubBuffer.get()); } else { UR_CALL(pfnBufferPartition(hBuffer, flags, bufferCreateType, pRegion, @@ -793,7 +794,7 @@ __urdlllocal ur_result_t UR_APICALL urMemGetNativeHandle( getContext()->logger.debug("==== urMemGetNativeHandle"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hMem)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hMem)) { char *Handle = nullptr; UR_CALL(MemBuffer->getHandle(hDevice, Handle)); *phNativeMem = ur_cast(Handle); @@ -832,7 +833,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( getContext()->logger.debug("==== urEnqueueMemBufferRead"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { ur_device_handle_t Device = GetDevice(hQueue); char *pSrc = nullptr; UR_CALL(MemBuffer->getHandle(Device, pSrc)); @@ -878,7 +879,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( 
getContext()->logger.debug("==== urEnqueueMemBufferWrite"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { ur_device_handle_t Device = GetDevice(hQueue); char *pDst = nullptr; UR_CALL(MemBuffer->getHandle(Device, pDst)); @@ -935,7 +936,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( getContext()->logger.debug("==== urEnqueueMemBufferReadRect"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { char *SrcHandle = nullptr; ur_device_handle_t Device = GetDevice(hQueue); UR_CALL(MemBuffer->getHandle(Device, SrcHandle)); @@ -999,7 +1000,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( getContext()->logger.debug("==== urEnqueueMemBufferWriteRect"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { char *DstHandle = nullptr; ur_device_handle_t Device = GetDevice(hQueue); UR_CALL(MemBuffer->getHandle(Device, DstHandle)); @@ -1048,8 +1049,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( getContext()->logger.debug("==== urEnqueueMemBufferCopy"); - auto SrcBuffer = getContext()->interceptor->getMemBuffer(hBufferSrc); - auto DstBuffer = getContext()->interceptor->getMemBuffer(hBufferDst); + auto SrcBuffer = getAsanInterceptor()->getMemBuffer(hBufferSrc); + auto DstBuffer = getAsanInterceptor()->getMemBuffer(hBufferDst); UR_ASSERT((SrcBuffer && DstBuffer) || (!SrcBuffer && !DstBuffer), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -1113,8 +1114,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( getContext()->logger.debug("==== urEnqueueMemBufferCopyRect"); - auto SrcBuffer = getContext()->interceptor->getMemBuffer(hBufferSrc); - auto DstBuffer = getContext()->interceptor->getMemBuffer(hBufferDst); + auto SrcBuffer = 
getAsanInterceptor()->getMemBuffer(hBufferSrc); + auto DstBuffer = getAsanInterceptor()->getMemBuffer(hBufferDst); UR_ASSERT((SrcBuffer && DstBuffer) || (!SrcBuffer && !DstBuffer), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -1169,7 +1170,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( getContext()->logger.debug("==== urEnqueueMemBufferFill"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { char *Handle = nullptr; ur_device_handle_t Device = GetDevice(hQueue); UR_CALL(MemBuffer->getHandle(Device, Handle)); @@ -1215,7 +1216,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( getContext()->logger.debug("==== urEnqueueMemBufferMap"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hBuffer)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hBuffer)) { // Translate the host access mode info. MemBuffer::AccessMode AccessMode = MemBuffer::UNKNOWN; @@ -1245,7 +1246,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_usm_desc_t USMDesc{}; USMDesc.align = MemBuffer->getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(getContext()->interceptor->allocateMemory( + UR_CALL(getAsanInterceptor()->allocateMemory( Context, nullptr, &USMDesc, Pool, size, AllocType::HOST_USM, ppRetMap)); } @@ -1300,7 +1301,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( getContext()->logger.debug("==== urEnqueueMemUnmap"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hMem)) { + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hMem)) { MemBuffer::Mapping Mapping{}; { std::scoped_lock Guard(MemBuffer->Mutex); @@ -1323,8 +1324,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( numEventsInWaitList, phEventWaitList, phEvent)); if (!MemBuffer->HostPtr) { - UR_CALL( - getContext()->interceptor->releaseMemory(Context, pMappedPtr)); + UR_CALL(getAsanInterceptor()->releaseMemory(Context, pMappedPtr)); } 
} else { UR_CALL(pfnMemUnmap(hQueue, hMem, pMappedPtr, numEventsInWaitList, @@ -1351,7 +1351,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreate( getContext()->logger.debug("==== urKernelCreate"); UR_CALL(pfnCreate(hProgram, pKernelName, phKernel)); - UR_CALL(getContext()->interceptor->insertKernel(*phKernel)); + UR_CALL(getAsanInterceptor()->insertKernel(*phKernel)); return UR_RESULT_SUCCESS; } @@ -1371,7 +1371,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelRetain( UR_CALL(pfnRetain(hKernel)); - auto KernelInfo = getContext()->interceptor->getKernelInfo(hKernel); + auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); UR_ASSERT(KernelInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); KernelInfo->RefCount++; @@ -1392,10 +1392,10 @@ __urdlllocal ur_result_t urKernelRelease( getContext()->logger.debug("==== urKernelRelease"); UR_CALL(pfnRelease(hKernel)); - auto KernelInfo = getContext()->interceptor->getKernelInfo(hKernel); + auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); UR_ASSERT(KernelInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); if (--KernelInfo->RefCount == 0) { - UR_CALL(getContext()->interceptor->eraseKernel(hKernel)); + UR_CALL(getAsanInterceptor()->eraseKernel(hKernel)); } return UR_RESULT_SUCCESS; @@ -1422,9 +1422,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue( std::shared_ptr MemBuffer; if (argSize == sizeof(ur_mem_handle_t) && - (MemBuffer = getContext()->interceptor->getMemBuffer( + (MemBuffer = getAsanInterceptor()->getMemBuffer( *ur_cast(pArgValue)))) { - auto KernelInfo = getContext()->interceptor->getKernelInfo(hKernel); + auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KernelInfo->Mutex); KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer); } else { @@ -1452,8 +1452,8 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( getContext()->logger.debug("==== urKernelSetArgMemObj"); - if (auto MemBuffer = getContext()->interceptor->getMemBuffer(hArgValue)) { 
- auto KernelInfo = getContext()->interceptor->getKernelInfo(hKernel); + if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue)) { + auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KernelInfo->Mutex); KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer); } else { @@ -1484,7 +1484,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal( argSize); { - auto KI = getContext()->interceptor->getKernelInfo(hKernel); + auto KI = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KI->Mutex); // TODO: get local variable alignment auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal( @@ -1520,8 +1520,8 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer( "==== urKernelSetArgPointer (argIndex={}, pArgValue={})", argIndex, pArgValue); - if (getContext()->interceptor->getOptions().DetectKernelArguments) { - auto KI = getContext()->interceptor->getKernelInfo(hKernel); + if (getAsanInterceptor()->getOptions().DetectKernelArguments) { + auto KI = getAsanInterceptor()->getKernelInfo(hKernel); std::scoped_lock Guard(KI->Mutex); KI->PointerArgs[argIndex] = {pArgValue, GetCurrentBacktrace()}; } @@ -1558,7 +1558,7 @@ __urdlllocal ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnAdapterGet = ur_sanitizer_layer::urAdapterGet; + pDdiTable->pfnAdapterGet = ur_sanitizer_layer::asan::urAdapterGet; return result; } @@ -1588,12 +1588,12 @@ __urdlllocal ur_result_t UR_APICALL urGetContextProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnCreate = ur_sanitizer_layer::urContextCreate; - pDdiTable->pfnRetain = ur_sanitizer_layer::urContextRetain; - pDdiTable->pfnRelease = ur_sanitizer_layer::urContextRelease; + pDdiTable->pfnCreate = ur_sanitizer_layer::asan::urContextCreate; + pDdiTable->pfnRetain = ur_sanitizer_layer::asan::urContextRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::asan::urContextRelease; 
pDdiTable->pfnCreateWithNativeHandle = - ur_sanitizer_layer::urContextCreateWithNativeHandle; + ur_sanitizer_layer::asan::urContextCreateWithNativeHandle; return result; } @@ -1621,15 +1621,16 @@ __urdlllocal ur_result_t UR_APICALL urGetProgramProcAddrTable( return UR_RESULT_ERROR_UNSUPPORTED_VERSION; } - pDdiTable->pfnCreateWithIL = ur_sanitizer_layer::urProgramCreateWithIL; + pDdiTable->pfnCreateWithIL = + ur_sanitizer_layer::asan::urProgramCreateWithIL; pDdiTable->pfnCreateWithBinary = - ur_sanitizer_layer::urProgramCreateWithBinary; + ur_sanitizer_layer::asan::urProgramCreateWithBinary; pDdiTable->pfnCreateWithNativeHandle = - ur_sanitizer_layer::urProgramCreateWithNativeHandle; - pDdiTable->pfnBuild = ur_sanitizer_layer::urProgramBuild; - pDdiTable->pfnLink = ur_sanitizer_layer::urProgramLink; - pDdiTable->pfnRetain = ur_sanitizer_layer::urProgramRetain; - pDdiTable->pfnRelease = ur_sanitizer_layer::urProgramRelease; + ur_sanitizer_layer::asan::urProgramCreateWithNativeHandle; + pDdiTable->pfnBuild = ur_sanitizer_layer::asan::urProgramBuild; + pDdiTable->pfnLink = ur_sanitizer_layer::asan::urProgramLink; + pDdiTable->pfnRetain = ur_sanitizer_layer::asan::urProgramRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::asan::urProgramRelease; return UR_RESULT_SUCCESS; } @@ -1660,13 +1661,14 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnCreate = ur_sanitizer_layer::urKernelCreate; - pDdiTable->pfnRetain = ur_sanitizer_layer::urKernelRetain; - pDdiTable->pfnRelease = ur_sanitizer_layer::urKernelRelease; - pDdiTable->pfnSetArgValue = ur_sanitizer_layer::urKernelSetArgValue; - pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::urKernelSetArgMemObj; - pDdiTable->pfnSetArgLocal = ur_sanitizer_layer::urKernelSetArgLocal; - pDdiTable->pfnSetArgPointer = ur_sanitizer_layer::urKernelSetArgPointer; + pDdiTable->pfnCreate = ur_sanitizer_layer::asan::urKernelCreate; + pDdiTable->pfnRetain = 
ur_sanitizer_layer::asan::urKernelRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::asan::urKernelRelease; + pDdiTable->pfnSetArgValue = ur_sanitizer_layer::asan::urKernelSetArgValue; + pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::asan::urKernelSetArgMemObj; + pDdiTable->pfnSetArgLocal = ur_sanitizer_layer::asan::urKernelSetArgLocal; + pDdiTable->pfnSetArgPointer = + ur_sanitizer_layer::asan::urKernelSetArgPointer; return result; } @@ -1696,12 +1698,14 @@ __urdlllocal ur_result_t UR_APICALL urGetMemProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnBufferCreate = ur_sanitizer_layer::urMemBufferCreate; - pDdiTable->pfnRetain = ur_sanitizer_layer::urMemRetain; - pDdiTable->pfnRelease = ur_sanitizer_layer::urMemRelease; - pDdiTable->pfnBufferPartition = ur_sanitizer_layer::urMemBufferPartition; - pDdiTable->pfnGetNativeHandle = ur_sanitizer_layer::urMemGetNativeHandle; - pDdiTable->pfnGetInfo = ur_sanitizer_layer::urMemGetInfo; + pDdiTable->pfnBufferCreate = ur_sanitizer_layer::asan::urMemBufferCreate; + pDdiTable->pfnRetain = ur_sanitizer_layer::asan::urMemRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::asan::urMemRelease; + pDdiTable->pfnBufferPartition = + ur_sanitizer_layer::asan::urMemBufferPartition; + pDdiTable->pfnGetNativeHandle = + ur_sanitizer_layer::asan::urMemGetNativeHandle; + pDdiTable->pfnGetInfo = ur_sanitizer_layer::asan::urMemGetInfo; return result; } @@ -1730,8 +1734,8 @@ __urdlllocal ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnBuildExp = ur_sanitizer_layer::urProgramBuildExp; - pDdiTable->pfnLinkExp = ur_sanitizer_layer::urProgramLinkExp; + pDdiTable->pfnBuildExp = ur_sanitizer_layer::asan::urProgramBuildExp; + pDdiTable->pfnLinkExp = ur_sanitizer_layer::asan::urProgramLinkExp; return result; } @@ -1761,19 +1765,25 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnMemBufferRead = 
ur_sanitizer_layer::urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferWrite = ur_sanitizer_layer::urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferRead = + ur_sanitizer_layer::asan::urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferWrite = + ur_sanitizer_layer::asan::urEnqueueMemBufferWrite; pDdiTable->pfnMemBufferReadRect = - ur_sanitizer_layer::urEnqueueMemBufferReadRect; + ur_sanitizer_layer::asan::urEnqueueMemBufferReadRect; pDdiTable->pfnMemBufferWriteRect = - ur_sanitizer_layer::urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemBufferCopy = ur_sanitizer_layer::urEnqueueMemBufferCopy; + ur_sanitizer_layer::asan::urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemBufferCopy = + ur_sanitizer_layer::asan::urEnqueueMemBufferCopy; pDdiTable->pfnMemBufferCopyRect = - ur_sanitizer_layer::urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = ur_sanitizer_layer::urEnqueueMemBufferFill; - pDdiTable->pfnMemBufferMap = ur_sanitizer_layer::urEnqueueMemBufferMap; - pDdiTable->pfnMemUnmap = ur_sanitizer_layer::urEnqueueMemUnmap; - pDdiTable->pfnKernelLaunch = ur_sanitizer_layer::urEnqueueKernelLaunch; + ur_sanitizer_layer::asan::urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = + ur_sanitizer_layer::asan::urEnqueueMemBufferFill; + pDdiTable->pfnMemBufferMap = + ur_sanitizer_layer::asan::urEnqueueMemBufferMap; + pDdiTable->pfnMemUnmap = ur_sanitizer_layer::asan::urEnqueueMemUnmap; + pDdiTable->pfnKernelLaunch = + ur_sanitizer_layer::asan::urEnqueueKernelLaunch; return result; } @@ -1803,14 +1813,16 @@ __urdlllocal ur_result_t UR_APICALL urGetUSMProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::urUSMDeviceAlloc; - pDdiTable->pfnHostAlloc = ur_sanitizer_layer::urUSMHostAlloc; - pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::urUSMSharedAlloc; - pDdiTable->pfnFree = ur_sanitizer_layer::urUSMFree; + pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::asan::urUSMDeviceAlloc; + pDdiTable->pfnHostAlloc = 
ur_sanitizer_layer::asan::urUSMHostAlloc; + pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::asan::urUSMSharedAlloc; + pDdiTable->pfnFree = ur_sanitizer_layer::asan::urUSMFree; return result; } +} // namespace asan + ur_result_t context_t::init(ur_dditable_t *dditable, const std::set &enabledLayerNames, [[maybe_unused]] codeloc_data codelocData) { @@ -1818,7 +1830,7 @@ ur_result_t context_t::init(ur_dditable_t *dditable, if (enabledLayerNames.count("UR_LAYER_ASAN")) { enabledType = SanitizerType::AddressSanitizer; - interceptor = std::make_unique(); + initAsanInterceptor(); } else if (enabledLayerNames.count("UR_LAYER_MSAN")) { enabledType = SanitizerType::MemorySanitizer; } else if (enabledLayerNames.count("UR_LAYER_TSAN")) { @@ -1830,61 +1842,50 @@ ur_result_t context_t::init(ur_dditable_t *dditable, return result; } - if (enabledType == SanitizerType::AddressSanitizer) { - if (!(dditable->VirtualMem.pfnReserve && dditable->VirtualMem.pfnMap && - dditable->VirtualMem.pfnGranularityGetInfo)) { - die("Some VirtualMem APIs are needed to enable UR_LAYER_ASAN"); - } - - if (!dditable->PhysicalMem.pfnCreate) { - die("Some PhysicalMem APIs are needed to enable UR_LAYER_ASAN"); - } - } - urDdiTable = *dditable; if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetGlobalProcAddrTable( + result = ur_sanitizer_layer::asan::urGetGlobalProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Global); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetContextProcAddrTable( + result = ur_sanitizer_layer::asan::urGetContextProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Context); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetKernelProcAddrTable( + result = ur_sanitizer_layer::asan::urGetKernelProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Kernel); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetProgramProcAddrTable( + result = ur_sanitizer_layer::asan::urGetProgramProcAddrTable( 
UR_API_VERSION_CURRENT, &dditable->Program); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetKernelProcAddrTable( + result = ur_sanitizer_layer::asan::urGetKernelProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Kernel); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetMemProcAddrTable( + result = ur_sanitizer_layer::asan::urGetMemProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Mem); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetProgramExpProcAddrTable( + result = ur_sanitizer_layer::asan::urGetProgramExpProcAddrTable( UR_API_VERSION_CURRENT, &dditable->ProgramExp); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetEnqueueProcAddrTable( + result = ur_sanitizer_layer::asan::urGetEnqueueProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Enqueue); } if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::urGetUSMProcAddrTable( + result = ur_sanitizer_layer::asan::urGetUSMProcAddrTable( UR_API_VERSION_CURRENT, &dditable->USM); } diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.hpp b/source/loader/layers/sanitizer/asan/asan_ddi.hpp new file mode 100644 index 0000000000..735c4409d8 --- /dev/null +++ b/source/loader/layers/sanitizer/asan/asan_ddi.hpp @@ -0,0 +1,20 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file asan_ddi.hpp + * + */ + +#include "ur_ddi.h" + +namespace ur_sanitizer_layer { + +void initAsanInterceptor(); +void destroyAsanInterceptor(); + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp similarity index 83% rename from source/loader/layers/sanitizer/asan_interceptor.cpp rename to source/loader/layers/sanitizer/asan/asan_interceptor.cpp index fbcc401909..ef9be376c5 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -1,7 +1,7 @@ //===----------------------------------------------------------------------===// /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. * See LICENSE.TXT @@ -12,17 +12,19 @@ */ #include "asan_interceptor.hpp" +#include "asan_ddi.hpp" #include "asan_options.hpp" #include "asan_quarantine.hpp" #include "asan_report.hpp" #include "asan_shadow.hpp" #include "asan_validator.hpp" -#include "stacktrace.hpp" -#include "ur_sanitizer_utils.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" namespace ur_sanitizer_layer { +namespace asan { -SanitizerInterceptor::SanitizerInterceptor() { +AsanInterceptor::AsanInterceptor() { if (getOptions().MaxQuarantineSizeMB) { m_Quarantine = std::make_unique( static_cast(getOptions().MaxQuarantineSizeMB) * 1024 * @@ -30,7 +32,7 @@ SanitizerInterceptor::SanitizerInterceptor() { } } -SanitizerInterceptor::~SanitizerInterceptor() { +AsanInterceptor::~AsanInterceptor() { // We must release these objects before releasing adapters, since // they may use the adapter in their destructor for (const auto &[_, DeviceInfo] : m_DeviceMap) { @@ -39,9 +41,11 @@ 
SanitizerInterceptor::~SanitizerInterceptor() { m_Quarantine = nullptr; m_MemBufferMap.clear(); - m_AllocationMap.clear(); m_KernelMap.clear(); m_ContextMap.clear(); + // AllocationMap need to be cleared after ContextMap because memory leak + // detection depends on it. + m_AllocationMap.clear(); for (auto Adapter : m_Adapters) { getContext()->urDdiTable.Global.pfnAdapterRelease(Adapter); @@ -55,10 +59,12 @@ SanitizerInterceptor::~SanitizerInterceptor() { /// R -- right redzone (0 or more bytes) /// /// ref: "compiler-rt/lib/asan/asan_allocator.cpp" Allocator::Allocate -ur_result_t SanitizerInterceptor::allocateMemory( - ur_context_handle_t Context, ur_device_handle_t Device, - const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, - AllocType Type, void **ResultPtr) { +ur_result_t AsanInterceptor::allocateMemory(ur_context_handle_t Context, + ur_device_handle_t Device, + const ur_usm_desc_t *Properties, + ur_usm_pool_handle_t Pool, + size_t Size, AllocType Type, + void **ResultPtr) { auto ContextInfo = getContextInfo(Context); std::shared_ptr DeviceInfo = @@ -150,8 +156,8 @@ ur_result_t SanitizerInterceptor::allocateMemory( return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context, - void *Ptr) { +ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context, + void *Ptr) { auto ContextInfo = getContextInfo(Context); auto Addr = reinterpret_cast(Ptr); @@ -241,9 +247,9 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, - ur_queue_handle_t Queue, - USMLaunchInfo &LaunchInfo) { +ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + USMLaunchInfo &LaunchInfo) { auto Context = GetContext(Queue); auto Device = GetDevice(Queue); auto ContextInfo = getContextInfo(Context); @@ -266,31 +272,31 @@ ur_result_t 
SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, - ur_queue_handle_t Queue, - USMLaunchInfo &LaunchInfo) { +ur_result_t AsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + USMLaunchInfo &LaunchInfo) { // FIXME: We must use block operation here, until we support urEventSetCallback auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue); if (Result == UR_RESULT_SUCCESS) { - for (const auto &AH : LaunchInfo.Data->SanitizerReport) { - if (!AH.Flag) { + for (const auto &Report : LaunchInfo.Data->Report) { + if (!Report.Flag) { continue; } - switch (AH.ErrorType) { - case DeviceSanitizerErrorType::USE_AFTER_FREE: - ReportUseAfterFree(AH, Kernel, GetContext(Queue)); + switch (Report.ErrorTy) { + case ErrorType::USE_AFTER_FREE: + ReportUseAfterFree(Report, Kernel, GetContext(Queue)); break; - case DeviceSanitizerErrorType::OUT_OF_BOUNDS: - case DeviceSanitizerErrorType::MISALIGNED: - case DeviceSanitizerErrorType::NULL_POINTER: - ReportGenericError(AH, Kernel); + case ErrorType::OUT_OF_BOUNDS: + case ErrorType::MISALIGNED: + case ErrorType::NULL_POINTER: + ReportGenericError(Report, Kernel); break; default: - ReportFatalError(AH); + ReportFatalError(Report); } - if (!AH.IsRecover) { - exit(1); + if (!Report.IsRecover) { + exitWithErrors(); } } } @@ -316,9 +322,9 @@ ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) { /// /// ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#mapping ur_result_t -SanitizerInterceptor::enqueueAllocInfo(std::shared_ptr &DeviceInfo, - ur_queue_handle_t Queue, - std::shared_ptr &AI) { +AsanInterceptor::enqueueAllocInfo(std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue, + std::shared_ptr &AI) { if (AI->IsReleased) { int ShadowByte; switch (AI->Type) { @@ -391,9 +397,10 @@ SanitizerInterceptor::enqueueAllocInfo(std::shared_ptr 
&DeviceInfo, return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::updateShadowMemory( - std::shared_ptr &ContextInfo, - std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue) { +ur_result_t +AsanInterceptor::updateShadowMemory(std::shared_ptr &ContextInfo, + std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue) { auto &AllocInfos = ContextInfo->AllocInfosMap[DeviceInfo->Handle]; std::scoped_lock Guard(AllocInfos.Mutex); @@ -405,8 +412,8 @@ ur_result_t SanitizerInterceptor::updateShadowMemory( return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::registerProgram(ur_context_handle_t Context, - ur_program_handle_t Program) { +ur_result_t AsanInterceptor::registerProgram(ur_context_handle_t Context, + ur_program_handle_t Program) { std::vector Devices = GetDevices(Program); auto ContextInfo = getContextInfo(Context); @@ -415,22 +422,25 @@ ur_result_t SanitizerInterceptor::registerProgram(ur_context_handle_t Context, for (auto Device : Devices) { ManagedQueue Queue(Context, Device); - uint64_t NumOfDeviceGlobal; + size_t MetadataSize; + void *MetadataPtr; auto Result = - getContext()->urDdiTable.Enqueue.pfnDeviceGlobalVariableRead( - Queue, Program, kSPIR_AsanDeviceGlobalCount, true, - sizeof(NumOfDeviceGlobal), 0, &NumOfDeviceGlobal, 0, nullptr, - nullptr); + getContext()->urDdiTable.Program.pfnGetGlobalVariablePointer( + Device, Program, kSPIR_AsanDeviceGlobalMetadata, &MetadataSize, + &MetadataPtr); if (Result != UR_RESULT_SUCCESS) { getContext()->logger.info("No device globals"); continue; } + const uint64_t NumOfDeviceGlobal = + MetadataSize / sizeof(DeviceGlobalInfo); + assert((MetadataSize % sizeof(DeviceGlobalInfo) == 0) && + "DeviceGlobal metadata size is not correct"); std::vector GVInfos(NumOfDeviceGlobal); - Result = getContext()->urDdiTable.Enqueue.pfnDeviceGlobalVariableRead( - Queue, Program, kSPIR_AsanDeviceGlobalMetadata, true, - sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, &GVInfos[0], 0, - nullptr, nullptr); + Result = 
getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, &GVInfos[0], MetadataPtr, + sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr); if (Result != UR_RESULT_SUCCESS) { getContext()->logger.error("Device Global[{}] Read Failed: {}", kSPIR_AsanDeviceGlobalMetadata, Result); @@ -464,8 +474,7 @@ ur_result_t SanitizerInterceptor::registerProgram(ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t -SanitizerInterceptor::unregisterProgram(ur_program_handle_t Program) { +ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) { auto ProgramInfo = getProgramInfo(Program); std::scoped_lock Guard( @@ -479,9 +488,8 @@ SanitizerInterceptor::unregisterProgram(ur_program_handle_t Program) { return UR_RESULT_SUCCESS; } -ur_result_t -SanitizerInterceptor::insertContext(ur_context_handle_t Context, - std::shared_ptr &CI) { +ur_result_t AsanInterceptor::insertContext(ur_context_handle_t Context, + std::shared_ptr &CI) { std::scoped_lock Guard(m_ContextMapMutex); if (m_ContextMap.find(Context) != m_ContextMap.end()) { @@ -497,7 +505,7 @@ SanitizerInterceptor::insertContext(ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::eraseContext(ur_context_handle_t Context) { +ur_result_t AsanInterceptor::eraseContext(ur_context_handle_t Context) { std::scoped_lock Guard(m_ContextMapMutex); assert(m_ContextMap.find(Context) != m_ContextMap.end()); m_ContextMap.erase(Context); @@ -505,9 +513,8 @@ ur_result_t SanitizerInterceptor::eraseContext(ur_context_handle_t Context) { return UR_RESULT_SUCCESS; } -ur_result_t -SanitizerInterceptor::insertDevice(ur_device_handle_t Device, - std::shared_ptr &DI) { +ur_result_t AsanInterceptor::insertDevice(ur_device_handle_t Device, + std::shared_ptr &DI) { std::scoped_lock Guard(m_DeviceMapMutex); if (m_DeviceMap.find(Device) != m_DeviceMap.end()) { @@ -515,7 +522,7 @@ SanitizerInterceptor::insertDevice(ur_device_handle_t Device, return UR_RESULT_SUCCESS; } 
- DI = std::make_shared(Device); + DI = std::make_shared(Device); DI->IsSupportSharedSystemUSM = GetDeviceUSMCapability( Device, UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT); @@ -531,7 +538,7 @@ SanitizerInterceptor::insertDevice(ur_device_handle_t Device, return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::eraseDevice(ur_device_handle_t Device) { +ur_result_t AsanInterceptor::eraseDevice(ur_device_handle_t Device) { std::scoped_lock Guard(m_DeviceMapMutex); assert(m_DeviceMap.find(Device) != m_DeviceMap.end()); m_DeviceMap.erase(Device); @@ -539,7 +546,7 @@ ur_result_t SanitizerInterceptor::eraseDevice(ur_device_handle_t Device) { return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::insertProgram(ur_program_handle_t Program) { +ur_result_t AsanInterceptor::insertProgram(ur_program_handle_t Program) { std::scoped_lock Guard(m_ProgramMapMutex); if (m_ProgramMap.find(Program) != m_ProgramMap.end()) { return UR_RESULT_SUCCESS; @@ -548,14 +555,14 @@ ur_result_t SanitizerInterceptor::insertProgram(ur_program_handle_t Program) { return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::eraseProgram(ur_program_handle_t Program) { +ur_result_t AsanInterceptor::eraseProgram(ur_program_handle_t Program) { std::scoped_lock Guard(m_ProgramMapMutex); assert(m_ProgramMap.find(Program) != m_ProgramMap.end()); m_ProgramMap.erase(Program); return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::insertKernel(ur_kernel_handle_t Kernel) { +ur_result_t AsanInterceptor::insertKernel(ur_kernel_handle_t Kernel) { std::scoped_lock Guard(m_KernelMapMutex); if (m_KernelMap.find(Kernel) != m_KernelMap.end()) { return UR_RESULT_SUCCESS; @@ -564,7 +571,7 @@ ur_result_t SanitizerInterceptor::insertKernel(ur_kernel_handle_t Kernel) { return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::eraseKernel(ur_kernel_handle_t Kernel) { +ur_result_t AsanInterceptor::eraseKernel(ur_kernel_handle_t Kernel) { std::scoped_lock Guard(m_KernelMapMutex); 
assert(m_KernelMap.find(Kernel) != m_KernelMap.end()); m_KernelMap.erase(Kernel); @@ -572,7 +579,7 @@ ur_result_t SanitizerInterceptor::eraseKernel(ur_kernel_handle_t Kernel) { } ur_result_t -SanitizerInterceptor::insertMemBuffer(std::shared_ptr MemBuffer) { +AsanInterceptor::insertMemBuffer(std::shared_ptr MemBuffer) { std::scoped_lock Guard(m_MemBufferMapMutex); assert(m_MemBufferMap.find(ur_cast(MemBuffer.get())) == m_MemBufferMap.end()); @@ -581,7 +588,7 @@ SanitizerInterceptor::insertMemBuffer(std::shared_ptr MemBuffer) { return UR_RESULT_SUCCESS; } -ur_result_t SanitizerInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) { +ur_result_t AsanInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) { std::scoped_lock Guard(m_MemBufferMapMutex); assert(m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()); m_MemBufferMap.erase(MemHandle); @@ -589,7 +596,7 @@ ur_result_t SanitizerInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) { } std::shared_ptr -SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) { +AsanInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) { std::shared_lock Guard(m_MemBufferMapMutex); if (m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()) { return m_MemBufferMap[MemHandle]; @@ -597,7 +604,7 @@ SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) { return nullptr; } -ur_result_t SanitizerInterceptor::prepareLaunch( +ur_result_t AsanInterceptor::prepareLaunch( std::shared_ptr &ContextInfo, std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) { @@ -616,7 +623,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch( ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, ValidateResult, PtrPair.second); - exit(1); + exitWithErrors(); } } } @@ -740,7 +747,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch( } std::optional -SanitizerInterceptor::findAllocInfoByAddress(uptr Address) { 
+AsanInterceptor::findAllocInfoByAddress(uptr Address) { std::shared_lock Guard(m_AllocationMapMutex); auto It = m_AllocationMap.upper_bound(Address); if (It == m_AllocationMap.begin()) { @@ -755,7 +762,7 @@ SanitizerInterceptor::findAllocInfoByAddress(uptr Address) { } std::vector -SanitizerInterceptor::findAllocInfoByContext(ur_context_handle_t Context) { +AsanInterceptor::findAllocInfoByContext(ur_context_handle_t Context) { std::shared_lock Guard(m_AllocationMapMutex); std::vector AllocInfos; for (auto It = m_AllocationMap.begin(); It != m_AllocationMap.end(); It++) { @@ -775,12 +782,14 @@ ContextInfo::~ContextInfo() { assert(Result == UR_RESULT_SUCCESS); // check memory leaks - std::vector AllocInfos = - getContext()->interceptor->findAllocInfoByContext(Handle); - for (const auto &It : AllocInfos) { - const auto &[_, AI] = *It; - if (!AI->IsReleased) { - ReportMemoryLeak(AI); + if (getAsanInterceptor()->isNormalExit()) { + std::vector AllocInfos = + getAsanInterceptor()->findAllocInfoByContext(Handle); + for (const auto &It : AllocInfos) { + const auto &[_, AI] = *It; + if (!AI->IsReleased) { + ReportMemoryLeak(AI); + } } } } @@ -815,7 +824,7 @@ ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) { USMLaunchInfo::~USMLaunchInfo() { [[maybe_unused]] ur_result_t Result; if (Data) { - auto ContextInfo = getContext()->interceptor->getContextInfo(Context); + auto ContextInfo = getAsanInterceptor()->getContextInfo(Context); if (Data->LocalArgs) { Result = getContext()->urDdiTable.USM.pfnFree( Context, (void *)Data->LocalArgs); @@ -830,4 +839,24 @@ USMLaunchInfo::~USMLaunchInfo() { assert(Result == UR_RESULT_SUCCESS); } +} // namespace asan + +using namespace asan; + +static AsanInterceptor *interceptor; + +AsanInterceptor *getAsanInterceptor() { return interceptor; } + +void initAsanInterceptor() { + if (interceptor) { + return; + } + interceptor = new AsanInterceptor(); +} + +void destroyAsanInterceptor() { + delete interceptor; + interceptor = 
nullptr; +} + } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp similarity index 96% rename from source/loader/layers/sanitizer/asan_interceptor.hpp rename to source/loader/layers/sanitizer/asan/asan_interceptor.hpp index e5429acd56..1f8d6dab31 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. * See LICENSE.TXT @@ -18,7 +18,7 @@ #include "asan_options.hpp" #include "asan_shadow.hpp" #include "asan_statistics.hpp" -#include "common.hpp" +#include "sanitizer_common/sanitizer_common.hpp" #include "ur_sanitizer_layer.hpp" #include @@ -29,6 +29,7 @@ #include namespace ur_sanitizer_layer { +namespace asan { class Quarantine; @@ -186,11 +187,11 @@ struct DeviceGlobalInfo { uptr Addr; }; -class SanitizerInterceptor { +class AsanInterceptor { public: - explicit SanitizerInterceptor(); + explicit AsanInterceptor(); - ~SanitizerInterceptor(); + ~AsanInterceptor(); ur_result_t allocateMemory(ur_context_handle_t Context, ur_device_handle_t Device, @@ -271,6 +272,13 @@ class SanitizerInterceptor { const AsanOptions &getOptions() { return m_Options; } + void exitWithErrors() { + m_NormalExit = false; + exit(1); + } + + bool isNormalExit() { return m_NormalExit; } + private: ur_result_t updateShadowMemory(std::shared_ptr &ContextInfo, std::shared_ptr &DeviceInfo, @@ -320,6 +328,12 @@ class SanitizerInterceptor { std::unordered_set m_Adapters; ur_shared_mutex m_AdaptersMutex; + + bool m_NormalExit = true; }; +} // namespace asan + +asan::AsanInterceptor *getAsanInterceptor(); + } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_libdevice.hpp 
b/source/loader/layers/sanitizer/asan/asan_libdevice.hpp similarity index 53% rename from source/loader/layers/sanitizer/asan_libdevice.hpp rename to source/loader/layers/sanitizer/asan/asan_libdevice.hpp index 26ef946c5f..2c15d99d87 100644 --- a/source/loader/layers/sanitizer/asan_libdevice.hpp +++ b/source/loader/layers/sanitizer/asan/asan_libdevice.hpp @@ -1,47 +1,24 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file device_sanitizer_report.hpp + * @file asan_libdevice.hpp * */ #pragma once -#include +#include "sanitizer_common/sanitizer_libdevice.hpp" #if !defined(__SPIR__) && !defined(__SPIRV__) namespace ur_sanitizer_layer { #endif // !__SPIR__ && !__SPIRV__ -enum class DeviceType : uint32_t { UNKNOWN = 0, CPU, GPU_PVC, GPU_DG2 }; - -enum class DeviceSanitizerErrorType : int32_t { - UNKNOWN, - OUT_OF_BOUNDS, - MISALIGNED, - USE_AFTER_FREE, - OUT_OF_SHADOW_BOUNDS, - UNKNOWN_DEVICE, - NULL_POINTER, -}; - -enum class DeviceSanitizerMemoryType : int32_t { - UNKNOWN, - USM_DEVICE, - USM_HOST, - USM_SHARED, - LOCAL, - PRIVATE, - MEM_BUFFER, - DEVICE_GLOBAL, -}; - -struct DeviceSanitizerReport { +struct AsanErrorReport { int Flag = 0; char File[256 + 1] = {}; @@ -60,8 +37,8 @@ struct DeviceSanitizerReport { uintptr_t Address = 0; bool IsWrite = false; uint32_t AccessSize = 0; - DeviceSanitizerMemoryType MemoryType = DeviceSanitizerMemoryType::UNKNOWN; - DeviceSanitizerErrorType ErrorType = DeviceSanitizerErrorType::UNKNOWN; + MemoryType MemoryTy = MemoryType::UNKNOWN; + ErrorType ErrorTy = ErrorType::UNKNOWN; bool IsRecover = false; }; @@ -90,7 +67,7 @@ struct LaunchInfo { uint32_t Debug = 0; int ReportFlag = 0; - DeviceSanitizerReport SanitizerReport[ASAN_MAX_NUM_REPORTS]; + AsanErrorReport Report[ASAN_MAX_NUM_REPORTS]; }; constexpr unsigned 
ASAN_SHADOW_SCALE = 4; @@ -121,49 +98,8 @@ const int kPrivateLeftRedzoneMagic = (char)0xf1; const int kPrivateMidRedzoneMagic = (char)0xf2; const int kPrivateRightRedzoneMagic = (char)0xf3; -constexpr auto kSPIR_AsanDeviceGlobalCount = "__AsanDeviceGlobalCount"; constexpr auto kSPIR_AsanDeviceGlobalMetadata = "__AsanDeviceGlobalMetadata"; -inline const char *ToString(DeviceSanitizerMemoryType MemoryType) { - switch (MemoryType) { - case DeviceSanitizerMemoryType::USM_DEVICE: - return "Device USM"; - case DeviceSanitizerMemoryType::USM_HOST: - return "Host USM"; - case DeviceSanitizerMemoryType::USM_SHARED: - return "Shared USM"; - case DeviceSanitizerMemoryType::LOCAL: - return "Local Memory"; - case DeviceSanitizerMemoryType::PRIVATE: - return "Private Memory"; - case DeviceSanitizerMemoryType::MEM_BUFFER: - return "Memory Buffer"; - case DeviceSanitizerMemoryType::DEVICE_GLOBAL: - return "Device Global"; - default: - return "Unknown Memory"; - } -} - -inline const char *ToString(DeviceSanitizerErrorType ErrorType) { - switch (ErrorType) { - case DeviceSanitizerErrorType::OUT_OF_BOUNDS: - return "out-of-bounds-access"; - case DeviceSanitizerErrorType::MISALIGNED: - return "misaligned-access"; - case DeviceSanitizerErrorType::USE_AFTER_FREE: - return "use-after-free"; - case DeviceSanitizerErrorType::OUT_OF_SHADOW_BOUNDS: - return "out-of-shadow-bounds-access"; - case DeviceSanitizerErrorType::UNKNOWN_DEVICE: - return "unknown-device"; - case DeviceSanitizerErrorType::NULL_POINTER: - return "null-pointer-access"; - default: - return "unknown-error"; - } -} - #if !defined(__SPIR__) && !defined(__SPIRV__) } // namespace ur_sanitizer_layer #endif // !__SPIR__ && !__SPIRV__ diff --git a/source/loader/layers/sanitizer/asan_options.cpp b/source/loader/layers/sanitizer/asan/asan_options.cpp similarity index 98% rename from source/loader/layers/sanitizer/asan_options.cpp rename to source/loader/layers/sanitizer/asan/asan_options.cpp index 5c42ab8fca..5953a75c85 100644 --- 
a/source/loader/layers/sanitizer/asan_options.cpp +++ b/source/loader/layers/sanitizer/asan/asan_options.cpp @@ -11,6 +11,8 @@ */ #include "asan_options.hpp" + +#include "ur/ur.hpp" #include "ur_sanitizer_layer.hpp" #include @@ -18,6 +20,7 @@ #include namespace ur_sanitizer_layer { +namespace asan { AsanOptions::AsanOptions() { std::optional OptionsEnvMap; @@ -139,4 +142,5 @@ AsanOptions::AsanOptions() { } } +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_options.hpp b/source/loader/layers/sanitizer/asan/asan_options.hpp similarity index 91% rename from source/loader/layers/sanitizer/asan_options.hpp rename to source/loader/layers/sanitizer/asan/asan_options.hpp index 4c515e28fe..1385fdf6e3 100644 --- a/source/loader/layers/sanitizer/asan_options.hpp +++ b/source/loader/layers/sanitizer/asan/asan_options.hpp @@ -12,9 +12,10 @@ #pragma once -#include "common.hpp" +#include namespace ur_sanitizer_layer { +namespace asan { struct AsanOptions { bool Debug = false; @@ -29,4 +30,5 @@ struct AsanOptions { explicit AsanOptions(); }; +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_quarantine.cpp b/source/loader/layers/sanitizer/asan/asan_quarantine.cpp similarity index 96% rename from source/loader/layers/sanitizer/asan_quarantine.cpp rename to source/loader/layers/sanitizer/asan/asan_quarantine.cpp index 9826aeb62a..dab9b45e64 100644 --- a/source/loader/layers/sanitizer/asan_quarantine.cpp +++ b/source/loader/layers/sanitizer/asan/asan_quarantine.cpp @@ -13,6 +13,7 @@ #include "asan_quarantine.hpp" namespace ur_sanitizer_layer { +namespace asan { std::vector Quarantine::put(ur_device_handle_t Device, AllocationIterator &It) { @@ -33,4 +34,5 @@ std::vector Quarantine::put(ur_device_handle_t Device, return DequeueList; } +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_quarantine.hpp 
b/source/loader/layers/sanitizer/asan/asan_quarantine.hpp similarity index 97% rename from source/loader/layers/sanitizer/asan_quarantine.hpp rename to source/loader/layers/sanitizer/asan/asan_quarantine.hpp index 6dc15d382a..af5963dc25 100644 --- a/source/loader/layers/sanitizer/asan_quarantine.hpp +++ b/source/loader/layers/sanitizer/asan/asan_quarantine.hpp @@ -20,6 +20,7 @@ #include namespace ur_sanitizer_layer { +namespace asan { class QuarantineCache { public: @@ -71,4 +72,5 @@ class Quarantine { size_t m_MaxQuarantineSize; }; +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_report.cpp b/source/loader/layers/sanitizer/asan/asan_report.cpp similarity index 90% rename from source/loader/layers/sanitizer/asan_report.cpp rename to source/loader/layers/sanitizer/asan/asan_report.cpp index 7843b0f690..fe7c1e0f87 100644 --- a/source/loader/layers/sanitizer/asan_report.cpp +++ b/source/loader/layers/sanitizer/asan/asan_report.cpp @@ -16,10 +16,11 @@ #include "asan_libdevice.hpp" #include "asan_options.hpp" #include "asan_validator.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" #include "ur_sanitizer_layer.hpp" -#include "ur_sanitizer_utils.hpp" namespace ur_sanitizer_layer { +namespace asan { namespace { @@ -89,12 +90,12 @@ void ReportMemoryLeak(const std::shared_ptr &AI) { AI->AllocStack.print(); } -void ReportFatalError(const DeviceSanitizerReport &Report) { +void ReportFatalError(const AsanErrorReport &Report) { getContext()->logger.always("\n====ERROR: DeviceSanitizer: {}", - ToString(Report.ErrorType)); + ToString(Report.ErrorTy)); } -void ReportGenericError(const DeviceSanitizerReport &Report, +void ReportGenericError(const AsanErrorReport &Report, ur_kernel_handle_t Kernel) { const char *File = Report.File[0] ? Report.File : ""; const char *Func = Report.Func[0] ? 
Report.Func : ""; @@ -103,10 +104,9 @@ void ReportGenericError(const DeviceSanitizerReport &Report, // Try to demangle the kernel name KernelName = DemangleName(KernelName); - getContext()->logger.always("\n====ERROR: DeviceSanitizer: {} on {} ({})", - ToString(Report.ErrorType), - ToString(Report.MemoryType), - (void *)Report.Address); + getContext()->logger.always( + "\n====ERROR: DeviceSanitizer: {} on {} ({})", ToString(Report.ErrorTy), + ToString(Report.MemoryTy), (void *)Report.Address); getContext()->logger.always( "{} of size {} at kernel <{}> LID({}, {}, {}) GID({}, " "{}, {})", @@ -116,7 +116,7 @@ void ReportGenericError(const DeviceSanitizerReport &Report, getContext()->logger.always(" #0 {} {}:{}", Func, File, Report.Line); } -void ReportUseAfterFree(const DeviceSanitizerReport &Report, +void ReportUseAfterFree(const AsanErrorReport &Report, ur_kernel_handle_t Kernel, ur_context_handle_t Context) { const char *File = Report.File[0] ? Report.File : ""; @@ -128,7 +128,7 @@ void ReportUseAfterFree(const DeviceSanitizerReport &Report, getContext()->logger.always( "\n====ERROR: DeviceSanitizer: {} on address {}", - ToString(Report.ErrorType), (void *)Report.Address); + ToString(Report.ErrorTy), (void *)Report.Address); getContext()->logger.always( "{} of size {} at kernel <{}> LID({}, {}, {}) GID({}, " "{}, {})", @@ -138,9 +138,9 @@ void ReportUseAfterFree(const DeviceSanitizerReport &Report, getContext()->logger.always(" #0 {} {}:{}", Func, File, Report.Line); getContext()->logger.always(""); - if (getContext()->interceptor->getOptions().MaxQuarantineSizeMB > 0) { + if (getAsanInterceptor()->getOptions().MaxQuarantineSizeMB > 0) { auto AllocInfoItOp = - getContext()->interceptor->findAllocInfoByAddress(Report.Address); + getAsanInterceptor()->findAllocInfoByAddress(Report.Address); if (!AllocInfoItOp) { getContext()->logger.always( @@ -210,4 +210,5 @@ void ReportInvalidKernelArgument(ur_kernel_handle_t Kernel, uint32_t ArgIndex, } } +} // namespace asan } 
// namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_report.hpp b/source/loader/layers/sanitizer/asan/asan_report.hpp similarity index 82% rename from source/loader/layers/sanitizer/asan_report.hpp rename to source/loader/layers/sanitizer/asan/asan_report.hpp index e679b30c5d..c92ef997af 100644 --- a/source/loader/layers/sanitizer/asan_report.hpp +++ b/source/loader/layers/sanitizer/asan/asan_report.hpp @@ -12,15 +12,18 @@ #pragma once -#include "common.hpp" +#include "sanitizer_common/sanitizer_common.hpp" #include namespace ur_sanitizer_layer { -struct DeviceSanitizerReport; -struct AllocInfo; +struct AsanErrorReport; struct StackTrace; + +namespace asan { + +struct AllocInfo; struct ValidateUSMResult; void ReportBadFree(uptr Addr, const StackTrace &stack, @@ -36,16 +39,17 @@ void ReportMemoryLeak(const std::shared_ptr &AI); // This type of error is usually unexpected mistake and doesn't have enough // debug information -void ReportFatalError(const DeviceSanitizerReport &Report); +void ReportFatalError(const AsanErrorReport &Report); -void ReportGenericError(const DeviceSanitizerReport &Report, +void ReportGenericError(const AsanErrorReport &Report, ur_kernel_handle_t Kernel); -void ReportUseAfterFree(const DeviceSanitizerReport &Report, +void ReportUseAfterFree(const AsanErrorReport &Report, ur_kernel_handle_t Kernel, ur_context_handle_t Context); void ReportInvalidKernelArgument(ur_kernel_handle_t Kernel, uint32_t ArgIndex, uptr Addr, const ValidateUSMResult &VR, StackTrace Stack); +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp similarity index 97% rename from source/loader/layers/sanitizer/asan_shadow.cpp rename to source/loader/layers/sanitizer/asan/asan_shadow.cpp index f5800a694c..02bb88436e 100644 --- a/source/loader/layers/sanitizer/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ 
-13,10 +13,11 @@ #include "asan_shadow.hpp" #include "asan_interceptor.hpp" #include "asan_libdevice.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" #include "ur_sanitizer_layer.hpp" -#include "ur_sanitizer_utils.hpp" namespace ur_sanitizer_layer { +namespace asan { std::shared_ptr GetShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device, @@ -214,7 +215,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, } auto AllocInfoIt = - getContext()->interceptor->findAllocInfoByAddress(Ptr); + getAsanInterceptor()->findAllocInfoByAddress(Ptr); assert(AllocInfoIt); VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second); } @@ -270,7 +271,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE; static size_t LastAllocedSize = 0; if (RequiredShadowSize > LastAllocedSize) { - auto ContextInfo = getContext()->interceptor->getContextInfo(Context); + auto ContextInfo = getAsanInterceptor()->getContextInfo(Context); if (LocalShadowOffset) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)LocalShadowOffset)); @@ -310,7 +311,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE; static size_t LastAllocedSize = 0; if (RequiredShadowSize > LastAllocedSize) { - auto ContextInfo = getContext()->interceptor->getContextInfo(Context); + auto ContextInfo = getAsanInterceptor()->getContextInfo(Context); if (PrivateShadowOffset) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)PrivateShadowOffset)); @@ -361,4 +362,5 @@ uptr ShadowMemoryDG2::MemToShadow(uptr Ptr) { } } +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_shadow.hpp b/source/loader/layers/sanitizer/asan/asan_shadow.hpp similarity index 97% rename from source/loader/layers/sanitizer/asan_shadow.hpp rename to source/loader/layers/sanitizer/asan/asan_shadow.hpp 
index d6d6e634e6..0658a07925 100644 --- a/source/loader/layers/sanitizer/asan_shadow.hpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.hpp @@ -12,11 +12,13 @@ #pragma once -#include "asan_allocator.hpp" -#include "common.hpp" +#include "asan/asan_allocator.hpp" +#include "sanitizer_common/sanitizer_libdevice.hpp" + #include namespace ur_sanitizer_layer { +namespace asan { struct ShadowMemory { ShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device) @@ -164,4 +166,5 @@ std::shared_ptr GetShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device, DeviceType Type); +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_statistics.cpp b/source/loader/layers/sanitizer/asan/asan_statistics.cpp similarity index 96% rename from source/loader/layers/sanitizer/asan_statistics.cpp rename to source/loader/layers/sanitizer/asan/asan_statistics.cpp index 82eef69c44..5525e2aa7c 100644 --- a/source/loader/layers/sanitizer/asan_statistics.cpp +++ b/source/loader/layers/sanitizer/asan/asan_statistics.cpp @@ -17,6 +17,7 @@ #include namespace ur_sanitizer_layer { +namespace asan { struct AsanStats { void UpdateUSMMalloced(uptr MallocedSize, uptr RedzoneSize); @@ -66,7 +67,7 @@ void AsanStats::UpdateUSMFreed(uptr FreedSize) { void AsanStats::UpdateUSMRealFreed(uptr FreedSize, uptr RedzoneSize) { UsmMalloced -= FreedSize; UsmMallocedRedzones -= RedzoneSize; - if (getContext()->interceptor->getOptions().MaxQuarantineSizeMB) { + if (getAsanInterceptor()->getOptions().MaxQuarantineSizeMB) { UsmFreed -= FreedSize; } getContext()->logger.debug( @@ -136,11 +137,12 @@ void AsanStatsWrapper::Print(ur_context_handle_t Context) { } AsanStatsWrapper::AsanStatsWrapper() : Stat(nullptr) { - if (getContext()->interceptor->getOptions().PrintStats) { + if (getAsanInterceptor()->getOptions().PrintStats) { Stat = new AsanStats; } } AsanStatsWrapper::~AsanStatsWrapper() { delete Stat; } +} // namespace asan } // namespace 
ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_statistics.hpp b/source/loader/layers/sanitizer/asan/asan_statistics.hpp similarity index 90% rename from source/loader/layers/sanitizer/asan_statistics.hpp rename to source/loader/layers/sanitizer/asan/asan_statistics.hpp index fab30e28c0..ea336880d2 100644 --- a/source/loader/layers/sanitizer/asan_statistics.hpp +++ b/source/loader/layers/sanitizer/asan/asan_statistics.hpp @@ -12,9 +12,10 @@ #pragma once -#include "common.hpp" +#include "sanitizer_common/sanitizer_common.hpp" namespace ur_sanitizer_layer { +namespace asan { struct AsanStats; @@ -36,4 +37,5 @@ struct AsanStatsWrapper { AsanStats *Stat; }; +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_validator.cpp b/source/loader/layers/sanitizer/asan/asan_validator.cpp similarity index 90% rename from source/loader/layers/sanitizer/asan_validator.cpp rename to source/loader/layers/sanitizer/asan/asan_validator.cpp index a9f2bd2b17..14c9042e46 100644 --- a/source/loader/layers/sanitizer/asan_validator.cpp +++ b/source/loader/layers/sanitizer/asan/asan_validator.cpp @@ -12,9 +12,10 @@ #include "asan_validator.hpp" #include "asan_interceptor.hpp" -#include "ur_sanitizer_utils.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" namespace ur_sanitizer_layer { +namespace asan { namespace { @@ -38,9 +39,9 @@ ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context, ur_device_handle_t Device, uptr Ptr) { assert(Ptr != 0 && "Don't validate nullptr here"); - auto AllocInfoItOp = getContext()->interceptor->findAllocInfoByAddress(Ptr); + auto AllocInfoItOp = getAsanInterceptor()->findAllocInfoByAddress(Ptr); if (!AllocInfoItOp) { - auto DI = getContext()->interceptor->getDeviceInfo(Device); + auto DI = getAsanInterceptor()->getDeviceInfo(Device); bool IsSupportSharedSystemUSM = DI->IsSupportSharedSystemUSM; if (IsSupportSharedSystemUSM) { // maybe it's host pointer @@ -74,4 +75,5 @@ 
ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context, return ValidateUSMResult::success(); } +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_validator.hpp b/source/loader/layers/sanitizer/asan/asan_validator.hpp similarity index 97% rename from source/loader/layers/sanitizer/asan_validator.hpp rename to source/loader/layers/sanitizer/asan/asan_validator.hpp index 52db966562..f3347740ea 100644 --- a/source/loader/layers/sanitizer/asan_validator.hpp +++ b/source/loader/layers/sanitizer/asan/asan_validator.hpp @@ -14,6 +14,7 @@ #include "asan_allocator.hpp" namespace ur_sanitizer_layer { +namespace asan { struct ValidateUSMResult { enum ErrorType { @@ -47,4 +48,5 @@ struct ValidateUSMResult { ValidateUSMResult ValidateUSMPointer(ur_context_handle_t Context, ur_device_handle_t Device, uptr Ptr); +} // namespace asan } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/linux/backtrace.cpp b/source/loader/layers/sanitizer/sanitizer_common/linux/backtrace.cpp similarity index 91% rename from source/loader/layers/sanitizer/linux/backtrace.cpp rename to source/loader/layers/sanitizer/sanitizer_common/linux/backtrace.cpp index b746348205..6015b9d239 100644 --- a/source/loader/layers/sanitizer/linux/backtrace.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/linux/backtrace.cpp @@ -6,8 +6,10 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * + * @file backtrace.cpp + * */ -#include "stacktrace.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" #include #include diff --git a/source/loader/layers/sanitizer/linux/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp similarity index 92% rename from source/loader/layers/sanitizer/linux/sanitizer_utils.cpp rename to source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp index d0bc038174..380482ff84 100644 --- 
a/source/loader/layers/sanitizer/linux/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp @@ -1,7 +1,6 @@ -//===----------------------------------------------------------------------===// /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. * See LICENSE.TXT @@ -11,7 +10,7 @@ * */ -#include "common.hpp" +#include "sanitizer_common/sanitizer_common.hpp" #include "ur_sanitizer_layer.hpp" #include diff --git a/source/loader/layers/sanitizer/linux/symbolizer.cpp b/source/loader/layers/sanitizer/sanitizer_common/linux/symbolizer.cpp similarity index 98% rename from source/loader/layers/sanitizer/linux/symbolizer.cpp rename to source/loader/layers/sanitizer/sanitizer_common/linux/symbolizer.cpp index b1968389e5..98dcba9077 100644 --- a/source/loader/layers/sanitizer/linux/symbolizer.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/linux/symbolizer.cpp @@ -6,6 +6,8 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * + * @file symbolizer.cpp + * */ #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp new file mode 100644 index 0000000000..3176757216 --- /dev/null +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp @@ -0,0 +1,43 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file sanitizer_allocator.hpp + * + */ + +#pragma once + +namespace ur_sanitizer_layer { + +enum class AllocType { + UNKNOWN, + DEVICE_USM, + SHARED_USM, + HOST_USM, + MEM_BUFFER, + DEVICE_GLOBAL +}; + +inline const char *ToString(AllocType Type) { + switch (Type) { + case AllocType::DEVICE_USM: + return "Device USM"; + case AllocType::HOST_USM: + return "Host USM"; + case AllocType::SHARED_USM: + return "Shared USM"; + case AllocType::MEM_BUFFER: + return "Memory Buffer"; + case AllocType::DEVICE_GLOBAL: + return "Device Global"; + default: + return "Unknown Type"; + } +} + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/common.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp similarity index 91% rename from source/loader/layers/sanitizer/common.hpp rename to source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp index ea5e33ed4b..147bd23be3 100644 --- a/source/loader/layers/sanitizer/common.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp @@ -1,18 +1,17 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
* See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file common.hpp + * @file sanitizer_common.hpp * */ #pragma once -#include "asan_libdevice.hpp" #include "ur/ur.hpp" #include "ur_ddi.h" @@ -138,21 +137,6 @@ struct SourceInfo { int column = 0; }; -inline const char *ToString(DeviceType Type) { - switch (Type) { - case DeviceType::UNKNOWN: - return "UNKNOWN"; - case DeviceType::CPU: - return "CPU"; - case DeviceType::GPU_PVC: - return "PVC"; - case DeviceType::GPU_DG2: - return "DG2"; - default: - return "UNKNOWN"; - } -} - bool IsInASanContext(); uptr MmapNoReserve(uptr Addr, uptr Size); diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp new file mode 100644 index 0000000000..75e3fb9ee4 --- /dev/null +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_libdevice.hpp @@ -0,0 +1,101 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file sanitizer_libdevice.hpp + * + */ + +#pragma once + +#include + +#if !defined(__SPIR__) && !defined(__SPIRV__) +namespace ur_sanitizer_layer { +#endif // !__SPIR__ && !__SPIRV__ + +enum class DeviceType : uint32_t { UNKNOWN = 0, CPU, GPU_PVC, GPU_DG2 }; + +inline const char *ToString(DeviceType Type) { + switch (Type) { + case DeviceType::UNKNOWN: + return "UNKNOWN"; + case DeviceType::CPU: + return "CPU"; + case DeviceType::GPU_PVC: + return "PVC"; + case DeviceType::GPU_DG2: + return "DG2"; + default: + return "UNKNOWN"; + } +} + +enum class ErrorType : int32_t { + UNKNOWN, + OUT_OF_BOUNDS, + MISALIGNED, + USE_AFTER_FREE, + OUT_OF_SHADOW_BOUNDS, + UNKNOWN_DEVICE, + NULL_POINTER, +}; + +inline const char *ToString(ErrorType ErrorType) { + switch (ErrorType) { + case ErrorType::OUT_OF_BOUNDS: + return "out-of-bounds-access"; + case ErrorType::MISALIGNED: + return "misaligned-access"; + case ErrorType::USE_AFTER_FREE: + return "use-after-free"; + case ErrorType::OUT_OF_SHADOW_BOUNDS: + return "out-of-shadow-bounds-access"; + case ErrorType::UNKNOWN_DEVICE: + return "unknown-device"; + case ErrorType::NULL_POINTER: + return "null-pointer-access"; + default: + return "unknown-error"; + } +} + +enum class MemoryType : int32_t { + UNKNOWN, + USM_DEVICE, + USM_HOST, + USM_SHARED, + LOCAL, + PRIVATE, + MEM_BUFFER, + DEVICE_GLOBAL, +}; + +inline const char *ToString(MemoryType MemoryType) { + switch (MemoryType) { + case MemoryType::USM_DEVICE: + return "Device USM"; + case MemoryType::USM_HOST: + return "Host USM"; + case MemoryType::USM_SHARED: + return "Shared USM"; + case MemoryType::LOCAL: + return "Local Memory"; + case MemoryType::PRIVATE: + return "Private Memory"; + case MemoryType::MEM_BUFFER: + return "Memory Buffer"; + case MemoryType::DEVICE_GLOBAL: + return "Device Global"; + default: + return "Unknown Memory"; + } +} + +#if !defined(__SPIR__) && !defined(__SPIRV__) 
+} // namespace ur_sanitizer_layer +#endif // !__SPIR__ && !__SPIRV__ diff --git a/source/loader/layers/sanitizer/stacktrace.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp similarity index 92% rename from source/loader/layers/sanitizer/stacktrace.cpp rename to source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp index ed7f01c9cf..357eff4b77 100644 --- a/source/loader/layers/sanitizer/stacktrace.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.cpp @@ -6,12 +6,11 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file stacktrace.cpp + * @file sanitizer_stacktrace.cpp * */ -#include "stacktrace.hpp" -#include "asan_interceptor.hpp" +#include "sanitizer_stacktrace.hpp" #include "ur_sanitizer_layer.hpp" extern "C" { @@ -93,10 +92,9 @@ void StackTrace::print() const { BacktraceInfo BI = BacktraceSymbols[i]; // Skip runtime modules - if (!getContext()->interceptor->getOptions().Debug && - (Contains(BI, "libsycl.so") || Contains(BI, "libur_loader.so") || - Contains(BI, "libomptarget.rtl.unified_runtime.so") || - Contains(BI, "libomptarget.so"))) { + if (Contains(BI, "libsycl.so") || Contains(BI, "libur_loader.so") || + Contains(BI, "libomptarget.rtl.unified_runtime.so") || + Contains(BI, "libomptarget.so")) { continue; } diff --git a/source/loader/layers/sanitizer/stacktrace.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp similarity index 89% rename from source/loader/layers/sanitizer/stacktrace.hpp rename to source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp index 57811bba01..41443ee78d 100644 --- a/source/loader/layers/sanitizer/stacktrace.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp @@ -6,13 +6,13 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file stacktrace.hpp + * @file sanitizer_stacktrace.hpp * */ #pragma once -#include 
"common.hpp" +#include "sanitizer_common.hpp" #include diff --git a/source/loader/layers/sanitizer/ur_sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp similarity index 98% rename from source/loader/layers/sanitizer/ur_sanitizer_utils.cpp rename to source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 53e4326ed4..900eae405b 100644 --- a/source/loader/layers/sanitizer/ur_sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -6,11 +6,12 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file ur_sanitizer_utils.cpp + * @file sanitizer_utils.cpp * */ -#include "ur_sanitizer_utils.hpp" +#include "sanitizer_utils.hpp" +#include "sanitizer_common/sanitizer_common.hpp" #include "ur_sanitizer_layer.hpp" namespace ur_sanitizer_layer { diff --git a/source/loader/layers/sanitizer/ur_sanitizer_utils.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp similarity index 95% rename from source/loader/layers/sanitizer/ur_sanitizer_utils.hpp rename to source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index a04886e5e5..6fcb05894e 100644 --- a/source/loader/layers/sanitizer/ur_sanitizer_utils.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -6,13 +6,17 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file ur_sanitizer_utils.hpp + * @file sanitizer_utils.hpp * */ #pragma once -#include "common.hpp" +#include "sanitizer_libdevice.hpp" +#include "ur_api.h" + +#include +#include namespace ur_sanitizer_layer { diff --git a/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp b/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp index b94235cdf0..d1e00c640c 100644 --- a/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp +++ b/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * 
Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. * See LICENSE.TXT @@ -11,7 +11,7 @@ */ #include "ur_sanitizer_layer.hpp" -#include "asan_interceptor.hpp" +#include "asan/asan_ddi.hpp" namespace ur_sanitizer_layer { context_t *getContext() { return context_t::get_direct(); } @@ -21,7 +21,17 @@ context_t::context_t() : logger(logger::create_logger("sanitizer", false, false, logger::Level::WARN)) {} -ur_result_t context_t::tearDown() { return UR_RESULT_SUCCESS; } +ur_result_t context_t::tearDown() { + switch (enabledType) { + case SanitizerType::AddressSanitizer: + destroyAsanInterceptor(); + break; + default: + break; + } + + return UR_RESULT_SUCCESS; +} /////////////////////////////////////////////////////////////////////////////// context_t::~context_t() {} diff --git a/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp b/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp index e7f704f8a8..55291db82a 100644 --- a/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp +++ b/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Corporation + * Copyright (C) 2024 Intel Corporation * * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
* See LICENSE.TXT @@ -13,15 +13,12 @@ #pragma once #include "logger/ur_logger.hpp" -#include "ur/ur.hpp" #include "ur_proxy_layer.hpp" #define SANITIZER_COMP_NAME "sanitizer layer" namespace ur_sanitizer_layer { -class SanitizerInterceptor; - enum class SanitizerType { None, AddressSanitizer, @@ -35,7 +32,6 @@ class __urdlllocal context_t : public proxy_layer_context_t, public: ur_dditable_t urDdiTable = {}; logger::Logger logger; - std::unique_ptr interceptor; SanitizerType enabledType = SanitizerType::None; context_t(); @@ -52,4 +48,5 @@ class __urdlllocal context_t : public proxy_layer_context_t, }; context_t *getContext(); + } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 9cc18c66c4..64489c39ac 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9099,6 +9099,60 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. 
If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. +) { + auto pfnEventsWaitWithBarrierExt = + getContext()->urDdiTable.Enqueue.pfnEventsWaitWithBarrierExt; + + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_events_wait_with_barrier_ext_params_t params = { + &hQueue, &pProperties, &numEventsInWaitList, &phEventWaitList, + &phEvent}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT, + "urEnqueueEventsWaitWithBarrierExt", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueEventsWaitWithBarrierExt\n"); + + ur_result_t result = pfnEventsWaitWithBarrierExt( + hQueue, pProperties, numEventsInWaitList, phEventWaitList, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT, + "urEnqueueEventsWaitWithBarrierExt", &params, + &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT, + &params); + logger.info(" <--- urEnqueueEventsWaitWithBarrierExt({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -9609,6 +9663,11 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable( dditable.pfnWriteHostPipe = pDdiTable->pfnWriteHostPipe; pDdiTable->pfnWriteHostPipe = ur_tracing_layer::urEnqueueWriteHostPipe; + dditable.pfnEventsWaitWithBarrierExt = + pDdiTable->pfnEventsWaitWithBarrierExt; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur_tracing_layer::urEnqueueEventsWaitWithBarrierExt; + return result; } ///////////////////////////////////////////////////////////////////////////////
diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ef7bb019ea..195c1d3c69 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -518,7 +518,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName) { + if (UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -1108,6 +1108,10 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreate( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + if (pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype) { + return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + } + if (pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type) { return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; } @@ -1507,7 +1511,7 @@ __urdlllocal ur_result_t UR_APICALL urMemGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_MEM_INFO_CONTEXT < propName) { + if (UR_MEM_INFO_REFERENCE_COUNT < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -10139,6 +10143,69 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. 
+ ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. +) { + auto pfnEventsWaitWithBarrierExt = + getContext()->urDdiTable.Enqueue.pfnEventsWaitWithBarrierExt; + + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL != pProperties && + UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + ur_result_t result = pfnEventsWaitWithBarrierExt( + hQueue, pProperties, numEventsInWaitList, phEventWaitList, phEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -10657,6 +10724,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( dditable.pfnWriteHostPipe = pDdiTable->pfnWriteHostPipe; 
pDdiTable->pfnWriteHostPipe = ur_validation_layer::urEnqueueWriteHostPipe; + dditable.pfnEventsWaitWithBarrierExt = + pDdiTable->pfnEventsWaitWithBarrierExt; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur_validation_layer::urEnqueueEventsWaitWithBarrierExt; + return result; } diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index c34bde6fd2..b5c3bde6ea 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -69,6 +69,7 @@ EXPORTS urEnqueueDeviceGlobalVariableWrite urEnqueueEventsWait urEnqueueEventsWaitWithBarrier + urEnqueueEventsWaitWithBarrierExt urEnqueueKernelLaunch urEnqueueKernelLaunchCustomExp urEnqueueMemBufferCopy @@ -266,6 +267,7 @@ EXPORTS urPrintEnqueueDeviceGlobalVariableReadParams urPrintEnqueueDeviceGlobalVariableWriteParams urPrintEnqueueEventsWaitParams + urPrintEnqueueEventsWaitWithBarrierExtParams urPrintEnqueueEventsWaitWithBarrierParams urPrintEnqueueKernelLaunchCustomExpParams urPrintEnqueueKernelLaunchParams @@ -310,6 +312,8 @@ EXPORTS urPrintExpCommandBufferUpdateMemobjArgDesc urPrintExpCommandBufferUpdatePointerArgDesc urPrintExpCommandBufferUpdateValueArgDesc + urPrintExpEnqueueExtFlags + urPrintExpEnqueueExtProperties urPrintExpEnqueueNativeCommandFlags urPrintExpEnqueueNativeCommandProperties urPrintExpExternalMemDesc diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 8333ee2fa4..778a5da065 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -69,6 +69,7 @@ urEnqueueDeviceGlobalVariableWrite; urEnqueueEventsWait; urEnqueueEventsWaitWithBarrier; + urEnqueueEventsWaitWithBarrierExt; urEnqueueKernelLaunch; urEnqueueKernelLaunchCustomExp; urEnqueueMemBufferCopy; @@ -266,6 +267,7 @@ urPrintEnqueueDeviceGlobalVariableReadParams; urPrintEnqueueDeviceGlobalVariableWriteParams; urPrintEnqueueEventsWaitParams; + urPrintEnqueueEventsWaitWithBarrierExtParams; urPrintEnqueueEventsWaitWithBarrierParams; urPrintEnqueueKernelLaunchCustomExpParams; 
urPrintEnqueueKernelLaunchParams; @@ -310,6 +312,8 @@ urPrintExpCommandBufferUpdateMemobjArgDesc; urPrintExpCommandBufferUpdatePointerArgDesc; urPrintExpCommandBufferUpdateValueArgDesc; + urPrintExpEnqueueExtFlags; + urPrintExpEnqueueExtProperties; urPrintExpEnqueueNativeCommandFlags; urPrintExpEnqueueNativeCommandProperties; urPrintExpExternalMemDesc; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index a67879a9eb..86a6ad95a0 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -9220,6 +9220,71 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnEventsWaitWithBarrierExt = + dditable->ur.Enqueue.pfnEventsWaitWithBarrierExt; + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = + pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, + phEventWaitListLocal.data(), phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. + if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -9712,6 +9777,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur_loader::urEnqueueDeviceGlobalVariableRead; pDdiTable->pfnReadHostPipe = ur_loader::urEnqueueReadHostPipe; pDdiTable->pfnWriteHostPipe = ur_loader::urEnqueueWriteHostPipe; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur_loader::urEnqueueEventsWaitWithBarrierExt; } else { // return pointers directly to platform's DDIs 
*pDdiTable = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 45ee5e7531..8dca26d4ba 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -870,7 +870,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -1541,6 +1541,7 @@ ur_result_t UR_APICALL urContextSetExtendedDeleter( /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR +/// + `pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype` /// + `pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type` /// + `pImageDesc && pImageDesc->numMipLevel != 0` /// + `pImageDesc && pImageDesc->numSamples != 0` @@ -1890,7 +1891,7 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hMemory` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_MEM_INFO_CONTEXT < propName` +/// + `::UR_MEM_INFO_REFERENCE_COUNT < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -9435,6 +9436,69 @@ ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a barrier command which waits a list of events to complete +/// before it completes, with optional extended properties +/// +/// @details +/// - If the event list is empty, it waits for all previously enqueued +/// commands to complete. 
+/// - It blocks command execution - any following commands enqueued after it +/// do not execute until it completes. +/// - It returns an event which can be waited on. +/// +/// @remarks +/// _Analogues_ +/// - **clEnqueueBarrierWithWaitList** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. 
If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. + ) try { + auto pfnEventsWaitWithBarrierExt = + ur_lib::getContext()->urDdiTable.Enqueue.pfnEventsWaitWithBarrierExt; + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, + phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Immediately enqueue work through a native backend API /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 3a14d9a9de..d8206edb3f 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1069,6 +1069,22 @@ ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpEnqueueExtFlags(enum ur_exp_enqueue_ext_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpEnqueueExtProperties( + const struct ur_exp_enqueue_ext_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpEnqueueNativeCommandFlags( enum ur_exp_enqueue_native_command_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1746,6 +1762,14 @@ ur_result_t urPrintEnqueueKernelLaunchCustomExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintEnqueueEventsWaitWithBarrierExtParams( + const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << 
params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintEnqueueCooperativeKernelLaunchExpParams( const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index 7e53425000..5a23a88ba9 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -304,7 +304,8 @@ class UrReturnHelper { // Array return value where element type is differrent from T template - ur_result_t operator()(const T *t, size_t s) { + std::enable_if_t, ur_result_t> + operator()(const T *t, size_t s) { return ur::getInfoArray(s, param_value_size, param_value, param_value_size_ret, t); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 140a20135b..22c76f122e 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -771,7 +771,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -1342,6 +1342,7 @@ ur_result_t UR_APICALL urContextSetExtendedDeleter( /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR +/// + `pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype` /// + `pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type` /// + `pImageDesc && pImageDesc->numMipLevel != 0` /// + `pImageDesc && pImageDesc->numSamples != 0` @@ -1636,7 +1637,7 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hMemory` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_MEM_INFO_CONTEXT < propName` +/// + `::UR_MEM_INFO_REFERENCE_COUNT < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -8001,6 +8002,61 @@ ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a barrier command which waits a list of events to complete +/// before it completes, with optional extended properties +/// +/// @details +/// - If the event list is empty, it waits for all previously enqueued +/// commands to complete. +/// - It blocks command execution - any following commands enqueued after it +/// do not execute until it completes. +/// - It returns an event which can be waited on. 
+/// +/// @remarks +/// _Analogues_ +/// - **clEnqueueBarrierWithWaitList** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Immediately enqueue work through a native backend API /// diff --git a/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp b/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp index b3918c7818..065b3520b8 100644 --- a/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp +++ b/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp @@ -8,7 +8,7 @@ #include "ze_api.h" #include -using urLevelZeroKernelNativeHandleTest = uur::urContextTest; +using urLevelZeroKernelNativeHandleTest = uur::urQueueTest; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urLevelZeroKernelNativeHandleTest); TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) { @@ -60,6 +60,13 @@ TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) { ASSERT_SUCCESS(urKernelCreateWithNativeHandle( (ur_native_handle_t)native_kernel, context, program, &kprops, &kernel)); + size_t global_offset = 0; + size_t local_size = 1; + size_t global_size = 1; + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &global_offset, + &local_size, &global_size, 0, nullptr, + nullptr)); + ASSERT_SUCCESS(urKernelRelease(kernel)); ASSERT_SUCCESS(urProgramRelease(program)); } diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index f6fa03bd6a..f53cf15256 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -33,18 +33,24 @@ add_unittest(level_zero_command_list_cache ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/command_list_cache.cpp ) -add_unittest(level_zero_event_pool - event_pool_test.cpp - ${PROJECT_SOURCE_DIR}/source/ur/ur.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/adapter.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/device.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/platform.cpp - 
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool_cache.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_counter.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event.cpp -) +if(CXX_HAS_CFI_SANITIZE) + message(WARNING "Level Zero V2 Event Pool tests are disabled when using CFI sanitizer") + message(NOTE "See https://github.com/oneapi-src/unified-runtime/issues/2324") +else() + add_unittest(level_zero_event_pool + event_pool_test.cpp + ${PROJECT_SOURCE_DIR}/source/ur/ur.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/adapter.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/device.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/platform.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool_cache.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_counter.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/queue_api.cpp + ) +endif() add_adapter_test(level_zero_memory_residency FIXTURE DEVICES diff --git a/test/adapters/level_zero/v2/command_list_cache_test.cpp b/test/adapters/level_zero/v2/command_list_cache_test.cpp index fcaae55a96..3ddeaeff6e 100644 --- a/test/adapters/level_zero/v2/command_list_cache_test.cpp +++ b/test/adapters/level_zero/v2/command_list_cache_test.cpp @@ -40,13 +40,13 @@ TEST_P(CommandListCacheTest, CanStoreAndRetriveImmediateAndRegularCmdLists) { // get command lists from the cache for (int i = 0; i < numListsPerType; ++i) { - regCmdListOwners.emplace_back( - cache.getRegularCommandList(device->ZeDevice, IsInOrder, Ordinal)); + 
regCmdListOwners.emplace_back(cache.getRegularCommandList( + device->ZeDevice, IsInOrder, Ordinal, true)); auto [it, _] = regCmdLists.emplace(regCmdListOwners.back().get()); ASSERT_TRUE(*it != nullptr); immCmdListOwners.emplace_back(cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority)); + device->ZeDevice, IsInOrder, Ordinal, true, Mode, Priority)); std::tie(it, _) = immCmdLists.emplace(immCmdListOwners.back().get()); ASSERT_TRUE(*it != nullptr); } @@ -57,12 +57,12 @@ TEST_P(CommandListCacheTest, CanStoreAndRetriveImmediateAndRegularCmdLists) { // verify we get back the same command lists for (int i = 0; i < numListsPerType; ++i) { - auto regCmdList = - cache.getRegularCommandList(device->ZeDevice, IsInOrder, Ordinal); + auto regCmdList = cache.getRegularCommandList(device->ZeDevice, + IsInOrder, Ordinal, true); ASSERT_TRUE(regCmdList != nullptr); auto immCmdList = cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority); + device->ZeDevice, IsInOrder, Ordinal, true, Mode, Priority); ASSERT_TRUE(immCmdList != nullptr); ASSERT_EQ(regCmdLists.erase(regCmdList.get()), 1); @@ -103,7 +103,8 @@ TEST_P(CommandListCacheTest, ImmediateCommandListsHaveProperAttributes) { for (uint32_t Index = 0; Index < QueueGroupProperties[Ordinal].numQueues; Index++) { auto CommandList = cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority, Index); + device->ZeDevice, IsInOrder, Ordinal, true, Mode, Priority, + Index); ze_device_handle_t ZeDevice; auto Ret = @@ -133,8 +134,9 @@ TEST_P(CommandListCacheTest, ImmediateCommandListsHaveProperAttributes) { } // verify list creation without an index - auto CommandList = cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority, std::nullopt); + auto CommandList = + cache.getImmediateCommandList(device->ZeDevice, IsInOrder, Ordinal, + true, Mode, Priority, std::nullopt); ze_device_handle_t ZeDevice; auto Ret = 
zeCommandListGetDeviceHandle(CommandList.get(), &ZeDevice); @@ -207,7 +209,7 @@ TEST_P(CommandListCacheTest, CommandListsAreReusedByQueues) { } // Queues scope ASSERT_EQ(context->commandListCache.getNumImmediateCommandLists(), - NumUniqueQueueTypes * 2); // * 2 for compute and copy + NumUniqueQueueTypes); ASSERT_EQ(context->commandListCache.getNumRegularCommandLists(), 0); } } @@ -236,7 +238,7 @@ TEST_P(CommandListCacheTest, CommandListsCacheIsThreadSafe) { ASSERT_LE( context->commandListCache.getNumImmediateCommandLists(), - NumThreads * 2); // * 2 for compute and copy + NumThreads); } }); } @@ -246,5 +248,5 @@ TEST_P(CommandListCacheTest, CommandListsCacheIsThreadSafe) { } ASSERT_LE(context->commandListCache.getNumImmediateCommandLists(), - NumThreads * 2); + NumThreads); } diff --git a/test/adapters/level_zero/v2/event_pool_test.cpp b/test/adapters/level_zero/v2/event_pool_test.cpp index 9443e8fa7a..1029d471df 100644 --- a/test/adapters/level_zero/v2/event_pool_test.cpp +++ b/test/adapters/level_zero/v2/event_pool_test.cpp @@ -150,7 +150,8 @@ TEST_P(EventPoolTest, Basic) { { auto pool = cache->borrow(device->Id.value(), getParam().flags); - first = pool->allocate(); + first = pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH); zeFirst = first->getZeEvent(); urEventRelease(first); @@ -160,7 +161,8 @@ TEST_P(EventPoolTest, Basic) { { auto pool = cache->borrow(device->Id.value(), getParam().flags); - second = pool->allocate(); + second = pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH); zeSecond = second->getZeEvent(); urEventRelease(second); @@ -179,7 +181,9 @@ TEST_P(EventPoolTest, Threaded) { auto pool = cache->borrow(device->Id.value(), getParam().flags); std::vector events; for (int i = 0; i < 100; ++i) { - events.push_back(pool->allocate()); + events.push_back( + pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH)); } for (int i = 0; i < 100; ++i) { urEventRelease(events[i]); @@ -197,7 +201,9 @@ 
TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) { auto pool = cache->borrow(device->Id.value(), getParam().flags); std::list events; for (int i = 0; i < 128; ++i) { - events.push_back(pool->allocate()); + events.push_back( + pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH)); } auto frontZeHandle = events.front()->getZeEvent(); for (int i = 0; i < 8; ++i) { @@ -205,7 +211,8 @@ TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) { events.pop_front(); } for (int i = 0; i < 8; ++i) { - auto e = pool->allocate(); + auto e = pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH); events.push_back(e); } diff --git a/test/conformance/adapter/adapter_adapter_native_cpu.match b/test/conformance/adapter/adapter_adapter_native_cpu.match index 1335caf904..ea65399d2f 100644 --- a/test/conformance/adapter/adapter_adapter_native_cpu.match +++ b/test/conformance/adapter/adapter_adapter_native_cpu.match @@ -1,5 +1,5 @@ -{{NONDETERMINISTIC}} -urAdapterGetLastErrorTest.Success -urAdapterGetLastErrorTest.InvalidHandle -urAdapterGetLastErrorTest.InvalidMessagePtr -urAdapterGetLastErrorTest.InvalidErrorPtr +# These pass when the adapter is launched by the loader +{{OPT}}urAdapterGetLastErrorTest.Success +{{OPT}}urAdapterGetLastErrorTest.InvalidHandle +{{OPT}}urAdapterGetLastErrorTest.InvalidMessagePtr +{{OPT}}urAdapterGetLastErrorTest.InvalidErrorPtr diff --git a/test/conformance/context/context_adapter_level_zero.match b/test/conformance/context/context_adapter_level_zero.match index c36611b9a5..82c2e813e5 100644 --- a/test/conformance/context/context_adapter_level_zero.match +++ b/test/conformance/context/context_adapter_level_zero.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urContextSetExtendedDeleterTest.Success/* diff --git a/test/conformance/context/context_adapter_level_zero_v2.match 
b/test/conformance/context/context_adapter_level_zero_v2.match index 2e6ea80468..82c2e813e5 100644 --- a/test/conformance/context/context_adapter_level_zero_v2.match +++ b/test/conformance/context/context_adapter_level_zero_v2.match @@ -1 +1 @@ -urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ +{{OPT}}urContextSetExtendedDeleterTest.Success/* diff --git a/test/conformance/context/context_adapter_native_cpu.match b/test/conformance/context/context_adapter_native_cpu.match index 3f80da7c36..82c2e813e5 100644 --- a/test/conformance/context/context_adapter_native_cpu.match +++ b/test/conformance/context/context_adapter_native_cpu.match @@ -1 +1 @@ -urContextSetExtendedDeleterTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urContextSetExtendedDeleterTest.Success/* diff --git a/test/conformance/context/urContextCreate.cpp b/test/conformance/context/urContextCreate.cpp index 0f268a3992..0dc4341bd0 100644 --- a/test/conformance/context/urContextCreate.cpp +++ b/test/conformance/context/urContextCreate.cpp @@ -36,6 +36,17 @@ TEST_P(urContextCreateTest, InvalidNullPointerContext) { urContextCreate(1, &device, nullptr, nullptr)); } +TEST_P(urContextCreateTest, InvalidEnumeration) { + auto device = GetParam(); + + ur_context_properties_t properties{UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES, + nullptr, UR_CONTEXT_FLAGS_MASK}; + uur::raii::Context context = nullptr; + + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, + urContextCreate(1, &device, &properties, context.ptr())); +} + using urContextCreateMultiDeviceTest = uur::urAllDevicesTest; TEST_F(urContextCreateMultiDeviceTest, Success) { if (devices.size() < 2) { diff --git a/test/conformance/context/urContextCreateWithNativeHandle.cpp b/test/conformance/context/urContextCreateWithNativeHandle.cpp index 9b1c61f14a..599f402f77 100644 --- a/test/conformance/context/urContextCreateWithNativeHandle.cpp +++ 
b/test/conformance/context/urContextCreateWithNativeHandle.cpp @@ -11,10 +11,8 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urContextCreateWithNativeHandleTest); TEST_P(urContextCreateWithNativeHandleTest, Success) { ur_native_handle_t native_context = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urContextGetNativeHandle(context, &native_context)); - } + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. @@ -35,10 +33,9 @@ TEST_P(urContextCreateWithNativeHandleTest, Success) { TEST_P(urContextCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { ur_native_handle_t native_context = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urContextGetNativeHandle(context, &native_context)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); ur_context_handle_t ctx = nullptr; ur_context_native_properties_t props{ @@ -50,10 +47,9 @@ TEST_P(urContextCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { TEST_P(urContextCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { ur_native_handle_t native_context = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urContextGetNativeHandle(context, &native_context)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); ur_context_handle_t ctx = nullptr; ur_context_native_properties_t props{ diff --git a/test/conformance/context/urContextGetInfo.cpp b/test/conformance/context/urContextGetInfo.cpp index f9f699d511..46bc2cd179 100644 --- a/test/conformance/context/urContextGetInfo.cpp +++ b/test/conformance/context/urContextGetInfo.cpp @@ -17,6 +17,24 @@ struct urContextGetInfoTestWithInfoParam {UR_CONTEXT_INFO_REFERENCE_COUNT, sizeof(uint32_t)}, {UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, sizeof(bool)}, {UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, sizeof(bool)}, + 
{UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}, + {UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}}; + + ctx_info_mem_flags_map = { + {UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + UR_MEMORY_ORDER_CAPABILITY_FLAGS_MASK}, + {UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + UR_MEMORY_SCOPE_CAPABILITY_FLAGS_MASK}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + UR_MEMORY_ORDER_CAPABILITY_FLAGS_MASK}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + UR_MEMORY_SCOPE_CAPABILITY_FLAGS_MASK}, }; } @@ -26,24 +44,30 @@ struct urContextGetInfoTestWithInfoParam } std::unordered_map ctx_info_size_map; + std::unordered_map + ctx_info_mem_flags_map; }; UUR_TEST_SUITE_P(urContextGetInfoTestWithInfoParam, ::testing::Values( - UR_CONTEXT_INFO_NUM_DEVICES, // - UR_CONTEXT_INFO_DEVICES, // - UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, // - UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, // - UR_CONTEXT_INFO_REFERENCE_COUNT // - + UR_CONTEXT_INFO_NUM_DEVICES, // + UR_CONTEXT_INFO_DEVICES, // + UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, // + UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, // + UR_CONTEXT_INFO_REFERENCE_COUNT, // + UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, // + UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, // + UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, // + UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES // ), uur::deviceTestWithParamPrinter); TEST_P(urContextGetInfoTestWithInfoParam, Success) { ur_context_info_t info = getParam(); size_t info_size = 0; - ASSERT_SUCCESS(urContextGetInfo(context, info, 0, nullptr, &info_size)); + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetInfo(context, info, 0, nullptr, &info_size)); ASSERT_NE(info_size, 0); if (const auto expected_size = 
ctx_info_size_map.find(info); diff --git a/test/conformance/context/urContextSetExtendedDeleter.cpp b/test/conformance/context/urContextSetExtendedDeleter.cpp index 99e72ecaa0..ad09ac2179 100644 --- a/test/conformance/context/urContextSetExtendedDeleter.cpp +++ b/test/conformance/context/urContextSetExtendedDeleter.cpp @@ -21,7 +21,8 @@ TEST_P(urContextSetExtendedDeleterTest, Success) { *static_cast(userdata) = true; }; - ASSERT_SUCCESS(urContextSetExtendedDeleter(context, deleter, &called)); + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextSetExtendedDeleter(context, deleter, &called)); } ASSERT_TRUE(called); } diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py index 84a5d6e031..355b12446c 100755 --- a/test/conformance/cts_exe.py +++ b/test/conformance/cts_exe.py @@ -122,6 +122,10 @@ def _run_cmd(cmd, comment, filter): parser.add_argument("rest", nargs=argparse.REMAINDER) args = parser.parse_args() + if args.test_command is None or args.failslist is None: + print("Usage: cts_exe.py --test_command (test binary) --failslist (match file) -- (test arguments)") + sys.exit(1) + base_invocation = [args.test_command] + args.rest if os.environ.get("GTEST_OUTPUT") is not None: @@ -142,12 +146,9 @@ def _run_cmd(cmd, comment, filter): for l in f: optional = "{{OPT}}" in l l = l.replace("{{OPT}}", "") - l = l.replace("{{.*}}", "*") if l.startswith("#"): continue - if l.startswith("{{NONDETERMINISTIC}}"): - continue if l.strip() == "": continue diff --git a/test/conformance/device/device_adapter_cuda.match b/test/conformance/device/device_adapter_cuda.match index ff961cc6f5..48e00debe4 100644 --- a/test/conformance/device/device_adapter_cuda.match +++ b/test/conformance/device/device_adapter_cuda.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_hip.match b/test/conformance/device/device_adapter_hip.match index ff961cc6f5..48e00debe4 100644 --- 
a/test/conformance/device/device_adapter_hip.match +++ b/test/conformance/device/device_adapter_hip.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_level_zero.match b/test/conformance/device/device_adapter_level_zero.match index ff961cc6f5..48e00debe4 100644 --- a/test/conformance/device/device_adapter_level_zero.match +++ b/test/conformance/device/device_adapter_level_zero.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_level_zero_v2.match b/test/conformance/device/device_adapter_level_zero_v2.match index 2b9ecbef70..87140fb10c 100644 --- a/test/conformance/device/device_adapter_level_zero_v2.match +++ b/test/conformance/device/device_adapter_level_zero_v2.match @@ -1,3 +1,2 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime {{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GLOBAL_MEM_FREE diff --git a/test/conformance/device/device_adapter_native_cpu.match b/test/conformance/device/device_adapter_native_cpu.match index 2129478fb8..00b3642e71 100644 --- a/test/conformance/device/device_adapter_native_cpu.match +++ b/test/conformance/device/device_adapter_native_cpu.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} urDeviceCreateWithNativeHandleTest.InvalidNullHandlePlatform urDeviceCreateWithNativeHandleTest.InvalidNullPointerDevice {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/urDeviceCreateWithNativeHandle.cpp b/test/conformance/device/urDeviceCreateWithNativeHandle.cpp index 8cffc72cf1..d72435c274 100644 --- a/test/conformance/device/urDeviceCreateWithNativeHandle.cpp +++ b/test/conformance/device/urDeviceCreateWithNativeHandle.cpp @@ -9,10 +9,9 @@ using urDeviceCreateWithNativeHandleTest = uur::urAllDevicesTest; TEST_F(urDeviceCreateWithNativeHandleTest, Success) { 
for (auto device : devices) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urDeviceGetNativeHandle(device, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetNativeHandle(device, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. @@ -32,10 +31,9 @@ TEST_F(urDeviceCreateWithNativeHandleTest, Success) { TEST_F(urDeviceCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { for (auto device : devices) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urDeviceGetNativeHandle(device, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetNativeHandle(device, &native_handle)); ur_device_handle_t dev = nullptr; ur_device_native_properties_t props{ @@ -49,10 +47,9 @@ TEST_F(urDeviceCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { TEST_F(urDeviceCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { for (auto device : devices) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urDeviceGetNativeHandle(device, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetNativeHandle(device, &native_handle)); ur_device_handle_t dev = nullptr; ur_device_native_properties_t props{ diff --git a/test/conformance/device/urDevicePartition.cpp b/test/conformance/device/urDevicePartition.cpp index 2b2939066d..cdba0d9d1c 100644 --- a/test/conformance/device/urDevicePartition.cpp +++ b/test/conformance/device/urDevicePartition.cpp @@ -300,3 +300,60 @@ TEST_F(urDevicePartitionTest, SuccessSubSet) { } } } + +using urDevicePartitionByCountsTestWithParam = + urDevicePartitionTestWithParam>; +TEST_P(urDevicePartitionByCountsTestWithParam, CountsOrdering) { + ur_device_handle_t device = devices[0]; + + if (!uur::hasDevicePartitionSupport(device, + UR_DEVICE_PARTITION_BY_COUNTS)) { + GTEST_SKIP() << "Device \'" 
<< device + << "\' does not support partitioning by counts\n"; + } + + auto requested_counts = GetParam(); + + std::vector property_list; + for (size_t i = 0; i < requested_counts.size(); ++i) { + property_list.push_back( + uur::makePartitionByCountsDesc(requested_counts[i])); + } + + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, nullptr, + property_list.data(), property_list.size()}; + + uint32_t num_sub_devices = 0; + urDevicePartition(device, &properties, 0, nullptr, &num_sub_devices); + + std::vector sub_devices(num_sub_devices); + urDevicePartition(device, &properties, num_sub_devices, sub_devices.data(), + nullptr); + + std::vector actual_counts; + for (const auto &sub_device : sub_devices) { + uint32_t n_compute_units = 0; + getNumberComputeUnits(sub_device, n_compute_units); + actual_counts.push_back(n_compute_units); + urDeviceRelease(sub_device); + } + + ASSERT_EQ(requested_counts, actual_counts); +} + +INSTANTIATE_TEST_SUITE_P( + , urDevicePartitionByCountsTestWithParam, + ::testing::Values(std::vector{2, 4}, std::vector{1, 4}, + std::vector{2, 3}, std::vector{3, 2}, + std::vector{3, 1}), + [](const ::testing::TestParamInfo> &info) { + std::stringstream ss; + for (size_t i = 0; i < info.param.size(); ++i) { + if (i > 0) { + ss << "_"; + } + ss << info.param[i]; + } + return ss.str(); + }); diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 2120d26bf3..1621b01544 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -162,6 +162,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/sequence.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/linker_error.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm_local_mem.cpp) set(KERNEL_HEADER 
${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/saxpy_usm_local_mem.cpp b/test/conformance/device_code/saxpy_usm_local_mem.cpp new file mode 100644 index 0000000000..7ef17e59b5 --- /dev/null +++ b/test/conformance/device_code/saxpy_usm_local_mem.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + size_t local_size = 4; + sycl::queue sycl_queue; + uint32_t *X = sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Y = sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Z = sycl::malloc_shared(array_size, sycl_queue); + uint32_t A = 42; + + sycl_queue.submit([&](sycl::handler &cgh) { + sycl::local_accessor local_mem(local_size, cgh); + cgh.parallel_for( + sycl::nd_range<1>{{array_size}, {local_size}}, + [=](sycl::nd_item<1> itemId) { + auto i = itemId.get_global_linear_id(); + auto local_id = itemId.get_local_linear_id(); + local_mem[local_id] = i; + Z[i] = A * X[i] + Y[i] + local_mem[local_id] + + itemId.get_local_range(0); + }); + }); + return 0; +} diff --git a/test/conformance/enqueue/enqueue_adapter_cuda.match b/test/conformance/enqueue/enqueue_adapter_cuda.match index 40de7158d0..8aa9600f5e 100644 --- a/test/conformance/enqueue/enqueue_adapter_cuda.match +++ b/test/conformance/enqueue/enqueue_adapter_cuda.match @@ -1,11 +1,10 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchTest.InvalidKernelArgs/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelSubGroupTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueMemBufferMapTestWithWriteFlagParam.SuccessWrite/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MAP_FLAG_WRITE_INVALIDATE_REGION 
-urEnqueueUSMAdviseWithParamTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ADVICE_FLAG_DEFAULT -urEnqueueUSMAdviseTest.MultipleParamsSuccess/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueUSMPrefetchWithParamTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueTimestampRecordingExpTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueTimestampRecordingExpTest.SuccessBlocking/NVIDIA_CUDA_BACKEND___{{.*}}_ +urEnqueueKernelLaunchTest.InvalidKernelArgs/* +urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/* +urEnqueueKernelLaunchKernelSubGroupTest.Success/* +urEnqueueMemBufferMapTestWithWriteFlagParam.SuccessWrite/*__UR_MAP_FLAG_WRITE_INVALIDATE_REGION +urEnqueueUSMAdviseWithParamTest.Success/*__UR_USM_ADVICE_FLAG_DEFAULT +urEnqueueUSMAdviseTest.MultipleParamsSuccess/* +urEnqueueUSMPrefetchWithParamTest.Success/*__UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/*__UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueTimestampRecordingExpTest.Success/* +urEnqueueTimestampRecordingExpTest.SuccessBlocking/* diff --git a/test/conformance/enqueue/enqueue_adapter_hip.match b/test/conformance/enqueue/enqueue_adapter_hip.match index b841a25cf4..c59d228ca8 100644 --- a/test/conformance/enqueue/enqueue_adapter_hip.match +++ b/test/conformance/enqueue/enqueue_adapter_hip.match @@ -1,22 +1,21 @@ -{{NONDETERMINISTIC}} # HIP can't check kernel arguments -urEnqueueKernelLaunchTest.InvalidKernelArgs/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelSubGroupTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueKernelLaunchUSMLinkedList.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___copy_row_2D 
-{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___copy_3d_2d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_row_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_3d_2d +urEnqueueKernelLaunchTest.InvalidKernelArgs/* +urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/* +urEnqueueKernelLaunchKernelSubGroupTest.Success/* +urEnqueueKernelLaunchUSMLinkedList.Success/*__UsePoolEnabled +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_row_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_2d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_row_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_2d # HIP doesn't ignore unsupported USM advice or prefetching. Instead of # returning UR_RESULT_SUCCESS as per the spec, it instead returns # UR_RESULT_ERROR_ADAPTER_SPECIFIC to issue a warning. These tests will fail # until this is rectified. 
-urEnqueueUSMAdviseWithParamTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_ADVICE_FLAG_DEFAULT -urEnqueueUSMAdviseTest.MultipleParamsSuccess/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueUSMPrefetchWithParamTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/AMD_HIP_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueUSMAdviseWithParamTest.Success/*__UR_USM_ADVICE_FLAG_DEFAULT +urEnqueueUSMAdviseTest.MultipleParamsSuccess/* +urEnqueueUSMPrefetchWithParamTest.Success/*__UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/*__UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueTimestampRecordingExpTest.Success/AMD_HIP_BACKEND___{{.*}} -urEnqueueTimestampRecordingExpTest.SuccessBlocking/AMD_HIP_BACKEND___{{.*}} +urEnqueueTimestampRecordingExpTest.Success/* +urEnqueueTimestampRecordingExpTest.SuccessBlocking/* diff --git a/test/conformance/enqueue/enqueue_adapter_level_zero.match b/test/conformance/enqueue/enqueue_adapter_level_zero.match index 5a9ce841b8..9394f1b0a0 100644 --- a/test/conformance/enqueue/enqueue_adapter_level_zero.match +++ b/test/conformance/enqueue/enqueue_adapter_level_zero.match @@ -1,26 +1,23 @@ -# Note: This file is only for use with cts_exe.py -{{OPT}}urEnqueueEventsWaitTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_2d_3d -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_2d 
-{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_column_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d +{{OPT}}urEnqueueEventsWaitTest.Success/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/* +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/* +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_2d_3d +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_2d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_column_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_2d {{OPT}}urEnqueueMemBufferMapMultiDeviceTest.* -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d 
-{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_2d +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/* {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest/* {{OPT}}urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/* {{OPT}}urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/* diff --git a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match index c72bf51398..44895d10fa 100644 --- a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match +++ b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match @@ -1,104 +1,85 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchTest.InvalidKernelArgs/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urEnqueueKernelLaunchWithVirtualMemory.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UseEventsEnabled -{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UseEventsDisabled +urEnqueueKernelLaunchTest.InvalidKernelArgs/* +urEnqueueKernelLaunchKernelWgSizeTest.Success/* +urEnqueueKernelLaunchWithVirtualMemory.Success/* +{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/*__UseEventsEnabled +{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/*__UseEventsDisabled {{OPT}}urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/UseEventsNoQueuePerThread {{OPT}}urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/NoUseEventsNoQueuePerThread -{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_whole_buffer_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_column_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_row_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_with_offsets 
-{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_2d_3d -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_2d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_whole_buffer_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_column_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_row_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_whole_buffer_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D 
-{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_column_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_row_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -{{OPT}}urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D 
-{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D 
-{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageReadTest.Success1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.Success2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.Success3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidRegion1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidRegion2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidRegion3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.Success1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.Success2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.Success3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullHandleProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueTimestampRecordingExpTest.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/* +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_whole_buffer_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_column_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_row_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_with_offsets +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_2d_3d +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_2d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_whole_buffer_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_column_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_row_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_with_offsets 
+{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_2d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_whole_buffer_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_column_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_row_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_2d +{{OPT}}urEnqueueMemImageCopyTest.Success/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/* +{{OPT}}urEnqueueMemImageReadTest.Success1D/* +{{OPT}}urEnqueueMemImageReadTest.Success2D/* +{{OPT}}urEnqueueMemImageReadTest.Success3D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleImage/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullPointerDst/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion1D/* 
+{{OPT}}urEnqueueMemImageReadTest.InvalidRegion2D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion3D/* +{{OPT}}urEnqueueMemImageWriteTest.Success1D/* +{{OPT}}urEnqueueMemImageWriteTest.Success2D/* +{{OPT}}urEnqueueMemImageWriteTest.Success3D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleImage/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPointerSrc/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin1D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin2D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin3D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion1D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion2D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion3D/* +{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/* +urEnqueueUSMFill2DNegativeTest.OutOfBounds/* +urEnqueueUSMAdviseTest.InvalidSizeTooLarge/* +urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/* +urEnqueueReadHostPipeTest.InvalidNullHandleQueue/* +urEnqueueReadHostPipeTest.InvalidNullHandleProgram/* +urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/* +urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/* +urEnqueueReadHostPipeTest.InvalidEventWaitList/* +urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/* +urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/* +urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/* +urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/* +urEnqueueWriteHostPipeTest.InvalidEventWaitList/* +{{OPT}}urEnqueueTimestampRecordingExpTest.SuccessBlocking/* diff --git a/test/conformance/enqueue/enqueue_adapter_native_cpu.match b/test/conformance/enqueue/enqueue_adapter_native_cpu.match index bf0ecdee39..18abf6abfe 100644 --- a/test/conformance/enqueue/enqueue_adapter_native_cpu.match +++ b/test/conformance/enqueue/enqueue_adapter_native_cpu.match @@ -1,390 +1,154 @@ -# Note: This file is only for use with cts_exe.py 
{{OPT}}urEnqueueEventsWaitMultiDeviceTest.EmptyWaitList {{OPT}}urEnqueueEventsWaitMultiDeviceTest.EmptyWaitListWithEvent {{OPT}}urEnqueueEventsWaitMultiDeviceTest.EnqueueWaitOnADifferentQueue -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListNullEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListZeroSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListNullEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListZeroSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueEventsWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerName/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerDst/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListNullEvents/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListZeroSize/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerName/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerSrc/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListNullEvents/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListZeroSize/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/* +{{OPT}}urEnqueueEventsWaitTest.Success/* +{{OPT}}urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/* {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitSingleQueueMultiOps/MultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitSingleQueueMultiOps/NoMultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/MultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/NoMultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueuesCommonDependency/MultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueuesCommonDependency/NoMultiThread 
-{{OPT}}urEnqueueEventsWaitWithBarrierTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesLaunchOnly/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessNonEventDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -{{OPT}}urEnqueueKernelLaunchTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_1 
-{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_31 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_1027 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_32 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_256 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_1_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_31_7 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_1027_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_1_32 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_256_79 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1_1_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_31_7_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1027_1_19 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1_53_19 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_256_79_8 -{{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchWithUSM.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled -{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled 
-{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000 -{{OPT}}urEnqueueMemBufferFillNegativeTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY 
-{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE 
-{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE 
-{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadRectTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY 
-{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY 
-{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemImageCopyTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D 
-{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D 
-{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__1__patternSize__1 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__256 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__1024__patternSize__256 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__4 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__8 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__16 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__32 -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullQueueHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullPtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.OutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.invalidPatternSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullQueueHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidPitch/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidWidth/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidHeight/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.invalidPatternSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseWithParamTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ADVICE_FLAG_DEFAULT -{{OPT}}urEnqueueUSMAdviseTest.MultipleParamsSuccess/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.NonCoherentDeviceMemorySuccessOrWarning/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.Blocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.BlockingWithEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.NonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.WaitForDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullQueueHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_MIGRATION_FLAG_DEFAULT -{{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_MIGRATION_FLAG_DEFAULT -{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urEnqueueEventsWaitWithBarrierTest.Success/* +{{OPT}}urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/* +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/*_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesLaunchOnly/*_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/*_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessNonEventDependencies/*_ +{{OPT}}urEnqueueKernelLaunchTest.Success/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/* +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/* +{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/* +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_31 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_1027 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_32 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_256 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_1_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_31_7 
+{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_1027_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_1_32 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_256_79 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_1_1_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_31_7_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_1027_1_19 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_1_53_19 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_256_79_8 +{{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/* +{{OPT}}urEnqueueKernelLaunchWithUSM.Success/* +{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/* +{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/*__UsePoolEnabled +{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/*__UsePoolDisabled +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/* +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__1024 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__2500 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__4096 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__6000 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__1024 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__2500 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__4096 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__6000 +{{OPT}}urEnqueueMemBufferFillNegativeTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/* +{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/* +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/* 
+{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/* +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/* +{{OPT}}urEnqueueMemImageCopyTest.Success/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/* +{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueUSMFillTestWithParam.Success/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullQueueHandle/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullPtr/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidSize/* +{{OPT}}urEnqueueUSMFillNegativeTest.OutOfBounds/* +{{OPT}}urEnqueueUSMFillNegativeTest.invalidPatternSize/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidEventWaitList/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullQueueHandle/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtr/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidPitch/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidWidth/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidHeight/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidSize/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.invalidPatternSize/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueUSMAdviseWithParamTest.Success/*__UR_USM_ADVICE_FLAG_DEFAULT +{{OPT}}urEnqueueUSMAdviseTest.MultipleParamsSuccess/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidNullHandleQueue/* 
+{{OPT}}urEnqueueUSMAdviseTest.InvalidNullPointerMem/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidEnumeration/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeZero/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/* +{{OPT}}urEnqueueUSMAdviseTest.NonCoherentDeviceMemorySuccessOrWarning/* +{{OPT}}urEnqueueUSMMemcpyTest.Blocking/* +{{OPT}}urEnqueueUSMMemcpyTest.BlockingWithEvent/* +{{OPT}}urEnqueueUSMMemcpyTest.NonBlocking/* +{{OPT}}urEnqueueUSMMemcpyTest.WaitForDependencies/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullQueueHandle/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullDst/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullSrc/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/* +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/* +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/*__UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/*__UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullPointerMem/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidEnumeration/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeZero/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidEventWaitList/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleQueue/* 
+{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidEventWaitList/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidEventWaitList/* urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/* urEnqueueKernelLaunchIncrementMultiDeviceTest.Success/* urEnqueueKernelLaunchIncrementTest.Success/* diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match new file mode 100644 index 0000000000..7f0e36cd6c --- /dev/null +++ b/test/conformance/enqueue/enqueue_adapter_opencl.match @@ -0,0 +1,3 @@ +# Note: This file is only for use with cts_exe.py +# Fails when -fsanitize=cfi +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/MultiThread diff --git a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp index fcb244e94a..29123b57bd 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp @@ -34,9 +34,11 @@ struct urEnqueueUSMFill2DTestWithParam UUR_RETURN_ON_FATAL_FAILURE(urQueueTestWithParam::SetUp()); bool memfill2d_support = false; - ASSERT_SUCCESS(urContextGetInfo( + [[maybe_unused]] ur_result_t result = urContextGetInfo( context, UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, - sizeof(memfill2d_support), &memfill2d_support, nullptr)); + sizeof(memfill2d_support), &memfill2d_support, nullptr); + ASSERT_TRUE(result == UR_RESULT_SUCCESS || + result == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); if (!memfill2d_support) { GTEST_SKIP() << "2D USM mem fill is not supported"; } diff --git 
a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp index b1f7e23b66..e12e79a295 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp @@ -28,9 +28,11 @@ struct urEnqueueUSMMemcpy2DTestWithParam } bool memcpy2d_support = false; - ASSERT_SUCCESS(urContextGetInfo( + [[maybe_unused]] ur_result_t result = urContextGetInfo( context, UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, - sizeof(memcpy2d_support), &memcpy2d_support, nullptr)); + sizeof(memcpy2d_support), &memcpy2d_support, nullptr); + ASSERT_TRUE(result == UR_RESULT_SUCCESS || + result == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); if (!memcpy2d_support) { GTEST_SKIP() << "2D USM memcpy is not supported"; } diff --git a/test/conformance/event/event_adapter_cuda.match b/test/conformance/event/event_adapter_cuda.match index d9e14551da..daa3c281bc 100644 --- a/test/conformance/event/event_adapter_cuda.match +++ b/test/conformance/event/event_adapter_cuda.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urEventGetProfilingInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE -urEventGetProfilingInfoWithTimingComparisonTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.ValidateParameters/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.AllStates/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.EventAlreadyCompleted/NVIDIA_CUDA_BACKEND___{{.*}}_ +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_hip.match b/test/conformance/event/event_adapter_hip.match index 6bc909c5fd..daa3c281bc 100644 --- 
a/test/conformance/event/event_adapter_hip.match +++ b/test/conformance/event/event_adapter_hip.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urEventGetProfilingInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE -urEventGetProfilingInfoWithTimingComparisonTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.ValidateParameters/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.AllStates/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.EventAlreadyCompleted/AMD_HIP_BACKEND___{{.*}}_ +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_level_zero.match b/test/conformance/event/event_adapter_level_zero.match index 5adfbd0fd9..e7fe14c234 100644 --- a/test/conformance/event/event_adapter_level_zero.match +++ b/test/conformance/event/event_adapter_level_zero.match @@ -1,9 +1,8 @@ -# Note: This file is only for use with cts_exe.py -{{OPT}}urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE -{{OPT}}urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urEventGetInfoTest.Success/*__UR_EVENT_INFO_COMMAND_TYPE 
+{{OPT}}urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_QUEUED +{{OPT}}urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_SUBMIT +{{OPT}}urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +{{OPT}}urEventGetProfilingInfoWithTimingComparisonTest.Success/* urEventCreateWithNativeHandleTest.Success/* urEventSetCallbackTest.AllStates/* urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_level_zero_v2.match b/test/conformance/event/event_adapter_level_zero_v2.match index 911e7b6783..dd70962041 100644 --- a/test/conformance/event/event_adapter_level_zero_v2.match +++ b/test/conformance/event/event_adapter_level_zero_v2.match @@ -1,14 +1,7 @@ -{{NONDETERMINISTIC}} -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_QUEUE -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_CONTEXT -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE -urEventGetInfoNegativeTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidSizePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT -urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urEventSetCallbackTest.ValidateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.AllStates/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.EventAlreadyCompleted/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_QUEUED +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_SUBMIT +urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_native_cpu.match b/test/conformance/event/event_adapter_native_cpu.match index 17066b6d52..1716dea0ca 100644 --- a/test/conformance/event/event_adapter_native_cpu.match +++ b/test/conformance/event/event_adapter_native_cpu.match @@ -1,33 +1,17 @@ -{{NONDETERMINISTIC}} -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_QUEUE -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_CONTEXT -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_TYPE -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_EXECUTION_STATUS -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_REFERENCE_COUNT -urEventGetInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidSizePropSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urEventGetInfoNegativeTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_QUEUED -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_SUBMIT -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_START -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_END -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_COMPLETE -urEventGetProfilingInfoWithTimingComparisonTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoNegativeTest.InvalidValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetNativeHandleTest.InvalidNullPointerNativeEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.ValidateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.AllStates/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.EventAlreadyCompleted/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackNegativeTest.InvalidNullPointerCallback/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urEventSetCallbackNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_QUEUED +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_SUBMIT +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventGetProfilingInfoNegativeTest.InvalidNullHandle/* +urEventGetProfilingInfoNegativeTest.InvalidValue/* +urEventWaitTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* + +# These crash when run through the loader +{{OPT}}urEventRetainTest.InvalidNullHandle/* +{{OPT}}urEventReleaseTest.InvalidNullHandle/* +{{OPT}}urEventGetNativeHandleTest.InvalidNullHandleEvent/* +{{OPT}}urEventSetCallbackNegativeTest.InvalidNullHandleEvent/* diff --git a/test/conformance/event/event_adapter_opencl.match b/test/conformance/event/event_adapter_opencl.match new file mode 100644 index 0000000000..2c65414d2d --- /dev/null +++ b/test/conformance/event/event_adapter_opencl.match @@ -0,0 +1,3 @@ +# Note: This file is only for use with cts_exe.py, not the legacy match checker +# These crash when run through the loader +{{OPT}}urEventSetCallbackTest.ValidateParameters/* diff --git a/test/conformance/event/urEventCreateWithNativeHandle.cpp b/test/conformance/event/urEventCreateWithNativeHandle.cpp index 36ff0b44dc..bfb7113053 100644 --- a/test/conformance/event/urEventCreateWithNativeHandle.cpp +++ b/test/conformance/event/urEventCreateWithNativeHandle.cpp @@ -11,10 +11,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventCreateWithNativeHandleTest); TEST_P(urEventCreateWithNativeHandleTest, Success) { ur_native_handle_t native_event = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urEventGetNativeHandle(event, &native_event)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urEventGetNativeHandle(event,
&native_event)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt index 9845ba86b1..8b7aaa5a63 100644 --- a/test/conformance/exp_command_buffer/CMakeLists.txt +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -19,4 +19,5 @@ add_conformance_test_with_kernels_environment(exp_command_buffer update/usm_saxpy_kernel_update.cpp update/event_sync.cpp update/kernel_event_sync.cpp + update/local_memory_update.cpp ) diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match new file mode 100644 index 0000000000..40182b9125 --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match @@ -0,0 +1,11 @@ +# Note: This file is only for use with cts_exe.py +# These cause SIGILL when built with -fsanitize=cfi on Nvidia +{{OPT}}urCommandBufferKernelHandleUpdateTest.Success/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.UpdateAgain/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.RestoreOriginalKernel/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/* +{{OPT}}urCommandBufferValidUpdateParametersTest.UpdateDimensionsWithoutUpdatingKernel/* +{{OPT}}urCommandBufferValidUpdateParametersTest.UpdateOnlyLocalWorkSize/* +{{OPT}}urCommandBufferValidUpdateParametersTest.SuccessNullptrHandle/* +{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/* diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match new file mode 100644 index 0000000000..da8d6dee07 --- /dev/null +++ 
b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,10 @@ +# Note: This file is only for use with cts_exe.py +# These cause SIGILL when built with -fsanitize=cfi on AMD +{{OPT}}urCommandBufferKernelHandleUpdateTest.Success/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.UpdateAgain/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.RestoreOriginalKernel/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/* +{{OPT}}urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/* +{{OPT}}urCommandBufferValidUpdateParametersTest.UpdateDimensionsWithoutUpdatingKernel/* +{{OPT}}urCommandBufferValidUpdateParametersTest.UpdateOnlyLocalWorkSize/* +{{OPT}}urCommandBufferValidUpdateParametersTest.SuccessNullptrHandle/* diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match index 7e7ecf8d4e..5aa63f1cbc 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match @@ -1,59 +1,46 @@ -{{NONDETERMINISTIC}} -urCommandBufferReleaseExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferReleaseExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferRetainExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferRetainExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ 
-urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferAppendKernelLaunchExpTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 
-urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -KernelCommandEventSyncTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -KernelCommandEventSyncTest.InterCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-CommandEventSyncTest.MemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MultipleEventCommands/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MultipleEventCommandsBetweenCommandBuffers/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MultipleEventCommands/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urCommandBufferReleaseExpTest.Success/* 
+urCommandBufferReleaseExpTest.InvalidNullHandle/* +urCommandBufferRetainExpTest.Success/* +urCommandBufferRetainExpTest.InvalidNullHandle/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyRectExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadRectExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteRectExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferFillExp/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMPrefetchExp/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMAdviseExp/* +urCommandBufferAppendKernelLaunchExpTest.Basic/* +urCommandBufferFillCommandsTest.Buffer/* +urCommandBufferFillCommandsTest.USM/* +KernelCommandEventSyncTest.Basic/* +KernelCommandEventSyncTest.InterCommandBuffer/* +KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/* +CommandEventSyncTest.USMMemcpyExp/* +CommandEventSyncTest.USMFillExp/* +CommandEventSyncTest.MemBufferCopyExp/* +CommandEventSyncTest.MemBufferCopyRectExp/* +CommandEventSyncTest.MemBufferReadExp/* +CommandEventSyncTest.MemBufferReadRectExp/* +CommandEventSyncTest.MemBufferWriteExp/* +CommandEventSyncTest.MemBufferWriteRectExp/* +CommandEventSyncTest.MemBufferFillExp/* +CommandEventSyncTest.USMPrefetchExp/* +CommandEventSyncTest.USMAdviseExp/* +CommandEventSyncTest.MultipleEventCommands/* +CommandEventSyncTest.MultipleEventCommandsBetweenCommandBuffers/* +CommandEventSyncUpdateTest.USMMemcpyExp/* +CommandEventSyncUpdateTest.USMFillExp/* +CommandEventSyncUpdateTest.MemBufferCopyExp/* +CommandEventSyncUpdateTest.MemBufferCopyRectExp/* +CommandEventSyncUpdateTest.MemBufferReadExp/* 
+CommandEventSyncUpdateTest.MemBufferReadRectExp/* +CommandEventSyncUpdateTest.MemBufferWriteExp/* +CommandEventSyncUpdateTest.MemBufferWriteRectExp/* +CommandEventSyncUpdateTest.MemBufferFillExp/* +CommandEventSyncUpdateTest.USMPrefetchExp/* +CommandEventSyncUpdateTest.USMAdviseExp/* +CommandEventSyncUpdateTest.MultipleEventCommands/* diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index 2ccc267535..c6fe7ad962 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -1,38 +1,43 @@ -{{NONDETERMINISTIC}} -{{OPT}}urCommandBufferReleaseCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferRetainCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.UpdateGlobalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.OverrideUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.OverrideArgList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Update3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Update2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Update1D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/SYCL_NATIVE_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.Success/* +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/* +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/* +{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/* +{{OPT}}urCommandBufferRetainCommandExpTest.Success/* +{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/* +{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/* +{{OPT}}BufferFillCommandTest.UpdateParameters/* +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/* +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/* +{{OPT}}BufferFillCommandTest.OverrideUpdate/* +{{OPT}}BufferFillCommandTest.OverrideArgList/* +{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/* +{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/* +{{OPT}}InvalidUpdateTest.InvalidDimensions/* +{{OPT}}USMFillCommandTest.UpdateParameters/* +{{OPT}}USMFillCommandTest.UpdateNull/* +{{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/* +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/* +{{OPT}}BufferSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateNullptrKernel/* +{{OPT}}NDRangeUpdateTest.Update3D/* +{{OPT}}NDRangeUpdateTest.Update2D/* +{{OPT}}NDRangeUpdateTest.Update1D/* +{{OPT}}NDRangeUpdateTest.ImplToUserDefinedLocalSize/* +{{OPT}}NDRangeUpdateTest.UserToImplDefinedLocalSize/* +{{OPT}}USMSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/* 
+{{OPT}}KernelCommandEventSyncTest.Basic/* +{{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/* +{{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/* +{{OPT}}KernelCommandEventSyncUpdateTest.Basic/* +{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/* +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/* +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/* +{{OPT}}LocalMemoryUpdateTest.UpdateParameters/* +{{OPT}}LocalMemoryUpdateTest.UpdateParametersAndLocalSize/* +{{OPT}}LocalMemoryMultiUpdateTest.UpdateParameters/* +{{OPT}}LocalMemoryMultiUpdateTest.UpdateWithoutBlocking/* diff --git a/test/conformance/exp_command_buffer/update/local_memory_update.cpp b/test/conformance/exp_command_buffer/update/local_memory_update.cpp new file mode 100644 index 0000000000..c295556fdb --- /dev/null +++ b/test/conformance/exp_command_buffer/update/local_memory_update.cpp @@ -0,0 +1,531 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../fixtures.h" +#include <array> +#include <cstring> + +// Test that updating a command-buffer with a single kernel command +// taking a local memory argument works correctly. + +struct LocalMemoryUpdateTestBase + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + virtual void SetUp() override { + program_name = "saxpy_usm_local_mem"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // HIP has extra args for local memory so we define an offset for arg indices here for updating + hip_arg_offset = backend == UR_PLATFORM_BACKEND_HIP ?
3 : 0; + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + const size_t allocation_size = + sizeof(uint32_t) * global_size * local_size; + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + + std::vector<uint8_t> pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(shared_ptr, pattern.data(), allocation_size); + } + size_t current_index = 0; + // Index 0 is local_mem arg + ASSERT_SUCCESS(urKernelSetArgLocal(kernel, current_index++, + local_mem_size, nullptr)); + + // HIP has extra args for local mem at indices 1-3 + if (backend == UR_PLATFORM_BACKEND_HIP) { + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_index++, + sizeof(local_size), nullptr, + &local_size)); + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_index++, + sizeof(local_size), nullptr, + &local_size)); + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_index++, + sizeof(local_size), nullptr, + &local_size)); + } + + // Index 1 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, current_index++, nullptr, + shared_ptrs[0])); + // Index 2 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_index++, sizeof(A), + nullptr, &A)); + // Index 3 is X + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, current_index++, nullptr, + shared_ptrs[1])); + // Index 4 is Y + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, current_index++, nullptr, + shared_ptrs[2])); + } + + void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, + size_t length, size_t local_size) { + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X[i] + Y[i] + i + local_size; + ASSERT_EQ(result, output[i]); + } + } + + virtual void TearDown() override
{ + for (auto &shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t local_mem_size = local_size * sizeof(uint32_t); + static constexpr size_t global_size = 16; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array<void *, 5> shared_ptrs = {nullptr, nullptr, nullptr, nullptr, + nullptr}; + + uint32_t hip_arg_offset = 0; +}; + +struct LocalMemoryUpdateTest : LocalMemoryUpdateTestBase { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(LocalMemoryUpdateTestBase::SetUp()); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, 0, nullptr, + nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE(LocalMemoryUpdateTestBase::TearDown()); + } + + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(LocalMemoryUpdateTest); + +TEST_P(LocalMemoryUpdateTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size, local_size); + + // Update inputs +
ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + ur_exp_command_buffer_update_value_arg_desc_t new_value_descs[2]; + + // New local_mem at index 0 + new_value_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + local_mem_size, // argSize + nullptr, // pProperties + nullptr, // hArgValue + }; + + // New A at index 2 + uint32_t new_A = 33; + new_value_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2 + hip_arg_offset, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 4 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 4 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + 2, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + new_value_descs, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred 
correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size, local_size); +} + +TEST_P(LocalMemoryUpdateTest, UpdateParametersAndLocalSize) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size, local_size); + + // Update inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + std::vector + new_value_descs{}; + + size_t new_local_size = local_size * 2; + size_t new_local_mem_size = new_local_size * sizeof(uint32_t); + // New local_mem at index 0 + new_value_descs.push_back({ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + new_local_mem_size, // argSize + nullptr, // pProperties + nullptr, // hArgValue + }); + + if (backend == UR_PLATFORM_BACKEND_HIP) { + new_value_descs.push_back({ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_local_size), // argSize + nullptr, // pProperties + &new_local_size, // hArgValue + }); + new_value_descs.push_back({ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_local_size), // argSize + nullptr, // pProperties + &new_local_size, // hArgValue + }); + new_value_descs.push_back({ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + sizeof(new_local_size), // argSize + nullptr, // pProperties + &new_local_size, // hArgValue + }); + } + + // New A at index 2 + uint32_t new_A = 33; + 
new_value_descs.push_back({ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2 + hip_arg_offset, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }); + + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 4 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 4 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + static_cast(new_value_descs.size()), // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + new_value_descs.data(), // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size, new_local_size); +} + +struct LocalMemoryMultiUpdateTest : LocalMemoryUpdateTestBase { + void SetUp() override { + 
UUR_RETURN_ON_FATAL_FAILURE(LocalMemoryUpdateTestBase::SetUp()); + + // Append kernel command to command-buffer and close command-buffer + for (unsigned node = 0; node < nodes; node++) { + // We need to set the local memory arg each time because it is + // cleared in the kernel handle after being used. + ASSERT_SUCCESS( + urKernelSetArgLocal(kernel, 0, local_mem_size, nullptr)); + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, 0, nullptr, 0, nullptr, + nullptr, nullptr, &command_handles[node])); + ASSERT_NE(command_handles[node], nullptr); + } + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + for (auto &handle : command_handles) { + if (handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(handle)); + } + } + UUR_RETURN_ON_FATAL_FAILURE(LocalMemoryUpdateTestBase::TearDown()); + } + + static constexpr size_t nodes = 1024; + static constexpr uint32_t A = 42; + std::array command_handles{}; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(LocalMemoryMultiUpdateTest); + +TEST_P(LocalMemoryMultiUpdateTest, UpdateParameters) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size, local_size); + + // Update inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + ur_exp_command_buffer_update_value_arg_desc_t new_value_descs[2]; + + // New local_mem at index 0 + new_value_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + local_mem_size, // argSize + nullptr, // pProperties + nullptr, // 
hArgValue + }; + + // New A at index 2 + uint32_t new_A = 33; + new_value_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2 + hip_arg_offset, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 4 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 4 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + 2, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + new_value_descs, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + for (auto &handle : command_handles) { + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); + } + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size, local_size); +} + +TEST_P(LocalMemoryMultiUpdateTest, UpdateWithoutBlocking) { + // Update inputs + 
ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + ur_exp_command_buffer_update_value_arg_desc_t new_value_descs[2]; + + // New local_mem at index 0 + new_value_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + local_mem_size, // argSize + nullptr, // pProperties + nullptr, // hArgValue + }; + + // New A at index 2 + uint32_t new_A = 33; + new_value_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2 + hip_arg_offset, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 4 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 4 + hip_arg_offset, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 2, // numNewPointerArgs + 2, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + new_input_descs, // pNewPointerArgList + new_value_descs, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + // Enqueue without calling urQueueFinish after + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Update kernel and enqueue command-buffer again + for (auto &handle : command_handles) { + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(handle, 
&update_desc)); + } + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size, local_size); +} diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index 85e6beccf9..bbadc2b57b 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -199,6 +199,46 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { Validate((uint32_t *)new_shared_ptr, global_size, new_val); } +// Test using a different global size to fill and larger USM output buffer +TEST_P(USMFillCommandTest, UpdateNull) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + // Set nullptr as kernel output at index 0 + void *null_ptr = nullptr; + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &null_ptr, // pArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // 
pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Verify update kernel succeeded but don't run to avoid dereferencing + // the nullptr. + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); +} + // Test updating a command-buffer with multiple USM fill kernel commands struct USMMultipleFillCommandTest : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp index 1dc34c00fd..ddf8730eb7 100644 --- a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -284,6 +284,59 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { Validate(new_output, new_X, new_Y, new_A, global_size); } +// Checks that passing nullptr to hNewKernel even when kernel binary updates +// is not supported by the adapter. 
+TEST_P(USMMultiSaxpyKernelTest, UpdateNullptrKernel) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size); + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + nullptr, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + for (auto &handle : command_handles) { + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); + } + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + Validate(new_output, X, Y, new_A, global_size); +} + TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { // Prepare new inputs ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; diff --git a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match b/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match index d4645b3ffc..f8cf4c4c88 100644 --- 
a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match +++ b/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match @@ -1,5 +1,4 @@ -{{NONDETERMINISTIC}} -urLevelZeroEnqueueNativeCommandTest.Success{{.*}} -urLevelZeroEnqueueNativeCommandTest.Dependencies{{.*}} -urLevelZeroEnqueueNativeCommandTest.DependenciesURBefore{{.*}} -urLevelZeroEnqueueNativeCommandTest.DependenciesURAfter{{.*}} +urLevelZeroEnqueueNativeCommandTest.Success* +urLevelZeroEnqueueNativeCommandTest.Dependencies* +urLevelZeroEnqueueNativeCommandTest.DependenciesURBefore* +urLevelZeroEnqueueNativeCommandTest.DependenciesURAfter* diff --git a/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match b/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match index f8b1e49e44..4a4837c5ab 100644 --- a/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match +++ b/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchCustomTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urEnqueueKernelLaunchCustomTest.Success/* diff --git a/test/conformance/integration/integration_adapter_level_zero.match b/test/conformance/integration/integration_adapter_level_zero.match index a49ad93a94..460aa8cf4e 100644 --- a/test/conformance/integration/integration_adapter_level_zero.match +++ b/test/conformance/integration/integration_adapter_level_zero.match @@ -1,7 +1,3 @@ -{{NONDETERMINISTIC}} -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE 
-{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE +{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/* +{{OPT}}QueueUSMTestWithParam.QueueUSMTest/* +{{OPT}}QueueBufferTestWithParam.QueueBufferTest/* diff --git a/test/conformance/integration/integration_adapter_level_zero_v2.match b/test/conformance/integration/integration_adapter_level_zero_v2.match index a49ad93a94..460aa8cf4e 100644 --- a/test/conformance/integration/integration_adapter_level_zero_v2.match +++ b/test/conformance/integration/integration_adapter_level_zero_v2.match @@ -1,7 +1,3 @@ -{{NONDETERMINISTIC}} -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE +{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/* +{{OPT}}QueueUSMTestWithParam.QueueUSMTest/* +{{OPT}}QueueBufferTestWithParam.QueueBufferTest/* diff --git a/test/conformance/integration/integration_adapter_native_cpu.match b/test/conformance/integration/integration_adapter_native_cpu.match index 159e57a144..fe5e7567ed 
100644 --- a/test/conformance/integration/integration_adapter_native_cpu.match +++ b/test/conformance/integration/integration_adapter_native_cpu.match @@ -1,7 +1,3 @@ -{{NONDETERMINISTIC}} -QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__IN_ORDER_QUEUE -QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__OUT_OF_ORDER_QUEUE -QueueUSMTestWithParam.QueueUSMTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__IN_ORDER_QUEUE -QueueUSMTestWithParam.QueueUSMTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__OUT_OF_ORDER_QUEUE -QueueBufferTestWithParam.QueueBufferTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__IN_ORDER_QUEUE -QueueBufferTestWithParam.QueueBufferTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__OUT_OF_ORDER_QUEUE +QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/* +QueueUSMTestWithParam.QueueUSMTest/* +QueueBufferTestWithParam.QueueBufferTest/* diff --git a/test/conformance/kernel/kernel_adapter_cuda.match b/test/conformance/kernel/kernel_adapter_cuda.match index b05b2fda58..cafcdf54c5 100644 --- a/test/conformance/kernel/kernel_adapter_cuda.match +++ b/test/conformance/kernel/kernel_adapter_cuda.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgValueTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/* +{{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/* 
+{{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgValueTest.InvalidKernelArgumentIndex/* diff --git a/test/conformance/kernel/kernel_adapter_hip.match b/test/conformance/kernel/kernel_adapter_hip.match index 4e6ab18293..f8ea9e3e99 100644 --- a/test/conformance/kernel/kernel_adapter_hip.match +++ b/test/conformance/kernel/kernel_adapter_hip.match @@ -1,8 +1,7 @@ -{{NONDETERMINISTIC}} -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/AMD_HIP_BACKEND___{{.*}}_ -urKernelGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS -urKernelSetArgLocalTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgValueTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgValueTest.InvalidKernelArgumentSize/AMD_HIP_BACKEND___{{.*}}_ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/* +urKernelGetInfoTest.Success/*__UR_KERNEL_INFO_NUM_REGS +urKernelSetArgLocalTest.InvalidKernelArgumentIndex/* +urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/* +urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.InvalidKernelArgumentSize/* diff --git a/test/conformance/kernel/kernel_adapter_level_zero.match b/test/conformance/kernel/kernel_adapter_level_zero.match index cf83e73ff3..4a4868eff2 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero.match +++ b/test/conformance/kernel/kernel_adapter_level_zero.match @@ -1,5 +1,4 @@ -{{NONDETERMINISTIC}} -urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoTest.SuccessIndirectAccess/* +urKernelSetExecInfoUSMPointersTest.SuccessHost/* +urKernelSetExecInfoUSMPointersTest.SuccessDevice/* +urKernelSetExecInfoUSMPointersTest.SuccessShared/* diff --git a/test/conformance/kernel/kernel_adapter_level_zero_v2.match b/test/conformance/kernel/kernel_adapter_level_zero_v2.match index cf83e73ff3..4a4868eff2 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero_v2.match +++ b/test/conformance/kernel/kernel_adapter_level_zero_v2.match @@ -1,5 +1,4 @@ -{{NONDETERMINISTIC}} -urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoTest.SuccessIndirectAccess/* +urKernelSetExecInfoUSMPointersTest.SuccessHost/* +urKernelSetExecInfoUSMPointersTest.SuccessDevice/* +urKernelSetExecInfoUSMPointersTest.SuccessShared/* diff --git a/test/conformance/kernel/kernel_adapter_native_cpu.match b/test/conformance/kernel/kernel_adapter_native_cpu.match index 368f4ad358..7ca10ec3d2 100644 --- a/test/conformance/kernel/kernel_adapter_native_cpu.match +++ b/test/conformance/kernel/kernel_adapter_native_cpu.match @@ -1,187 +1,83 @@ -{{NONDETERMINISTIC}} -urKernelCreateTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidNullPointerName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urKernelCreateTest.InvalidNullPointerKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidKernelName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateWithNativeHandleTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateWithNativeHandleTest.InvalidNullPointerNativeKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE 
-urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE 
-urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetGroupInfoSingleTest.CompileMaxWorkGroupSizeEmpty/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME 
-urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM 
-urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME 
-urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoSingleTest.KernelNameCorrect/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetInfoSingleTest.KernelContextCorrect/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetNativeHandleTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetNativeHandleTest.InvalidNullPointerNativeKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS 
-urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelReleaseTest.KernelReleaseAfterProgramRelease/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelReleaseTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelRetainTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgLocalTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urKernelSetArgLocalTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgLocalTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgMemObjTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgMemObjTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerTest.SuccessHost/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerTest.SuccessDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerTest.SuccessShared/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR 
-urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR 
-urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTest.SuccessWithProps/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTest.InvalidNullHandleArgValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidNullPointerArgValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidKernelArgumentSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.SuccessIndirectAccess/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoUSMPointersTest.SuccessHost/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoUSMPointersTest.SuccessDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoUSMPointersTest.SuccessShared/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoCacheConfigTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_CACHE_CONFIG_DEFAULT 
-urKernelSetExecInfoCacheConfigTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_CACHE_CONFIG_LARGE_SLM -urKernelSetExecInfoCacheConfigTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_CACHE_CONFIG_LARGE_DATA -urKernelSetSpecializationConstantsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidSizeCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidValueSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidValueId/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidValuePtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsNegativeTest.Unsupported/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.Success2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.Success3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidSuggestedLocalWorkSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urKernelCreateTest.Success/* 
+urKernelCreateTest.InvalidNullHandleProgram/* +urKernelCreateTest.InvalidNullPointerName/* +urKernelCreateTest.InvalidNullPointerKernel/* +urKernelCreateTest.InvalidKernelName/* +urKernelCreateWithNativeHandleTest.Success/* +urKernelCreateWithNativeHandleTest.InvalidNullHandleContext/* +urKernelCreateWithNativeHandleTest.InvalidNullPointerNativeKernel/* +urKernelGetGroupInfoTest.Success/* +urKernelGetGroupInfoTest.InvalidNullHandleKernel/* +urKernelGetGroupInfoTest.InvalidNullHandleDevice/* +urKernelGetGroupInfoTest.InvalidEnumeration/* +urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/* +urKernelGetGroupInfoSingleTest.CompileMaxWorkGroupSizeEmpty/* +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/* +urKernelGetInfoTest.Success/* +urKernelGetInfoTest.InvalidNullHandleKernel/* +urKernelGetInfoTest.InvalidEnumeration/* +urKernelGetInfoTest.InvalidSizeZero/* +urKernelGetInfoTest.InvalidSizeSmall/* +urKernelGetInfoTest.InvalidNullPointerPropValue/* +urKernelGetInfoTest.InvalidNullPointerPropSizeRet/* +urKernelGetInfoSingleTest.KernelNameCorrect/* +urKernelGetInfoSingleTest.KernelContextCorrect/* +urKernelGetNativeHandleTest.Success/* +urKernelGetNativeHandleTest.InvalidNullHandleKernel/* +urKernelGetNativeHandleTest.InvalidNullPointerNativeKernel/* +urKernelGetSubGroupInfoTest.Success/* +urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/* +urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/* +urKernelGetSubGroupInfoTest.InvalidEnumeration/* +urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/* +urKernelReleaseTest.Success/* +urKernelReleaseTest.KernelReleaseAfterProgramRelease/* +urKernelReleaseTest.InvalidNullHandleKernel/* +urKernelRetainTest.Success/* +urKernelRetainTest.InvalidNullHandleKernel/* +urKernelSetArgLocalTest.Success/* +urKernelSetArgLocalTest.InvalidNullHandleKernel/* +urKernelSetArgLocalTest.InvalidKernelArgumentIndex/* +urKernelSetArgMemObjTest.Success/* +urKernelSetArgMemObjTest.InvalidNullHandleKernel/* 
+urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/* +urKernelSetArgPointerTest.SuccessHost/* +urKernelSetArgPointerTest.SuccessDevice/* +urKernelSetArgPointerTest.SuccessShared/* +urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/* +urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/* +urKernelSetArgSamplerTestWithParam.Success/* +urKernelSetArgSamplerTest.SuccessWithProps/* +urKernelSetArgSamplerTest.InvalidNullHandleKernel/* +urKernelSetArgSamplerTest.InvalidNullHandleArgValue/* +urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.Success/* +urKernelSetArgValueTest.InvalidNullHandleKernel/* +urKernelSetArgValueTest.InvalidNullPointerArgValue/* +urKernelSetArgValueTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.InvalidKernelArgumentSize/* +urKernelSetExecInfoTest.SuccessIndirectAccess/* +urKernelSetExecInfoTest.InvalidNullHandleKernel/* +urKernelSetExecInfoTest.InvalidEnumeration/* +urKernelSetExecInfoTest.InvalidNullPointerPropValue/* +urKernelSetExecInfoUSMPointersTest.SuccessHost/* +urKernelSetExecInfoUSMPointersTest.SuccessDevice/* +urKernelSetExecInfoUSMPointersTest.SuccessShared/* +urKernelSetExecInfoCacheConfigTest.Success/* +urKernelSetSpecializationConstantsTest.Success/* +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/* +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/* +urKernelSetSpecializationConstantsTest.InvalidSizeCount/* +urKernelSetSpecializationConstantsTest.InvalidValueSize/* +urKernelSetSpecializationConstantsTest.InvalidValueId/* +urKernelSetSpecializationConstantsTest.InvalidValuePtr/* +urKernelSetSpecializationConstantsNegativeTest.Unsupported/* +urKernelGetSuggestedLocalWorkSizeTest.Success/* +urKernelGetSuggestedLocalWorkSizeTest.Success2D/* +urKernelGetSuggestedLocalWorkSizeTest.Success3D/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleKernel/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleQueue/* 
+urKernelGetSuggestedLocalWorkSizeTest.InvalidWorkDimension/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalOffset/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalSize/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidSuggestedLocalWorkSize/* diff --git a/test/conformance/kernel/kernel_adapter_opencl.match b/test/conformance/kernel/kernel_adapter_opencl.match index d65c8e51c8..687d7be2e7 100644 --- a/test/conformance/kernel/kernel_adapter_opencl.match +++ b/test/conformance/kernel/kernel_adapter_opencl.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urKernelGetInfoTest.Success/Intel_R__OpenCL_{{.*}}_UR_KERNEL_INFO_NUM_REGS +urKernelGetInfoTest.Success/*_UR_KERNEL_INFO_NUM_REGS diff --git a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp index 8640463334..1c7acf5fab 100644 --- a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp +++ b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp @@ -8,10 +8,9 @@ struct urKernelCreateWithNativeHandleTest : uur::urKernelTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urKernelGetNativeHandle(kernel, &native_kernel_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urKernelGetNativeHandle(kernel, &native_kernel_handle)); } void TearDown() override { diff --git a/test/conformance/memory/memory_adapter_cuda.match b/test/conformance/memory/memory_adapter_cuda.match index c5b70e8559..255559f575 100644 --- a/test/conformance/memory/memory_adapter_cuda.match +++ b/test/conformance/memory/memory_adapter_cuda.match @@ -1,4 +1,6 @@ -{{NONDETERMINISTIC}} -urMemImageCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urMemImageCremBufferCrateTestWith1DMemoryTypeParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_TYPE_IMAGE1D_ARRAY -{{OPT}}urMemImageCreateTestWith2DMemoryTypeParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_TYPE_IMAGE2D_ARRAY 
+urMemImageCreateTest.InvalidSize/* +{{OPT}}urMemImageCremBufferCrateTestWith1DMemoryTypeParam.Success/*__UR_MEM_TYPE_IMAGE1D_ARRAY +{{OPT}}urMemImageCreateTestWith2DMemoryTypeParam.Success/*__UR_MEM_TYPE_IMAGE2D_ARRAY +urMemBufferCreateWithNativeHandleTest.Success/* +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* diff --git a/test/conformance/memory/memory_adapter_hip.match b/test/conformance/memory/memory_adapter_hip.match index 589542df7f..a4181fcc8a 100644 --- a/test/conformance/memory/memory_adapter_hip.match +++ b/test/conformance/memory/memory_adapter_hip.match @@ -1,4 +1,6 @@ -{{NONDETERMINISTIC}} -urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}} -urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} -urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} +urMemImageCreateTest.InvalidSize/* +urMemImageGetInfoTest.Success/* +urMemBufferCreateWithNativeHandleTest.Success/* +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* +urMemImageCreateWithNativeHandleTest.Success/* diff --git a/test/conformance/memory/memory_adapter_level_zero.match b/test/conformance/memory/memory_adapter_level_zero.match index 8dbd2ac9db..d137fc5ac1 100644 --- a/test/conformance/memory/memory_adapter_level_zero.match +++ b/test/conformance/memory/memory_adapter_level_zero.match @@ -1,35 +1,15 @@ -# Note: This file is only for use with cts_exe.py {{OPT}}urMemBufferMultiQueueMemBufferTest.WriteBack -urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_WRITE_ONLY +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_READ_ONLY +urMemBufferPartitionTest.InvalidValueCreateType/* +urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/* +{{OPT}}urMemImageCreateWithNativeHandleTest.Success/* +{{OPT}}urMemGetInfoImageTest.Success/*__UR_MEM_INFO_SIZE +{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/*__UR_IMAGE_CHANNEL_ORDER_RGBA__* + # These tests fail in the "Multi device testing" job, but pass in the hardware specific test {{OPT}}urMemImageCreateTest.InvalidImageDescStype/* {{OPT}}urMemImageCreateTest.InvalidSize/* {{OPT}}urMemImageCreateWithHostPtrFlagsTest.Success/* -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_WIDTH +{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/* +{{OPT}}urMemImageGetInfoTest.Success/* diff --git a/test/conformance/memory/memory_adapter_level_zero_v2.match 
b/test/conformance/memory/memory_adapter_level_zero_v2.match index 89f708aae1..d2f34a947d 100644 --- a/test/conformance/memory/memory_adapter_level_zero_v2.match +++ b/test/conformance/memory/memory_adapter_level_zero_v2.match @@ -1,277 +1,20 @@ -{{NONDETERMINISTIC}} -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT 
-{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE 
-{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH 
-{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH 
-{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH +{{OPT}}urMemBufferPartitionWithFlagsTest.Success/* +{{OPT}}urMemBufferPartitionTest.InvalidValueCreateType/* +{{OPT}}urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/* +{{OPT}}urMemGetInfoImageTest.Success/* +{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/* +{{OPT}}urMemImageGetInfoTest.Success/* +{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/* +{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/* +{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/* +{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/* +{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/* +{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.Success/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/* +{{OPT}}urMemImageCreateWithNativeHandleTest.Success/* +{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullHandle/* +{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullPointer/* diff --git a/test/conformance/memory/memory_adapter_native_cpu.match b/test/conformance/memory/memory_adapter_native_cpu.match index 5bdd88804b..aafd22075c 100644 --- a/test/conformance/memory/memory_adapter_native_cpu.match +++ 
b/test/conformance/memory/memory_adapter_native_cpu.match @@ -1,234 +1,16 @@ -{{NONDETERMINISTIC}} -urMemBufferPartitionTest.InvalidValueCreateType/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_SIZE -urMemGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_CONTEXT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_WRITE_ONLY 
+urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_READ_ONLY +urMemBufferPartitionTest.InvalidValueCreateType/* +urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/* +urMemGetInfoTestWithParam.Success/* +urMemGetInfoTest.InvalidSizeSmall/* +urMemImageCreateTestWithImageFormatParam.Success/* +urMemReleaseTest.Success/* +urMemReleaseTest.CheckReferenceCount/* +urMemRetainTest.Success/* +urMemRetainTest.CheckReferenceCount/* +urMemBufferCreateWithNativeHandleTest.Success/* +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/* +urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/* diff --git a/test/conformance/memory/memory_adapter_opencl.match b/test/conformance/memory/memory_adapter_opencl.match deleted file mode 100644 index b57e3876d0..0000000000 --- a/test/conformance/memory/memory_adapter_opencl.match +++ /dev/null @@ -1,2 +0,0 @@ -{{NONDETERMINISTIC}} -urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/memory/urMemBufferCreate.cpp b/test/conformance/memory/urMemBufferCreate.cpp index 2e9b46114d..df90c4b63a 100644 --- a/test/conformance/memory/urMemBufferCreate.cpp +++ b/test/conformance/memory/urMemBufferCreate.cpp @@ -31,6 +31,19 @@ TEST_P(urMemBufferCreateWithFlagsTest, InvalidNullHandleContext) { urMemBufferCreate(nullptr, getParam(), 4096, nullptr, buffer.ptr())); } +TEST_P(urMemBufferCreateWithFlagsTest, InvalidNullPointerBuffer) { + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urMemBufferCreate(context, getParam(), 4096, nullptr, nullptr)); +} + +TEST_P(urMemBufferCreateWithFlagsTest, InvalidBufferSizeZero) { + uur::raii::Mem buffer = nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_BUFFER_SIZE, + urMemBufferCreate(context, getParam(), 0, nullptr, buffer.ptr())); +} + using urMemBufferCreateTest = uur::urContextTest; 
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferCreateTest); @@ -41,6 +54,37 @@ TEST_P(urMemBufferCreateTest, InvalidEnumerationFlags) { nullptr, buffer.ptr())); } +TEST_P(urMemBufferCreateTest, InvalidHostPtrNullProperties) { + uur::raii::Mem buffer = nullptr; + ur_mem_flags_t flags = + UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemBufferCreate(context, flags, 4096, nullptr, buffer.ptr())); +} + +TEST_P(urMemBufferCreateTest, InvalidHostPtrNullHost) { + uur::raii::Mem buffer = nullptr; + ur_mem_flags_t flags = + UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + ur_buffer_properties_t properties; + properties.pHost = nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemBufferCreate(context, flags, 4096, &properties, buffer.ptr())); +} + +TEST_P(urMemBufferCreateTest, InvalidHostPtrValidHost) { + uur::raii::Mem buffer = nullptr; + ur_mem_flags_t flags = 0; + ur_buffer_properties_t properties; + int data = 42; + properties.pHost = &data; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemBufferCreate(context, flags, 4096, &properties, buffer.ptr())); +} + using urMemBufferCreateWithHostPtrFlagsTest = urMemBufferCreateTestWithFlagsParam; UUR_TEST_SUITE_P(urMemBufferCreateWithHostPtrFlagsTest, @@ -59,23 +103,3 @@ TEST_P(urMemBufferCreateWithHostPtrFlagsTest, SUCCESS) { ASSERT_SUCCESS(urMemBufferCreate(context, getParam(), 4096, &properties, buffer.ptr())); } - -TEST_P(urMemBufferCreateWithHostPtrFlagsTest, InvalidHostPtr) { - uur::raii::Mem buffer = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_HOST_PTR, - urMemBufferCreate(context, getParam(), 4096, nullptr, buffer.ptr())); -} - -TEST_P(urMemBufferCreateWithFlagsTest, InvalidNullPointerBuffer) { - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_NULL_POINTER, - urMemBufferCreate(context, getParam(), 4096, nullptr, nullptr)); -} - -TEST_P(urMemBufferCreateWithFlagsTest, 
InvalidBufferSizeZero) { - uur::raii::Mem buffer = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_BUFFER_SIZE, - urMemBufferCreate(context, getParam(), 0, nullptr, buffer.ptr())); -} diff --git a/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp b/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp index 8b98076cf6..78e2510369 100644 --- a/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp +++ b/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp @@ -11,23 +11,15 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferCreateWithNativeHandleTest); TEST_P(urMemBufferCreateWithNativeHandleTest, Success) { ur_native_handle_t hNativeMem = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urMemGetNativeHandle(buffer, device, &hNativeMem)); - } + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &hNativeMem)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. // We can however convert the native_handle back into a unified-runtime handle // and perform some query on it to verify that it works. 
ur_mem_handle_t mem = nullptr; - ur_mem_native_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, - /*.pNext =*/nullptr, - /*.isNativeHandleOwned =*/false, - }; - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urMemBufferCreateWithNativeHandle(hNativeMem, context, &props, &mem)); + ASSERT_SUCCESS( + urMemBufferCreateWithNativeHandle(hNativeMem, context, nullptr, &mem)); ASSERT_NE(mem, nullptr); size_t alloc_size = 0; @@ -37,12 +29,81 @@ TEST_P(urMemBufferCreateWithNativeHandleTest, Success) { ASSERT_SUCCESS(urMemRelease(mem)); } -using urMemBufferMultiQueueMemBufferTest = uur::urMultiDeviceMemBufferQueueTest; +TEST_P(urMemBufferCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &native_handle)); + + ur_mem_handle_t mem = nullptr; + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/true, + }; + ASSERT_SUCCESS(urMemBufferCreateWithNativeHandle(native_handle, context, + &props, &mem)); + ASSERT_NE(nullptr, mem); + ur_context_handle_t mem_context = nullptr; + ASSERT_SUCCESS(urMemGetInfo(mem, UR_MEM_INFO_CONTEXT, + sizeof(ur_context_handle_t), &mem_context, + nullptr)); + ASSERT_EQ(context, mem_context); +} + +TEST_P(urMemBufferCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &native_handle)); + + ur_mem_handle_t mem = nullptr; + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/false, + }; + ASSERT_SUCCESS(urMemBufferCreateWithNativeHandle(native_handle, context, + &props, &mem)); + ASSERT_NE(nullptr, mem); + + ur_context_handle_t mem_context = nullptr; + ASSERT_SUCCESS(urMemGetInfo(mem, UR_MEM_INFO_CONTEXT, + sizeof(ur_context_handle_t), &mem_context, + 
nullptr)); + ASSERT_EQ(context, mem_context); +} + +TEST_P(urMemBufferCreateWithNativeHandleTest, InvalidNullHandle) { + ur_native_handle_t hNativeMem = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &hNativeMem)); + + ur_mem_handle_t mem = nullptr; + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/false, + }; + ASSERT_EQ( + urMemBufferCreateWithNativeHandle(hNativeMem, nullptr, &props, &mem), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} + +TEST_P(urMemBufferCreateWithNativeHandleTest, InvalidNullPointer) { + ur_native_handle_t hNativeMem = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &hNativeMem)); + + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/false, + }; + ASSERT_EQ( + urMemBufferCreateWithNativeHandle(hNativeMem, context, &props, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); +} + +using urMemBufferMultiQueueMemBufferTest = uur::urMultiDeviceMemBufferQueueTest; TEST_F(urMemBufferMultiQueueMemBufferTest, WriteBack) { void *ptr; ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, nullptr, size, &ptr)); - ur_mem_handle_t mem = nullptr; ur_mem_native_properties_t props = { /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, @@ -54,26 +115,20 @@ TEST_F(urMemBufferMultiQueueMemBufferTest, WriteBack) { reinterpret_cast(ptr), context, &props, &mem)); } ASSERT_NE(mem, nullptr); - const uint8_t pattern = 0x11; std::vector src(size, pattern); - // write data to the buffer and destroy the buffer ASSERT_SUCCESS(urEnqueueMemBufferWrite(queues[1], mem, true, 0, size, src.data(), 0, nullptr, nullptr)); ASSERT_SUCCESS(urMemRelease(mem)); - // Create the buffer again and read back the data, data should have been written to the // memory behind the native handle. Use different queue to test data migration logic. 
ASSERT_SUCCESS(urMemBufferCreateWithNativeHandle( reinterpret_cast(ptr), context, &props, &mem)); ASSERT_NE(mem, nullptr); - std::vector dst(size, 0); ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[0], mem, true, 0, size, dst.data(), 0, nullptr, nullptr)); - ASSERT_EQ(src, dst); - ASSERT_SUCCESS(urMemRelease(mem)); } diff --git a/test/conformance/memory/urMemBufferPartition.cpp b/test/conformance/memory/urMemBufferPartition.cpp index 01ab7f8c62..fca20693c7 100644 --- a/test/conformance/memory/urMemBufferPartition.cpp +++ b/test/conformance/memory/urMemBufferPartition.cpp @@ -6,19 +6,32 @@ #include "uur/fixtures.h" #include "uur/raii.h" -using urMemBufferPartitionTest = uur::urMemBufferTest; -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferPartitionTest); +using urMemBufferPartitionWithFlagsTest = + uur::urContextTestWithParam; +UUR_TEST_SUITE_P(urMemBufferPartitionWithFlagsTest, + ::testing::Values(UR_MEM_FLAG_READ_WRITE, + UR_MEM_FLAG_WRITE_ONLY, + UR_MEM_FLAG_READ_ONLY), + uur::deviceTestWithParamPrinter); -TEST_P(urMemBufferPartitionTest, Success) { - ur_buffer_region_t region{UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, 0, - 1024}; +TEST_P(urMemBufferPartitionWithFlagsTest, Success) { + uur::raii::Mem buffer = nullptr; + + ASSERT_SUCCESS( + urMemBufferCreate(context, getParam(), 1024, nullptr, buffer.ptr())); + ASSERT_NE(nullptr, buffer); + + ur_buffer_region_t region{UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, 0, 512}; uur::raii::Mem partition = nullptr; - ASSERT_SUCCESS(urMemBufferPartition(buffer, UR_MEM_FLAG_READ_WRITE, + ASSERT_SUCCESS(urMemBufferPartition(buffer, getParam(), UR_BUFFER_CREATE_TYPE_REGION, ®ion, partition.ptr())); ASSERT_NE(partition, nullptr); } +using urMemBufferPartitionTest = uur::urMemBufferTest; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferPartitionTest); + TEST_P(urMemBufferPartitionTest, InvalidNullHandleBuffer) { ur_buffer_region_t region{UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, 0, 1024}; diff --git 
a/test/conformance/memory/urMemGetInfo.cpp b/test/conformance/memory/urMemGetInfo.cpp index a28b581a26..3f933d39a9 100644 --- a/test/conformance/memory/urMemGetInfo.cpp +++ b/test/conformance/memory/urMemGetInfo.cpp @@ -6,19 +6,21 @@ #include #include -using urMemGetInfoTest = uur::urMemBufferTestWithParam; +using urMemGetInfoTestWithParam = uur::urMemBufferTestWithParam; -static constexpr std::array mem_info_values{ - UR_MEM_INFO_SIZE, UR_MEM_INFO_CONTEXT}; +static constexpr std::array mem_info_values{ + UR_MEM_INFO_SIZE, UR_MEM_INFO_CONTEXT, UR_MEM_INFO_REFERENCE_COUNT}; static std::unordered_map mem_info_size_map = { {UR_MEM_INFO_SIZE, sizeof(size_t)}, {UR_MEM_INFO_CONTEXT, sizeof(ur_context_handle_t)}, + {UR_MEM_INFO_REFERENCE_COUNT, sizeof(uint32_t)}, }; -UUR_TEST_SUITE_P(urMemGetInfoTest, ::testing::ValuesIn(mem_info_values), +UUR_TEST_SUITE_P(urMemGetInfoTestWithParam, + ::testing::ValuesIn(mem_info_values), uur::deviceTestWithParamPrinter); -TEST_P(urMemGetInfoTest, Success) { +TEST_P(urMemGetInfoTestWithParam, Success) { ur_mem_info_t info = getParam(); size_t size; ASSERT_SUCCESS(urMemGetInfo(buffer, info, 0, nullptr, &size)); @@ -44,11 +46,20 @@ TEST_P(urMemGetInfoTest, Success) { ASSERT_GE(*returned_size, allocation_size); break; } + case UR_MEM_INFO_REFERENCE_COUNT: { + const size_t ReferenceCount = + *reinterpret_cast(info_data.data()); + ASSERT_GT(ReferenceCount, 0); + break; + } default: break; } } +using urMemGetInfoTest = uur::urMemBufferTest; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemGetInfoTest); + TEST_P(urMemGetInfoTest, InvalidNullHandleMemory) { size_t mem_size = 0; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, @@ -108,16 +119,30 @@ TEST_P(urMemGetInfoImageTest, Success) { std::vector info_data(size); ASSERT_SUCCESS(urMemGetInfo(image, info, size, info_data.data(), nullptr)); - if (info == UR_MEM_INFO_SIZE) { + switch (info) { + case UR_MEM_INFO_SIZE: { const size_t ExpectedPixelSize = sizeof(float) * 4 /*NumChannels*/; const size_t 
ExpectedImageSize = ExpectedPixelSize * desc.arraySize * desc.width * desc.height * desc.depth; const size_t ImageSizeBytes = *reinterpret_cast(info_data.data()); ASSERT_EQ(ImageSizeBytes, ExpectedImageSize); - } else if (info == UR_MEM_INFO_CONTEXT) { + break; + } + case UR_MEM_INFO_CONTEXT: { ur_context_handle_t InfoContext = *reinterpret_cast(info_data.data()); ASSERT_EQ(InfoContext, context); + break; + } + case UR_MEM_INFO_REFERENCE_COUNT: { + const size_t ReferenceCount = + *reinterpret_cast(info_data.data()); + ASSERT_GT(ReferenceCount, 0); + break; + } + + default: + break; } } diff --git a/test/conformance/memory/urMemImageCreate.cpp b/test/conformance/memory/urMemImageCreate.cpp index 28d5d9c4e3..bfb4f6e719 100644 --- a/test/conformance/memory/urMemImageCreate.cpp +++ b/test/conformance/memory/urMemImageCreate.cpp @@ -286,6 +286,25 @@ TEST_P(urMemImageCreateTest, InvalidImageDescSlicePitch) { nullptr, image_handle.ptr())); } +TEST_P(urMemImageCreateTest, InvalidHostPtrNullHost) { + uur::raii::Mem image_handle = nullptr; + ur_mem_flags_t flags = + UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemImageCreate(context, flags, &image_format, + &image_desc, nullptr, + image_handle.ptr())); +} + +TEST_P(urMemImageCreateTest, InvalidHostPtrValidHost) { + uur::raii::Mem image_handle = nullptr; + ur_mem_flags_t flags = 0; + int data = 42; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemImageCreate(context, flags, &image_format, + &image_desc, &data, image_handle.ptr())); +} + using urMemImageCreateWithHostPtrFlagsTest = urMemImageCreateTestWithParam; @@ -306,11 +325,3 @@ TEST_P(urMemImageCreateWithHostPtrFlagsTest, Success) { image_handle.ptr())); ASSERT_NE(nullptr, image_handle.ptr()); } - -TEST_P(urMemImageCreateWithHostPtrFlagsTest, InvalidHostPtr) { - uur::raii::Mem image_handle = nullptr; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_HOST_PTR, - urMemImageCreate(context, 
getParam(), &image_format, - &image_desc, nullptr, - image_handle.ptr())); -} diff --git a/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp b/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp index 3404b4203f..c33cc814a3 100644 --- a/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp +++ b/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp @@ -10,15 +10,11 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemImageCreateWithNativeHandleTest); TEST_P(urMemImageCreateWithNativeHandleTest, Success) { ur_native_handle_t native_handle = 0; - if (urMemGetNativeHandle(image, device, &native_handle)) { - GTEST_SKIP(); - } + ASSERT_SUCCESS(urMemGetNativeHandle(image, device, &native_handle)); ur_mem_handle_t mem = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urMemImageCreateWithNativeHandle(native_handle, context, &image_format, - &image_desc, nullptr, &mem)); + ASSERT_SUCCESS(urMemImageCreateWithNativeHandle( + native_handle, context, &image_format, &image_desc, nullptr, &mem)); ASSERT_NE(nullptr, mem); ur_context_handle_t mem_context = nullptr; @@ -27,3 +23,24 @@ TEST_P(urMemImageCreateWithNativeHandleTest, Success) { nullptr)); ASSERT_EQ(context, mem_context); } + +TEST_P(urMemImageCreateWithNativeHandleTest, InvalidNullHandle) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(image, device, &native_handle)); + + ur_mem_handle_t mem = nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_HANDLE, + urMemImageCreateWithNativeHandle(native_handle, nullptr, &image_format, + &image_desc, nullptr, &mem)); +} + +TEST_P(urMemImageCreateWithNativeHandleTest, InvalidNullPointer) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(image, device, &native_handle)); + + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urMemImageCreateWithNativeHandle(native_handle, context, &image_format, + &image_desc, nullptr, nullptr)); +} diff --git 
a/test/conformance/memory/urMemRelease.cpp b/test/conformance/memory/urMemRelease.cpp index 3e84142638..730b6aa854 100644 --- a/test/conformance/memory/urMemRelease.cpp +++ b/test/conformance/memory/urMemRelease.cpp @@ -16,3 +16,24 @@ TEST_P(urMemReleaseTest, InvalidNullHandleMem) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urMemRelease(nullptr)); } + +TEST_P(urMemReleaseTest, CheckReferenceCount) { + uint32_t referenceCount = 0; + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); + + ASSERT_SUCCESS(urMemRetain(buffer)); + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 2); + + ASSERT_SUCCESS(urMemRelease(buffer)); + + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); +} diff --git a/test/conformance/memory/urMemRetain.cpp b/test/conformance/memory/urMemRetain.cpp index 895d68097e..a58896a91b 100644 --- a/test/conformance/memory/urMemRetain.cpp +++ b/test/conformance/memory/urMemRetain.cpp @@ -9,9 +9,31 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemRetainTest); TEST_P(urMemRetainTest, Success) { ASSERT_SUCCESS(urMemRetain(buffer)); - EXPECT_SUCCESS(urMemRelease(buffer)); + ASSERT_SUCCESS(urMemRelease(buffer)); } TEST_P(urMemRetainTest, InvalidNullHandleMem) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urMemRetain(nullptr)); } + +TEST_P(urMemRetainTest, CheckReferenceCount) { + uint32_t referenceCount = 0; + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); + + { ASSERT_SUCCESS(urMemRetain(buffer)); } + + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + 
ASSERT_EQ(referenceCount, 2); + + ASSERT_SUCCESS(urMemRelease(buffer)); + + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); +} diff --git a/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp b/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp index 41fe59442d..6b56f9b661 100644 --- a/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp +++ b/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp @@ -10,10 +10,9 @@ using urPlatformCreateWithNativeHandleTest = uur::platform::urPlatformTest; TEST_F(urPlatformCreateWithNativeHandleTest, Success) { for (auto platform : platforms) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urPlatformGetNativeHandle(platform, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urPlatformGetNativeHandle(platform, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. @@ -33,10 +32,9 @@ TEST_F(urPlatformCreateWithNativeHandleTest, Success) { TEST_F(urPlatformCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { for (auto platform : platforms) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urPlatformGetNativeHandle(platform, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urPlatformGetNativeHandle(platform, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
@@ -58,10 +56,9 @@ TEST_F(urPlatformCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { TEST_F(urPlatformCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { for (auto platform : platforms) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urPlatformGetNativeHandle(platform, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urPlatformGetNativeHandle(platform, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. diff --git a/test/conformance/platform/urPlatformGetBackendOption.cpp b/test/conformance/platform/urPlatformGetBackendOption.cpp index cfddb25e10..06ac618580 100644 --- a/test/conformance/platform/urPlatformGetBackendOption.cpp +++ b/test/conformance/platform/urPlatformGetBackendOption.cpp @@ -46,5 +46,5 @@ TEST_F(urPlatfromGetBackendOptionTest, InvalidValueFrontendOption) { const char *platformOption = nullptr; ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_VALUE, - urPlatformGetBackendOption(platform, "-sycl-sucks", &platformOption)); + urPlatformGetBackendOption(platform, "-invalid-opt", &platformOption)); } diff --git a/test/conformance/program/CMakeLists.txt b/test/conformance/program/CMakeLists.txt index 31235eaf71..4db93881f4 100644 --- a/test/conformance/program/CMakeLists.txt +++ b/test/conformance/program/CMakeLists.txt @@ -8,6 +8,7 @@ add_conformance_test_with_kernels_environment(program urProgramCompile.cpp urProgramCreateWithBinary.cpp urMultiDeviceProgramCreateWithBinary.cpp + urMultiDeviceProgramCreateWithIL.cpp urProgramCreateWithIL.cpp urProgramCreateWithNativeHandle.cpp urProgramGetBuildInfo.cpp diff --git a/test/conformance/program/program_adapter_cuda.match b/test/conformance/program/program_adapter_cuda.match index fac749462e..11bf1c3e67 100644 --- a/test/conformance/program/program_adapter_cuda.match +++ b/test/conformance/program/program_adapter_cuda.match @@ -1,14 +1,13 
@@ -{{NONDETERMINISTIC}} -urProgramBuildTest.BuildFailure/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urProgramCreateWithILTest.Success/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/NVIDIA_CUDA_BACKEND___{{.*}} +urProgramBuildTest.BuildFailure/* +{{OPT}}urProgramCreateWithILTest.Success/* +{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/* +{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/* # This test flakily fails -{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.Success/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/NVIDIA_CUDA_BACKEND___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueId/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.SingleCall/NVIDIA_CUDA_BACKEND___{{.*}}_ +{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/* +{{OPT}}urProgramSetSpecializationConstantsTest.Success/* +{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/* +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/* +urProgramSetMultipleSpecializationConstantsTest.SingleCall/* diff --git a/test/conformance/program/program_adapter_hip.match b/test/conformance/program/program_adapter_hip.match index 2f93f09660..69fe6ac1bb 100644 --- a/test/conformance/program/program_adapter_hip.match +++ 
b/test/conformance/program/program_adapter_hip.match @@ -1,19 +1,18 @@ -{{NONDETERMINISTIC}} -urProgramBuildTest.BuildFailure/AMD_HIP_BACKEND___{{.*}}_ +urProgramBuildTest.BuildFailure/* # HIP hasn't implemented urProgramCreateWithNativeHandleTest -{{OPT}}urProgramCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urProgramCreateWithNativeHandleTest.Success/* # HIP doesn't expose kernel numbers or names -urProgramGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_NUM_KERNELS -urProgramGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_KERNEL_NAMES +urProgramGetInfoTest.Success/*__UR_PROGRAM_INFO_NUM_KERNELS +urProgramGetInfoTest.Success/*__UR_PROGRAM_INFO_KERNEL_NAMES # HIP hasn't implemented urProgramLink -{{OPT}}urProgramLinkTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urProgramLinkTest.Success/* # Hip doesn't support specialization constants -urProgramSetSpecializationConstantsTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.UseDefaultValue/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueSize/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueId/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.SingleCall/AMD_HIP_BACKEND___{{.*}}_ +urProgramSetSpecializationConstantsTest.Success/* +urProgramSetSpecializationConstantsTest.UseDefaultValue/* +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/* +urProgramSetMultipleSpecializationConstantsTest.SingleCall/* diff --git a/test/conformance/program/program_adapter_level_zero.match 
b/test/conformance/program/program_adapter_level_zero.match index bd7e269d9f..97d6869b81 100644 --- a/test/conformance/program/program_adapter_level_zero.match +++ b/test/conformance/program/program_adapter_level_zero.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} -urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* diff --git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index 892b7cfb51..97d6869b81 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} -urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* diff --git a/test/conformance/program/program_adapter_native_cpu.match b/test/conformance/program/program_adapter_native_cpu.match index 47163ce042..bd04ab9e8c 100644 --- a/test/conformance/program/program_adapter_native_cpu.match +++ b/test/conformance/program/program_adapter_native_cpu.match @@ -1,147 +1,79 @@ -{{NONDETERMINISTIC}} 
-{{OPT}}urProgramBuildTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.SuccessWithOptions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.BuildFailure/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCompileTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCompileTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCompileTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerBinary/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerMetadata/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidSizePropertyCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.BuildInvalidProgramBinary/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidNullPointerSource/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramCreateWithILTest.InvalidSizeLength/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG 
-{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidKernelName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionPointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidVariableName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariableName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariablePointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES 
-{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES 
-{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES 
-{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoSingleTest.NumDevicesIsNonzero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesDeviceArray/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesContextNumDevices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetNativeHandleTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetNativeHandleTest.InvalidNullPointerNativeProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidNullPointerInputPrograms/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidSizeCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.SetOutputOnZeroCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkErrorTest.LinkFailure/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramReleaseTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramRetainTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramSetSpecializationConstantsTest.InvalidSizeCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueId/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValuePtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetMultipleSpecializationConstantsTest.SingleCall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urProgramBuildTest.Success/* +{{OPT}}urProgramBuildTest.SuccessWithOptions/* +{{OPT}}urProgramBuildTest.InvalidNullHandleContext/* +{{OPT}}urProgramBuildTest.InvalidNullHandleProgram/* +{{OPT}}urProgramBuildTest.BuildFailure/* +{{OPT}}urProgramCompileTest.Success/* +{{OPT}}urProgramCompileTest.InvalidNullHandleContext/* +{{OPT}}urProgramCompileTest.InvalidNullHandleProgram/* +{{OPT}}urProgramCreateWithBinaryTest.Success/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleContext/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleDevice/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerBinary/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerProgram/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerMetadata/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidSizePropertyCount/* +{{OPT}}urProgramCreateWithBinaryTest.BuildInvalidProgramBinary/* +{{OPT}}urProgramCreateWithILTest.Success/* +{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/* +{{OPT}}urProgramCreateWithILTest.InvalidNullHandle/* +{{OPT}}urProgramCreateWithILTest.InvalidNullPointerSource/* +{{OPT}}urProgramCreateWithILTest.InvalidSizeLength/* +{{OPT}}urProgramCreateWithILTest.InvalidNullPointerProgram/* +{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/* +{{OPT}}urProgramCreateWithNativeHandleTest.Success/* 
+{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/* +{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/* +{{OPT}}urProgramGetBuildInfoTest.Success/* +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/* +{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/* +{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/* +{{OPT}}urProgramGetFunctionPointerTest.Success/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidKernelName/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleDevice/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionName/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionPointer/* +{{OPT}}urProgramGetGlobalVariablePointerTest.Success/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleDevice/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidVariableName/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariableName/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariablePointer/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidProgramExecutable/* +{{OPT}}urProgramGetInfoTest.Success/* +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetInfoTest.InvalidEnumeration/* +{{OPT}}urProgramGetInfoTest.InvalidSizeZero/* +{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/* +{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/* +{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/* +{{OPT}}urProgramGetInfoSingleTest.NumDevicesIsNonzero/* +{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesDeviceArray/* +{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesContextNumDevices/* +{{OPT}}urProgramGetNativeHandleTest.Success/* +{{OPT}}urProgramGetNativeHandleTest.InvalidNullHandleProgram/* 
+{{OPT}}urProgramGetNativeHandleTest.InvalidNullPointerNativeProgram/* +{{OPT}}urProgramLinkTest.Success/* +{{OPT}}urProgramLinkTest.InvalidNullHandleContext/* +{{OPT}}urProgramLinkTest.InvalidNullPointerProgram/* +{{OPT}}urProgramLinkTest.InvalidNullPointerInputPrograms/* +{{OPT}}urProgramLinkTest.InvalidSizeCount/* +{{OPT}}urProgramLinkTest.SetOutputOnZeroCount/* +{{OPT}}urProgramLinkErrorTest.LinkFailure/* +{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/* +{{OPT}}urProgramReleaseTest.Success/* +{{OPT}}urProgramReleaseTest.InvalidNullHandleProgram/* +{{OPT}}urProgramRetainTest.Success/* +{{OPT}}urProgramRetainTest.InvalidNullHandleProgram/* +{{OPT}}urProgramSetSpecializationConstantsTest.Success/* +{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullHandleProgram/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidSizeCount/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueSize/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueId/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +{{OPT}}urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/* +{{OPT}}urProgramSetMultipleSpecializationConstantsTest.SingleCall/* diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp index 95a135af1c..9ff11d9016 100644 --- a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -12,11 +12,7 @@ struct urMultiDeviceProgramCreateWithBinaryTest void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceProgramTest::SetUp()); - // First obtain binaries for all devices from the compiler SPIRV program. 
- devices = uur::DevicesEnvironment::instance->devices; - if (devices.size() < 2) { - GTEST_SKIP(); - } + // First obtain binaries for all devices from the compiled SPIRV program. ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); size_t binary_sizes_len = 0; ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, @@ -51,7 +47,6 @@ struct urMultiDeviceProgramCreateWithBinaryTest } std::vector> binaries; - std::vector devices; std::vector pointers; std::vector binary_sizes; ur_program_handle_t binary_program = nullptr; diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp new file mode 100644 index 0000000000..652de93540 --- /dev/null +++ b/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp @@ -0,0 +1,63 @@ + +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +using urMultiDeviceProgramTest = uur::urMultiDeviceProgramTest; + +// Test binary sizes and binaries obtained from urProgramGetInfo when program is built for a subset of devices in the context. +TEST_F(urMultiDeviceProgramTest, urMultiDeviceProgramGetInfo) { + // Run test only for level zero backend which supports urProgramBuildExp. + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend != UR_PLATFORM_BACKEND_LEVEL_ZERO) { + GTEST_SKIP(); + } + + std::vector associated_devices(devices.size()); + ASSERT_SUCCESS( + urProgramGetInfo(program, UR_PROGRAM_INFO_DEVICES, + associated_devices.size() * sizeof(ur_device_handle_t), + associated_devices.data(), nullptr)); + + // Build program for the first half of devices. 
+ auto subset = std::vector( + associated_devices.begin(), + associated_devices.begin() + associated_devices.size() / 2); + ASSERT_SUCCESS( + urProgramBuildExp(program, subset.size(), subset.data(), nullptr)); + + std::vector binary_sizes(associated_devices.size()); + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + binary_sizes.size() * sizeof(size_t), + binary_sizes.data(), nullptr)); + + std::vector> binaries(associated_devices.size()); + std::vector pointers(associated_devices.size()); + for (size_t i = 0; i < associated_devices.size() / 2; i++) { + ASSERT_NE(binary_sizes[i], 0); + binaries[i].resize(binary_sizes[i]); + pointers[i] = binaries[i].data(); + } + for (size_t i = associated_devices.size() / 2; + i < associated_devices.size(); i++) { + ASSERT_EQ(binary_sizes[i], 0); + pointers[i] = binaries[i].data(); + } + + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, + sizeof(uint8_t *) * pointers.size(), + pointers.data(), nullptr)); + for (size_t i = 0; i < associated_devices.size() / 2; i++) { + ASSERT_NE(binaries[i].size(), 0); + } + for (size_t i = associated_devices.size() / 2; + i < associated_devices.size(); i++) { + ASSERT_EQ(binaries[i].size(), 0); + } +} diff --git a/test/conformance/program/urProgramGetGlobalVariablePointer.cpp b/test/conformance/program/urProgramGetGlobalVariablePointer.cpp index c791cb8c7a..aff3ad8803 100644 --- a/test/conformance/program/urProgramGetGlobalVariablePointer.cpp +++ b/test/conformance/program/urProgramGetGlobalVariablePointer.cpp @@ -59,3 +59,32 @@ TEST_P(urProgramGetGlobalVariablePointerTest, &global_variable_size, nullptr), UR_RESULT_ERROR_INVALID_NULL_POINTER); } + +TEST_P(urProgramGetGlobalVariablePointerTest, InvalidProgramExecutable) { + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(ur_platform_backend_t), &backend, + nullptr)); + if (backend != UR_PLATFORM_BACKEND_LEVEL_ZERO) { + GTEST_SKIP(); + } + // 
Get IL from the compiled program. + size_t il_size = 0; + ASSERT_SUCCESS( + urProgramGetInfo(program, UR_PROGRAM_INFO_IL, 0, nullptr, &il_size)); + ASSERT_GT(il_size, 0); + std::vector il(il_size); + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_IL, il_size, + il.data(), nullptr)); + // Create program with IL. + ur_program_handle_t program_with_il; + ASSERT_SUCCESS(urProgramCreateWithIL(context, il.data(), il.size(), nullptr, + &program_with_il)); + // Expect error when trying to get global variable pointer from a program which is not in exe state. + size_t global_variable_size = 0; + void *global_variable_pointer; + ASSERT_EQ_RESULT(urProgramGetGlobalVariablePointer( + device, program_with_il, global_var.name.c_str(), + &global_variable_size, &global_variable_pointer), + UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE); +} diff --git a/test/conformance/queue/queue_adapter_native_cpu.match b/test/conformance/queue/queue_adapter_native_cpu.match index 32ea573390..1c48a80fed 100644 --- a/test/conformance/queue/queue_adapter_native_cpu.match +++ b/test/conformance/queue/queue_adapter_native_cpu.match @@ -1,42 +1,14 @@ -{{NONDETERMINISTIC}} -urQueueCreateTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueCreateTest.CheckContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PROFILING_ENABLE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE_DEFAULT -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_DISCARD_EVENTS 
-urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_LOW -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_HIGH -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_BATCHED -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_USE_DEFAULT_STREAM -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PROFILING_ENABLE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE_DEFAULT -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_DISCARD_EVENTS -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_LOW -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_HIGH -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_BATCHED -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_USE_DEFAULT_STREAM 
-urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM -urQueueFinishTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueFlushTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_CONTEXT -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_FLAGS -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_REFERENCE_COUNT -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_EMPTY -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_CONTEXT -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE_DEFAULT -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_FLAGS -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_REFERENCE_COUNT -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_SIZE -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_EMPTY -urQueueGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urQueueCreateTest.Success/* +urQueueCreateTest.CheckContext/* +urQueueCreateWithParamTest.SuccessWithProperties/* 
+urQueueCreateWithParamTest.MatchingDeviceHandles/* +urQueueFlushTest.Success/* +urQueueGetInfoTest.Context/* +urQueueGetInfoTest.Device/* +urQueueGetInfoTest.Flags/* +urQueueGetInfoTest.ReferenceCount/* +urQueueGetInfoTest.InvalidSizeSmall/* +urQueueGetInfoDeviceQueueTestWithInfoParam.DeviceDefault/* +urQueueGetInfoDeviceQueueTestWithInfoParam.Size/* +urQueueRetainTest.Success/* +urQueueReleaseTest.Success/* diff --git a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp index f2fa83ec8e..9ce80a04e8 100644 --- a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp +++ b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp @@ -9,10 +9,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urQueueCreateWithNativeHandleTest); TEST_P(urQueueCreateWithNativeHandleTest, Success) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urQueueGetNativeHandle(queue, nullptr, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urQueueGetNativeHandle(queue, nullptr, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
@@ -30,3 +29,24 @@ TEST_P(urQueueCreateWithNativeHandleTest, Success) { ASSERT_EQ(q_context, context); ASSERT_SUCCESS(urQueueRelease(q)); } + +TEST_P(urQueueCreateWithNativeHandleTest, InvalidNullHandle) { + ur_native_handle_t native_handle = 0; + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urQueueGetNativeHandle(queue, nullptr, &native_handle)); + + ur_queue_handle_t q = nullptr; + ASSERT_EQ(urQueueCreateWithNativeHandle(native_handle, nullptr, device, + nullptr, &q), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} + +TEST_P(urQueueCreateWithNativeHandleTest, InvalidNullPointer) { + ur_native_handle_t native_handle = 0; + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urQueueGetNativeHandle(queue, nullptr, &native_handle)); + + ASSERT_EQ(urQueueCreateWithNativeHandle(native_handle, context, device, + nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); +} diff --git a/test/conformance/queue/urQueueGetInfo.cpp b/test/conformance/queue/urQueueGetInfo.cpp index 9ffb97e1ff..e3330fef6f 100644 --- a/test/conformance/queue/urQueueGetInfo.cpp +++ b/test/conformance/queue/urQueueGetInfo.cpp @@ -5,131 +5,80 @@ #include #include -std::unordered_map queue_info_size_map = { - {UR_QUEUE_INFO_CONTEXT, sizeof(ur_context_handle_t)}, - {UR_QUEUE_INFO_DEVICE, sizeof(ur_device_handle_t)}, - {UR_QUEUE_INFO_DEVICE_DEFAULT, sizeof(ur_queue_handle_t)}, - {UR_QUEUE_INFO_FLAGS, sizeof(ur_queue_flags_t)}, - {UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(uint32_t)}, - {UR_QUEUE_INFO_SIZE, sizeof(uint32_t)}, - {UR_QUEUE_INFO_EMPTY, sizeof(ur_bool_t)}, -}; +using urQueueGetInfoTest = uur::urQueueTest; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urQueueGetInfoTest); -using urQueueGetInfoTestWithInfoParam = - uur::urQueueTestWithParam; +TEST_P(urQueueGetInfoTest, Context) { + size_t size = 0; + auto infoType = UR_QUEUE_INFO_CONTEXT; + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(ur_context_handle_t), size); -UUR_TEST_SUITE_P(urQueueGetInfoTestWithInfoParam, - 
::testing::Values(UR_QUEUE_INFO_CONTEXT, UR_QUEUE_INFO_DEVICE, - UR_QUEUE_INFO_FLAGS, - UR_QUEUE_INFO_REFERENCE_COUNT, - UR_QUEUE_INFO_EMPTY), - uur::deviceTestWithParamPrinter); + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); -TEST_P(urQueueGetInfoTestWithInfoParam, Success) { - ur_queue_info_t info_type = getParam(); - size_t size = 0; - auto result = urQueueGetInfo(queue, info_type, 0, nullptr, &size); + auto returned_context = + reinterpret_cast(data.data()); + ASSERT_EQ(context, *returned_context); +} - if (result == UR_RESULT_SUCCESS) { - ASSERT_NE(size, 0); +TEST_P(urQueueGetInfoTest, Device) { + size_t size = 0; + auto infoType = UR_QUEUE_INFO_DEVICE; + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(ur_device_handle_t), size); - if (const auto expected_size = queue_info_size_map.find(info_type); - expected_size != queue_info_size_map.end()) { - ASSERT_EQ(expected_size->second, size); - } + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); - std::vector data(size); - ASSERT_SUCCESS( - urQueueGetInfo(queue, info_type, size, data.data(), nullptr)); - - switch (info_type) { - case UR_QUEUE_INFO_CONTEXT: { - auto returned_context = - reinterpret_cast(data.data()); - ASSERT_EQ(context, *returned_context); - break; - } - case UR_QUEUE_INFO_DEVICE: { - auto returned_device = - reinterpret_cast(data.data()); - ASSERT_EQ(*returned_device, device); - break; - } - case UR_QUEUE_INFO_REFERENCE_COUNT: { - auto returned_reference_count = - reinterpret_cast(data.data()); - ASSERT_GT(*returned_reference_count, 0U); - break; - } - default: - break; - } - } else { - ASSERT_EQ_RESULT(result, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); - } + auto returned_device = reinterpret_cast(data.data()); + ASSERT_EQ(device, *returned_device); } -struct urQueueGetInfoDeviceQueueTestWithInfoParam - : public 
uur::urContextTestWithParam { - void SetUp() { - urContextTestWithParam::SetUp(); - ur_queue_flags_t deviceQueueCapabilities; - ASSERT_SUCCESS( - urDeviceGetInfo(device, UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES, - sizeof(deviceQueueCapabilities), - &deviceQueueCapabilities, nullptr)); - if (!deviceQueueCapabilities) { - GTEST_SKIP() << "Queue on device is not supported."; - } - ASSERT_SUCCESS( - urQueueCreate(context, device, &queueProperties, &queue)); - } - - void TearDown() { - if (queue) { - ASSERT_SUCCESS(urQueueRelease(queue)); - } - urContextTestWithParam::TearDown(); - } +TEST_P(urQueueGetInfoTest, Flags) { + size_t size = 0; + auto infoType = UR_QUEUE_INFO_FLAGS; + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(ur_queue_flags_t), size); - ur_queue_handle_t queue = nullptr; - ur_queue_properties_t queueProperties = { - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, - UR_QUEUE_FLAG_ON_DEVICE | UR_QUEUE_FLAG_ON_DEVICE_DEFAULT | - UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE}; -}; + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); -UUR_TEST_SUITE_P(urQueueGetInfoDeviceQueueTestWithInfoParam, - ::testing::Values(UR_QUEUE_INFO_CONTEXT, UR_QUEUE_INFO_DEVICE, - UR_QUEUE_INFO_DEVICE_DEFAULT, - UR_QUEUE_INFO_FLAGS, - UR_QUEUE_INFO_REFERENCE_COUNT, - UR_QUEUE_INFO_SIZE, UR_QUEUE_INFO_EMPTY), - uur::deviceTestWithParamPrinter); + auto returned_flags = reinterpret_cast(data.data()); + EXPECT_EQ(*returned_flags, queue_properties.flags); +} -TEST_P(urQueueGetInfoDeviceQueueTestWithInfoParam, Success) { - ur_queue_info_t info_type = getParam(); +TEST_P(urQueueGetInfoTest, ReferenceCount) { size_t size = 0; - auto result = urQueueGetInfo(queue, info_type, 0, nullptr, &size); + auto infoType = UR_QUEUE_INFO_REFERENCE_COUNT; + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(uint32_t), size); - if 
(result == UR_RESULT_SUCCESS) { - ASSERT_NE(size, 0); + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); - if (const auto expected_size = queue_info_size_map.find(info_type); - expected_size != queue_info_size_map.end()) { - ASSERT_EQ(expected_size->second, size); - } - - std::vector data(size); - ASSERT_SUCCESS( - urQueueGetInfo(queue, info_type, size, data.data(), nullptr)); - } else { - ASSERT_EQ_RESULT(result, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); - } + auto returned_reference_count = reinterpret_cast(data.data()); + ASSERT_GT(*returned_reference_count, 0U); } -using urQueueGetInfoTest = uur::urQueueTest; -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urQueueGetInfoTest); +TEST_P(urQueueGetInfoTest, EmptyQueue) { + size_t size = 0; + auto infoType = UR_QUEUE_INFO_EMPTY; + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(ur_bool_t), size); + + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); + + auto returned_empty_queue = reinterpret_cast(data.data()); + ASSERT_TRUE(returned_empty_queue); +} TEST_P(urQueueGetInfoTest, InvalidNullHandleQueue) { ur_context_handle_t context = nullptr; @@ -174,3 +123,64 @@ TEST_P(urQueueGetInfoTest, InvalidNullPointerPropSizeRet) { UR_RESULT_ERROR_INVALID_NULL_POINTER, urQueueGetInfo(queue, UR_QUEUE_INFO_CONTEXT, 0, nullptr, nullptr)); } + +struct urQueueGetInfoDeviceQueueTestWithInfoParam : public uur::urQueueTest { + void SetUp() { + urQueueGetInfoTest::SetUp(); + ur_queue_flags_t deviceQueueCapabilities; + ASSERT_SUCCESS( + urDeviceGetInfo(device, UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES, + sizeof(deviceQueueCapabilities), + &deviceQueueCapabilities, nullptr)); + if (!deviceQueueCapabilities) { + GTEST_SKIP() << "Queue on device is not supported."; + } + ASSERT_SUCCESS( + urQueueCreate(context, device, &queueProperties, &queue)); + } + + void 
TearDown() { + if (queue) { + ASSERT_SUCCESS(urQueueRelease(queue)); + } + urQueueGetInfoTest::TearDown(); + } + + ur_queue_handle_t queue = nullptr; + ur_queue_properties_t queueProperties = { + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, + UR_QUEUE_FLAG_ON_DEVICE | UR_QUEUE_FLAG_ON_DEVICE_DEFAULT | + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE}; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urQueueGetInfoDeviceQueueTestWithInfoParam); + +TEST_P(urQueueGetInfoDeviceQueueTestWithInfoParam, DeviceDefault) { + + size_t size = 0; + auto infoType = UR_QUEUE_INFO_DEVICE_DEFAULT; + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(ur_queue_handle_t), size); + + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); + + auto returned_queue = reinterpret_cast(data.data()); + ASSERT_EQ(queue, *returned_queue); +} + +TEST_P(urQueueGetInfoDeviceQueueTestWithInfoParam, Size) { + + size_t size = 0; + auto infoType = UR_QUEUE_INFO_SIZE; + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, 0, nullptr, &size)); + ASSERT_NE(size, 0); + ASSERT_EQ(sizeof(uint32_t), size); + + std::vector data(size); + ASSERT_SUCCESS(urQueueGetInfo(queue, infoType, size, data.data(), nullptr)); + + auto returned_size = reinterpret_cast(data.data()); + ASSERT_GT(*returned_size, 0); +} diff --git a/test/conformance/sampler/sampler_adapter_level_zero.match b/test/conformance/sampler/sampler_adapter_level_zero.match index bf65bfcea6..db656e2b95 100644 --- a/test/conformance/sampler/sampler_adapter_level_zero.match +++ b/test/conformance/sampler/sampler_adapter_level_zero.match @@ -1,9 +1,4 @@ -{{NONDETERMINISTIC}} -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_REFERENCE_COUNT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_CONTEXT 
-{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_NORMALIZED_COORDS -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_ADDRESSING_MODE -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_FILTER_MODE -{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urSamplerGetInfoTestWithParam.Success/* +{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/* +{{OPT}}urSamplerReleaseTest.Success/* +{{OPT}}urSamplerRetainTest.Success/* diff --git a/test/conformance/sampler/sampler_adapter_level_zero_v2.match b/test/conformance/sampler/sampler_adapter_level_zero_v2.match index bf65bfcea6..db656e2b95 100644 --- a/test/conformance/sampler/sampler_adapter_level_zero_v2.match +++ b/test/conformance/sampler/sampler_adapter_level_zero_v2.match @@ -1,9 +1,4 @@ -{{NONDETERMINISTIC}} -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_REFERENCE_COUNT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_CONTEXT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_NORMALIZED_COORDS -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_ADDRESSING_MODE -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_FILTER_MODE 
-{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urSamplerGetInfoTestWithParam.Success/* +{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/* +{{OPT}}urSamplerReleaseTest.Success/* +{{OPT}}urSamplerRetainTest.Success/* diff --git a/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp b/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp index 59638105c9..c6bd776435 100644 --- a/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp +++ b/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp @@ -12,10 +12,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urSamplerCreateWithNativeHandleTest); TEST_P(urSamplerCreateWithNativeHandleTest, Success) { ur_native_handle_t native_sampler = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urSamplerGetNativeHandle(sampler, &native_sampler)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urSamplerGetNativeHandle(sampler, &native_sampler)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
@@ -36,10 +35,9 @@ TEST_P(urSamplerCreateWithNativeHandleTest, Success) { TEST_P(urSamplerCreateWithNativeHandleTest, InvalidNullHandle) { ur_native_handle_t native_sampler = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urSamplerGetNativeHandle(sampler, &native_sampler)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urSamplerGetNativeHandle(sampler, &native_sampler)); ur_sampler_handle_t hSampler = nullptr; ur_sampler_native_properties_t props{}; @@ -50,10 +48,9 @@ TEST_P(urSamplerCreateWithNativeHandleTest, InvalidNullHandle) { TEST_P(urSamplerCreateWithNativeHandleTest, InvalidNullPointer) { ur_native_handle_t native_sampler = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urSamplerGetNativeHandle(sampler, &native_sampler)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urSamplerGetNativeHandle(sampler, &native_sampler)); ur_sampler_native_properties_t props{}; ASSERT_EQ(urSamplerCreateWithNativeHandle(native_sampler, context, &props, diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index d1b373af84..436e7821a9 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -23,12 +23,15 @@ (void)0 #define UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(ret) \ - auto status = ret; \ - if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { \ - GTEST_SKIP(); \ - } else { \ - ASSERT_EQ(status, UR_RESULT_SUCCESS); \ - } + do { \ + auto status = ret; \ + if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE || \ + status == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { \ + GTEST_SKIP(); \ + } else { \ + ASSERT_EQ(status, UR_RESULT_SUCCESS); \ + } \ + } while (0) namespace uur { @@ -205,6 +208,9 @@ struct urMemImageTest : urContextTest { if (!imageSupported) { GTEST_SKIP(); } + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &image_format, &image_desc, nullptr, + &image)); } void TearDown() override { @@ -215,7 +221,7 @@ struct urMemImageTest : urContextTest { 
} ur_image_format_t image_format = { - /*.channelOrder =*/UR_IMAGE_CHANNEL_ORDER_ARGB, + /*.channelOrder =*/UR_IMAGE_CHANNEL_ORDER_RGBA, /*.channelType =*/UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, }; ur_image_desc_t image_desc = { @@ -226,8 +232,8 @@ struct urMemImageTest : urContextTest { /*.height =*/16, /*.depth =*/1, /*.arraySize =*/1, - /*.rowPitch =*/16 * sizeof(char[4]), - /*.slicePitch =*/16 * 16 * sizeof(char[4]), + /*.rowPitch =*/0, + /*.slicePitch =*/0, /*.numMipLevel =*/0, /*.numSamples =*/0, }; @@ -1582,6 +1588,10 @@ struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { backend == UR_PLATFORM_BACKEND_CUDA) { GTEST_SKIP(); } + devices = uur::DevicesEnvironment::instance->devices; + if (devices.size() < 2) { + GTEST_SKIP(); + } UUR_RETURN_ON_FATAL_FAILURE( uur::KernelsEnvironment::instance->LoadSource(program_name, il_binary)); @@ -1607,6 +1617,7 @@ struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { std::string program_name = "foo"; ur_program_handle_t program = nullptr; std::vector metadatas{}; + std::vector devices; }; } // namespace uur diff --git a/test/conformance/usm/usm_adapter_cuda.match b/test/conformance/usm/usm_adapter_cuda.match index a9f7c37b87..5460c48661 100644 --- a/test/conformance/usm/usm_adapter_cuda.match +++ b/test/conformance/usm/usm_adapter_cuda.match @@ -1,8 +1,4 @@ -{{NONDETERMINISTIC}} -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMPoolCreateTest.SuccessWithFlag/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled 
+{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/* +{{OPT}}urUSMHostAllocTest.InvalidUSMSize/* +{{OPT}}urUSMPoolCreateTest.SuccessWithFlag/* +{{OPT}}urUSMSharedAllocTest.InvalidUSMSize/* diff --git a/test/conformance/usm/usm_adapter_hip.match b/test/conformance/usm/usm_adapter_hip.match index 5a1be3c9d4..d1ae0952f4 100644 --- a/test/conformance/usm/usm_adapter_hip.match +++ b/test/conformance/usm/usm_adapter_hip.match @@ -1,85 +1,84 @@ -{{NONDETERMINISTIC}} -urUSMDeviceAllocTest.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullPtrResult/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolDisabled -urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_512 
-urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_2048 -urUSMGetMemAllocInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMHostAllocTest.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidNullPtrMem/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolDisabled -urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_512 
-urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_2048 -urUSMPoolCreateTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolCreateTest.SuccessWithFlag/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_POOL_INFO_CONTEXT -urUSMPoolGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidNullHandlePool/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidEnumerationProperty/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeZero/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeTooSmall/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropValue/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolDestroyTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolDestroyTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolRetainTest.Success/AMD_HIP_BACKEND___{{.*}}_ 
-urUSMPoolRetainTest.InvalidNullHandlePool/AMD_HIP_BACKEND___{{.*}}_ -urUSMSharedAllocTest.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidNullPtrMem/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_512 
-urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_2048 +urUSMDeviceAllocTest.Success/*__UsePoolEnabled +urUSMDeviceAllocTest.SuccessWithDescriptors/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleContext/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleDevice/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullPtrResult/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidUSMSize/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/*__UsePoolEnabled +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_8 
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_2048 +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_POOL +urUSMHostAllocTest.Success/*__UsePoolEnabled +urUSMHostAllocTest.SuccessWithDescriptors/*__UsePoolEnabled +urUSMHostAllocTest.InvalidNullHandleContext/*__UsePoolEnabled +urUSMHostAllocTest.InvalidNullPtrMem/*__UsePoolEnabled +urUSMHostAllocTest.InvalidUSMSize/*__UsePoolEnabled +urUSMHostAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/*__UsePoolEnabled +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_2048 +urUSMPoolCreateTest.Success/* +urUSMPoolCreateTest.SuccessWithFlag/* +urUSMPoolGetInfoTestWithInfoParam.Success/*__UR_USM_POOL_INFO_CONTEXT 
+urUSMPoolGetInfoTestWithInfoParam.Success/*__UR_USM_POOL_INFO_REFERENCE_COUNT +urUSMPoolGetInfoTest.InvalidNullHandlePool/* +urUSMPoolGetInfoTest.InvalidEnumerationProperty/* +urUSMPoolGetInfoTest.InvalidSizeZero/* +urUSMPoolGetInfoTest.InvalidSizeTooSmall/* +urUSMPoolGetInfoTest.InvalidNullPointerPropValue/* +urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/* +urUSMPoolDestroyTest.Success/* +urUSMPoolDestroyTest.InvalidNullHandleContext/* +urUSMPoolRetainTest.Success/* +urUSMPoolRetainTest.InvalidNullHandlePool/* +urUSMSharedAllocTest.Success/*__UsePoolEnabled +urUSMSharedAllocTest.SuccessWithDescriptors/*__UsePoolEnabled +urUSMSharedAllocTest.SuccessWithMultipleAdvices/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleContext/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleDevice/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidNullPtrMem/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidUSMSize/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/*__UsePoolEnabled +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_512 
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_2048 diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index 6f2d5ab1f9..201c351120 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,3 +1 @@ -{{NONDETERMINISTIC}} -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/* diff --git a/test/conformance/usm/usm_adapter_level_zero_v2.match b/test/conformance/usm/usm_adapter_level_zero_v2.match index 85f9c4e5c0..ad8e1888d4 100644 --- a/test/conformance/usm/usm_adapter_level_zero_v2.match +++ b/test/conformance/usm/usm_adapter_level_zero_v2.match @@ -1,8 +1 @@ -{{NONDETERMINISTIC}} -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_POOL -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled 
-urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled +urUSMGetMemAllocInfoTest.Success/*___UR_USM_ALLOC_INFO_POOL diff --git a/test/conformance/usm/usm_adapter_native_cpu.match b/test/conformance/usm/usm_adapter_native_cpu.match index 08a9c18cae..6ef26e2bdf 100644 --- a/test/conformance/usm/usm_adapter_native_cpu.match +++ b/test/conformance/usm/usm_adapter_native_cpu.match @@ -1,68 +1,17 @@ -{{NONDETERMINISTIC}} -urUSMDeviceAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMDeviceAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMDeviceAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8 
-urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048 -urUSMFreeTest.SuccessDeviceAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMFreeTest.SuccessHostAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMFreeTest.SuccessSharedAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_TYPE -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_BASE_PTR -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_SIZE -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_DEVICE -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoNegativeTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoNegativeTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMHostAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMHostAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled 
-urUSMHostAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048 -urUSMSharedAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled 
-urUSMSharedAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMSharedAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8 
-urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048 +urUSMDeviceAllocTest.Success/*__UsePoolDisabled +urUSMDeviceAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMFreeTest.SuccessDeviceAlloc/* +urUSMFreeTest.SuccessHostAlloc/* +urUSMFreeTest.SuccessSharedAlloc/* +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_TYPE +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_BASE_PTR +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_SIZE +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_DEVICE +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_POOL +urUSMGetMemAllocInfoNegativeTest.InvalidNullHandleContext/* +urUSMGetMemAllocInfoNegativeTest.InvalidNullPointerMem/* +urUSMGetMemAllocInfoNegativeTest.InvalidEnumeration/* +urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/* +urUSMHostAllocTest.Success/*__UsePoolDisabled +urUSMHostAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMSharedAllocTest.InvalidUSMSize/*__UsePoolDisabled diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match index 3868e5be1b..2fffa9b0ed 100644 --- a/test/conformance/usm/usm_adapter_opencl.match +++ b/test/conformance/usm/usm_adapter_opencl.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urUSMGetMemAllocInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_USM_ALLOC_INFO_POOL +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_POOL diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match index bf8c7ce279..627d8eaa78 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match @@ -1,11 +1,5 @@ -{{NONDETERMINISTIC}} 
-{{OPT}}urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -{{OPT}}urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -{{OPT}}urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 +{{OPT}}urPhysicalMemCreateTest.Success/*__3 +{{OPT}}urPhysicalMemCreateTest.Success/*__7 +{{OPT}}urPhysicalMemCreateTest.Success/*__12 +urPhysicalMemCreateTest.Success/*__44 +urPhysicalMemCreateTest.InvalidSize/* diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match index 1c83fd1e2a..ec7be06f7e 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match @@ -1,84 +1,34 @@ -{{NONDETERMINISTIC}} -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 
-urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 
-urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urPhysicalMemRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urPhysicalMemRetainTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemFreeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemFreeTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemFreeTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_VIRTUAL_MEM_INFO_ACCESS_MODE -urVirtualMemGetInfoTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGetInfoTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGetInfoTest.InvalidEnumerationInfo/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urVirtualMemGranularityGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM -urVirtualMemGranularityGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED -urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidEnumerationFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___64 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___512 
-urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___100000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___64 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___512 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___5000 
-urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___100000 -urVirtualMemReserveTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemReserveTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemSetAccessTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemUnmapTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urPhysicalMemCreateTest.Success/* +urPhysicalMemCreateTest.InvalidNullHandleContext/* +urPhysicalMemCreateTest.InvalidNullHandleDevice/* +urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/* +urPhysicalMemCreateTest.InvalidSize/* +urPhysicalMemReleaseTest.Success/* +urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/* +urPhysicalMemRetainTest.Success/* +urPhysicalMemRetainTest.InvalidNullHandlePhysicalMem/* +urVirtualMemFreeTest.Success/* +urVirtualMemFreeTest.InvalidNullHandleContext/* +urVirtualMemFreeTest.InvalidNullPointerStart/* +urVirtualMemGetInfoTestWithParam.Success/*__UR_VIRTUAL_MEM_INFO_ACCESS_MODE +urVirtualMemGetInfoTest.InvalidNullHandleContext/* +urVirtualMemGetInfoTest.InvalidNullPointerStart/* +urVirtualMemGetInfoTest.InvalidEnumerationInfo/* +urVirtualMemGranularityGetInfoTest.Success/*__UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM +urVirtualMemGranularityGetInfoTest.Success/*__UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED +urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/* 
+urVirtualMemMapTest.Success/* +urVirtualMemMapTest.InvalidNullHandleContext/* +urVirtualMemMapTest.InvalidNullHandlePhysicalMem/* +urVirtualMemMapTest.InvalidNullPointerStart/* +urVirtualMemMapTest.InvalidEnumerationFlags/* +urVirtualMemReserveTestWithParam.SuccessNoStartPointer/* +urVirtualMemReserveTestWithParam.SuccessWithStartPointer/* +urVirtualMemReserveTest.InvalidNullHandleContext/* +urVirtualMemReserveTest.InvalidNullPointer/* +urVirtualMemSetAccessTest.Success/* +urVirtualMemSetAccessTest.InvalidNullHandleContext/* +urVirtualMemSetAccessTest.InvalidNullPointerStart/* +urVirtualMemUnmapTest.Success/* +urVirtualMemUnmapTest.InvalidNullHandleContext/* +urVirtualMemUnmapTest.InvalidNullPointerStart/* diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt index b48a6a1d44..e2a62a216d 100644 --- a/test/fuzz/CMakeLists.txt +++ b/test/fuzz/CMakeLists.txt @@ -51,7 +51,9 @@ target_link_libraries(fuzztest-base ${PROJECT_NAME}::headers ${PROJECT_NAME}::common -fsanitize=fuzzer) -target_compile_options(fuzztest-base PRIVATE -g -fsanitize=fuzzer) +# When built with -g and -flto (which is required by some hardening flags), this causes a segfault in (upstream) +# LLVM 14-15 while linking when CMAKE_BUILD_TYPE is Release +target_compile_options(fuzztest-base PRIVATE -fsanitize=fuzzer) target_compile_definitions(fuzztest-base PRIVATE -DKERNEL_IL_PATH="${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/fill/spir64.bin.0") target_include_directories(fuzztest-base PRIVATE ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}) add_dependencies(fuzztest-base generate_device_binaries) diff --git a/test/layers/tracing/hello_world.out.logged.match b/test/layers/tracing/hello_world.out.logged.match index 336056dfbc..9e98f457c7 100644 --- a/test/layers/tracing/hello_world.out.logged.match +++ b/test/layers/tracing/hello_world.out.logged.match @@ -11,7 +11,7 @@ Platform initialized. 
<--- urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} ({{0\.[0-9]+}})) -> UR_RESULT_SUCCESS; API version: {{0\.[0-9]+}} ---> urDeviceGet - <--- urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; + <--- urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = nullptr, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; ---> urDeviceGet <--- urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; ---> urDeviceGetInfo diff --git a/test/loader/platforms/CMakeLists.txt b/test/loader/platforms/CMakeLists.txt index 2ff9060b9c..752231be99 100644 --- a/test/loader/platforms/CMakeLists.txt +++ b/test/loader/platforms/CMakeLists.txt @@ -32,5 +32,6 @@ function(add_loader_platform_test name ENV) ) endfunction() -add_loader_platform_test(no_platforms "UR_ADAPTERS_FORCE_LOAD=\"\"") +# Disabling the force load due to issues with the test on windows. +#add_loader_platform_test(no_platforms "UR_ADAPTERS_FORCE_LOAD=\"\"") add_loader_platform_test(null_platform "UR_ADAPTERS_FORCE_LOAD=\"$\"") diff --git a/test/tools/urtrace/mock_hello.match b/test/tools/urtrace/mock_hello.match index cdab3c5c81..a0af2152a7 100644 --- a/test/tools/urtrace/mock_hello.match +++ b/test/tools/urtrace/mock_hello.match @@ -1,12 +1,12 @@ Platform initialized. 
-urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; -urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; -urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; -urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; +urAdapterGet(.NumEntries = 0, .phAdapters = nullptr, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urAdapterGet(.NumEntries = 1, .phAdapters = {{.*}} {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; +urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = nullptr, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{.*}} {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} ({{.*}})) -> UR_RESULT_SUCCESS; API version: {{.*}} -urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; -urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = nullptr, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}} {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 
{{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; Found a Mock Device gpu. diff --git a/test/tools/urtrace/mock_hello_begin.match b/test/tools/urtrace/mock_hello_begin.match index 0fa8e075d6..318abc7eec 100644 --- a/test/tools/urtrace/mock_hello_begin.match +++ b/test/tools/urtrace/mock_hello_begin.match @@ -1,19 +1,19 @@ Platform initialized. -begin(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (0)); -end(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr); -end(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; -begin(3) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (0)); -end(3) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {nullptr}, .pNumPlatforms = nullptr); -end(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; +begin(1) - urAdapterGet(.NumEntries = 0, .phAdapters = nullptr, .pNumAdapters = {{.*}} (0)); +end(1) - urAdapterGet(.NumEntries = 0, .phAdapters = nullptr, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{.*}} {{{.*}}}, .pNumAdapters = nullptr); +end(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{.*}} {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; +begin(3) - urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = nullptr, .pNumPlatforms = {{.*}} (0)); +end(3) - urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, 
.NumEntries = 1, .phPlatforms = nullptr, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(4) - urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{.*}} {nullptr}, .pNumPlatforms = nullptr); +end(4) - urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{.*}} {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; begin(5) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (0.0)); end(5) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@)) -> UR_RESULT_SUCCESS; API version: {{.*}} -begin(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (0)); -end(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {nullptr}, .pNumDevices = nullptr); -end(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +begin(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = nullptr, .pNumDevices = {{.*}} (0)); +end(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = nullptr, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}} {nullptr}, .pNumDevices = nullptr); +end(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}} {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; begin(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, 
.pPropValue = {{.*}}, .pPropSizeRet = nullptr); end(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; begin(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); diff --git a/test/tools/urtrace/mock_hello_filter_device.match b/test/tools/urtrace/mock_hello_filter_device.match index 4460759d7e..fde5d03b62 100644 --- a/test/tools/urtrace/mock_hello_filter_device.match +++ b/test/tools/urtrace/mock_hello_filter_device.match @@ -1,7 +1,7 @@ Platform initialized. API version: {{.*}} -urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; -urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = nullptr, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}} {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; Found a Mock Device gpu. diff --git a/test/tools/urtrace/mock_hello_json.match b/test/tools/urtrace/mock_hello_json.match index 82ad2910c7..46bf1ddd30 100644 --- a/test/tools/urtrace/mock_hello_json.match +++ b/test/tools/urtrace/mock_hello_json.match @@ -1,14 +1,14 @@ { "traceEvents": [ Platform initialized. 
-{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1))" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr)" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urPlatformGet", "args": "(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1))" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urPlatformGet", "args": "(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr)" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urPlatformGetApiVersion", "args": "(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@))" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 0, .phAdapters = nullptr, .pNumAdapters = {{.*}} (1))" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 1, .phAdapters = {{.*}} {{{.*}}}, .pNumAdapters = nullptr)" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urPlatformGet", "args": "(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = nullptr, .pNumPlatforms = {{.*}} (1))" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urPlatformGet", "args": "(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{.*}} {{{.*}}}, .pNumPlatforms = nullptr)" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, 
"tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urPlatformGetApiVersion", "args": "(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@))" }, API version: @PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@ -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGet", "args": "(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1))" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGet", "args": "(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr)" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGet", "args": "(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = nullptr, .pNumDevices = {{.*}} (1))" }, +{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGet", "args": "(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}} {{{.*}}}, .pNumDevices = nullptr)" }, { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGetInfo", "args": "(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}} (UR_DEVICE_TYPE_GPU), .pPropSizeRet = nullptr)" }, { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGetInfo", "args": "(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}} (Mock Device), .pPropSizeRet = nullptr)" }, Found a Mock Device gpu. 
diff --git a/test/tools/urtrace/mock_hello_profiling.match b/test/tools/urtrace/mock_hello_profiling.match index fe496aab31..0ab658a3eb 100644 --- a/test/tools/urtrace/mock_hello_profiling.match +++ b/test/tools/urtrace/mock_hello_profiling.match @@ -1,12 +1,12 @@ Platform initialized. -urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) -urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) -urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) -urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urAdapterGet(.NumEntries = 0, .phAdapters = nullptr, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urAdapterGet(.NumEntries = 1, .phAdapters = {{.*}} {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = nullptr, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urPlatformGet(.phAdapters = {{.*}} {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{.*}} {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} ({{.*}})) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) API version: {{.*}} -urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) -urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = 
nullptr, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{.*}} {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) Found a Mock Device gpu. diff --git a/test/unit/print.h b/test/unit/print.h index 0a83698a4d..ac6ec272a1 100644 --- a/test/unit/print.h +++ b/test/unit/print.h @@ -71,8 +71,8 @@ struct UrPlatformGet { struct UrPlatformGetEmptyArray : UrPlatformGet { UrPlatformGetEmptyArray() : UrPlatformGet() {} const char *get_expected() { - return ".phAdapters = \\{\\}, .NumAdapters = 0, .NumEntries = 0, " - ".phPlatforms = \\{\\}, .pNumPlatforms = " + return ".phAdapters = nullptr, .NumAdapters = 0, .NumEntries = 0, " + ".phPlatforms = nullptr, .pNumPlatforms = " "nullptr"; }; }; @@ -88,8 +88,8 @@ struct UrPlatformGetTwoPlatforms : UrPlatformGet { pNumPlatforms = &num_platforms; } const char *get_expected() { - return ".phAdapters = \\{\\}, .NumAdapters = 0, .NumEntries = 2, " - ".phPlatforms = \\{.+, .+\\}, " + return ".phAdapters = nullptr, .NumAdapters = 0, .NumEntries = 2, " + ".phPlatforms = .+ \\{.+, .+\\}, " ".pNumPlatforms = .+ \\(2\\)"; }; }; @@ -276,6 +276,23 @@ struct UrDeviceGetInfoParamsPartitionArray : UrDeviceGetInfoParams { }; }; +struct UrDeviceGetInfoParamsUUID : UrDeviceGetInfoParams { + uint8_t props[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + UrDeviceGetInfoParamsUUID() : UrDeviceGetInfoParams() { + propName = UR_DEVICE_INFO_UUID; + pPropValue = &props; + propSize = sizeof(props); + propSizeRet = sizeof(props); + } + const char *get_expected() { + return ".hDevice = nullptr, 
.propName = " + "UR_DEVICE_INFO_UUID, .propSize " + "= 10, .pPropValue = \\{0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\}, " + ".pPropSizeRet = .+ " + "\\(10\\)"; + }; +}; + struct UrContextGetInfoParams { ur_context_get_info_params_t params; @@ -419,8 +436,9 @@ typedef ::testing::Types< UrUsmHostAllocParamsHostDesc, UrDeviceGetInfoParamsEmpty, UrDeviceGetInfoParamsName, UrDeviceGetInfoParamsQueueFlag, UrDeviceGetInfoParamsPartitionArray, UrContextGetInfoParamsDevicesArray, - UrDeviceGetInfoParamsInvalidSize, UrProgramMetadataTest, - UrDevicePartitionPropertyTest, UrSamplerAddressModesTest> + UrDeviceGetInfoParamsInvalidSize, UrDeviceGetInfoParamsUUID, + UrProgramMetadataTest, UrDevicePartitionPropertyTest, + UrSamplerAddressModesTest> Implementations; TYPED_TEST_SUITE(ParamsTest, Implementations, ); diff --git a/third_party/benchmark_requirements.txt b/third_party/benchmark_requirements.txt new file mode 100644 index 0000000000..c01a2215c5 --- /dev/null +++ b/third_party/benchmark_requirements.txt @@ -0,0 +1,43 @@ +six==1.16.0 +matplotlib==3.9.2 +mpld3==0.5.10 +alabaster==0.7.12 +Babel==2.14.0 +bandit==1.6.2 +beautifulsoup4==4.11.1 +breathe==4.33.1 +bs4==0.0.1 +certifi==2024.07.04 +chardet==3.0.4 +clang-format==15.0.7 +colorama==0.4.1 +docutils==0.15.2 +exhale==0.3.0 +idna==3.7 +imagesize==1.1.0 +Jinja2==3.1.4 +lxml==4.9.3 +Mako==1.3.0 +MarkupSafe==2.1.5 +packaging==24.1 +Pygments==2.17.2 +pyparsing==2.4.5 +pytest>=7.0 +pytz==2019.3 +PyYAML==6.0.1 +requests==2.32.2 +rst2pdf==0.102 +snowballstemmer==2.0.0 +soupsieve==1.9.5 +Sphinx==4.5.0 +sphinx-book-theme==0.3.3 +sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-websupport==1.2.4 +sphinx-rtd-theme==1.0.0 +urllib3==2.2.2 +dataclasses-json==0.6.7 diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d949b1f5df..ee7fe52834 100644 --- a/tools/urinfo/urinfo.hpp +++ 
b/tools/urinfo/urinfo.hpp @@ -418,5 +418,7 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP); } } // namespace urinfo