Skip to content

Commit

Permalink
Merge branch 'main' into alex/2585
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbaden authored Nov 22, 2024
2 parents ae3ba41 + 816d7ef commit b64be96
Show file tree
Hide file tree
Showing 40 changed files with 1,104 additions and 515 deletions.
4 changes: 2 additions & 2 deletions .github/actions/setup-pytorch/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ runs:
uses: ./.github/actions/load
env:
# Increase this value to reset cache
CACHE_NUMBER: 12
CACHE_NUMBER: 14
with:
path: pytorch
key: pytorch-$PYTORCH_CACHE_KEY-$CACHE_NUMBER
Expand Down Expand Up @@ -120,7 +120,7 @@ runs:
cd pytorch
pip install wheel
pip install -r requirements.txt
python setup.py bdist_wheel
USE_STATIC_MKL=1 python setup.py bdist_wheel
- name: Install PyTorch (built from source)
if: ${{ inputs.mode == 'source' }}
Expand Down
156 changes: 99 additions & 57 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ concurrency:
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
permissions: read-all
# Workflow-level environment: defined once here, ahead of `jobs:`, instead of
# on individual steps.
# NOTE(review): the exact meaning of each TRITON_*/PROTON_* switch is defined
# by the build and test code outside this file — confirm there before changing
# a value or its letter case.
env:
TRITON_BUILD_WITH_CCACHE: "true"
TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
TRITON_DISABLE_LINE_INFO: 1
PROTON_SKIP_PC_SAMPLING_TEST: 1
# NOTE(review): presumably makes ccache compress its cache entries — confirm
# against the ccache manual.
CCACHE_COMPRESS: "true"
jobs:
Runner-Preparation:
runs-on: ubuntu-latest
Expand All @@ -39,6 +41,11 @@ jobs:
if: github.event_name == 'pull_request'
run: |
echo "enable_integration=true" >> $GITHUB_ENV
- name: Decide manual trigger integration test enablement
  # Always enable integration tests when manually triggered
  # (the `workflow_dispatch` event).
  if: github.event_name == 'workflow_dispatch'
  run: |
    # Publish the flag to later steps of this job through the GITHUB_ENV
    # file; the path is quoted so it is not subject to word splitting.
    echo "enable_integration=true" >> "$GITHUB_ENV"
- name: Checkout post-submit commits
if: github.event_name == 'push'
uses: actions/checkout@v4
Expand Down Expand Up @@ -154,6 +161,8 @@ jobs:
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
env:
RUNNER_TYPE: ${{ matrix.runner[0] }}
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -199,22 +208,28 @@ jobs:
# "restore" step. This is to prevent the caches from accumulating stale
# files over time.
name: Restore cache of ccache and Triton compilation artifacts
if: github.event_name != 'push'
id: restore-build-cache
if: github.ref != 'refs/heads/main'
uses: actions/cache/restore@v4
with:
path: |
~/.triton/cache
~/.cache/ccache
~/.ccache
# Restore the most recent cache entry.
restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
restore-keys: |
triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
# We expect this cache key never to hit and for us to fall back
# unconditionally to the restore-key, so it doesn't actually matter
# what we put here (so long as it doesn't hit an existing key).
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directory
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -h -d 1 ~/.triton
mkdir -p ~/.ccache
du -h -d 1 ~/.ccache
- name: Update PATH
run: |
echo "$HOME/.local/bin" >> $GITHUB_PATH
Expand All @@ -224,12 +239,14 @@ jobs:
python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-forked pytest-xdist lit
- name: Install Triton
env:
TRITON_BUILD_WITH_CCACHE: "true"
CUDA_HOME: "/usr/local/cuda"
run: |
echo "PATH is '$PATH'"
cd python
python3 -m pip install '.[tests]'
ccache --zero-stats
python3 -m pip install -v '.[tests]'
# Print the ccache counters. The "Install Triton" step resets them with
# `ccache --zero-stats` right before its pip install.
- name: CCache Stats
run: ccache --print-stats
- name: Run lit tests
run: |
cd python
Expand Down Expand Up @@ -278,6 +295,13 @@ jobs:
cd third_party/proton/test
python3 -m pytest -s .
cd ..
# Report disk usage, one directory level deep (`du -h -d 1`), for ~/.triton
# and ~/.ccache. Each directory is created first with `mkdir -p`, so `du` has
# an existing path to report on.
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
du -h -d 1 ~/.triton
mkdir -p ~/.ccache
du -h -d 1 ~/.ccache
- # If we're on branch `main`, save the ccache and Triton compilation artifacts
# to the cache so they can be used by other (non-main) CI runs.
#
Expand All @@ -287,22 +311,17 @@ jobs:
if: github.ref == 'refs/heads/main'
uses: actions/cache/save@v4
with:
path: ~/.triton/cache ~/.cache/ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache
path: |
~/.triton/cache
~/.ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
Integration-Tests-AMD:
needs: Runner-Preparation
if: needs.Runner-Preparation.outputs.matrix-HIP != ''
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
env:
RUNNER_TYPE: ${{ matrix.runner[1] }}
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
Expand Down Expand Up @@ -355,40 +374,53 @@ jobs:
# "restore" step. This is to prevent the caches from accumulating stale
# files over time.
name: Restore cache of ccache and Triton compilation artifacts
if: github.event_name != 'push'
id: restore-build-cache
if: github.ref != 'refs/heads/main'
uses: actions/cache/restore@v4
with:
path: |
~/.triton/cache
~/.cache/ccache
~/.ccache
# Restore the most recent cache entry.
restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
restore-keys: |
triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
# We expect this cache key never to hit and for us to fall back
# unconditionally to the restore-key, so it doesn't actually matter
# what we put here (so long as it doesn't hit an existing key).
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directory
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -h -d 1 ~/.triton
mkdir -p ~/.ccache
du -h -d 1 ~/.ccache
- name: Update PATH
run: |
echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
- name: Install pip dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install lit
- name: Install apt dependencies
  # Installs ccache, which the "Install Triton" and "CCache Stats" steps of
  # this job invoke.
  run: |
    # apt-get is the interface intended for scripts; -y answers the
    # confirmation prompt, which would otherwise abort the install in a
    # non-interactive job whenever extra packages are pulled in.
    apt-get update
    apt-get install -y ccache
- name: Install Triton
id: amd-install-triton
run: |
echo "PATH is '$PATH'"
pip uninstall -y triton
cd python
ccache --zero-stats
pip install -v -e '.[tests]'
# Runs only when the job is not succeeding and the "Install Triton" step
# (id: amd-install-triton) did not end with outcome 'success'; it then
# removes ~/.triton entirely.
# NOTE(review): presumably this keeps partial build output from lingering on
# the runner — confirm.
- name: Clean up after an unsuccessful build
if: ${{ !success() && steps.amd-install-triton.outcome != 'success' }}
run: |
rm -rf ~/.triton
# Print the ccache counters. The "Install Triton" step resets them with
# `ccache --zero-stats` right before its pip install.
- name: CCache Stats
run: ccache --print-stats
- name: Run lit tests
run: |
cd python
Expand Down Expand Up @@ -431,6 +463,13 @@ jobs:
cd python
cd "build/$(ls build | grep -i cmake)"
ctest -j32
# Report disk usage, one directory level deep (`du -h -d 1`), for ~/.triton
# and ~/.ccache. Each directory is created first with `mkdir -p`, so `du` has
# an existing path to report on.
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
du -h -d 1 ~/.triton
mkdir -p ~/.ccache
du -h -d 1 ~/.ccache
- # If we're on branch `main`, save the ccache and Triton compilation artifacts
# to the cache so they can be used by other (non-main) CI runs.
#
Expand All @@ -440,28 +479,23 @@ jobs:
if: github.ref == 'refs/heads/main'
uses: actions/cache/save@v4
with:
path: ~/.triton/cache ~/.cache/ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache
path: |
~/.triton/cache
~/.ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
# Last step of this job: delete ~/.triton/cache.
# NOTE(review): presumably needed because the runner keeps its home directory
# between runs — confirm.
- name: Clean up caches
run: |
rm -rf ~/.triton/cache
Build-Tests:
needs: Runner-Preparation
if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
timeout-minutes: 40
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
env:
RUNNER_TYPE: ${{ matrix.runner[0] }}
steps:
- name: Checkout
uses: actions/checkout@v4
Expand All @@ -470,7 +504,7 @@ jobs:
- name: Install brew dependencies
run: |
brew update
brew install ccache llvm@19 lld
brew install ccache llvm@19 lld coreutils
- name: Compute cache keys
id: cache-key
run: |
Expand Down Expand Up @@ -511,22 +545,28 @@ jobs:
# "restore" step. This is to prevent the caches from accumulating stale
# files over time.
name: Restore cache of ccache and Triton compilation artifacts
if: github.event_name != 'push'
id: restore-build-cache
if: github.ref != 'refs/heads/main'
uses: actions/cache/restore@v4
with:
path: |
~/.triton/cache
~/.cache/ccache
~/.ccache
# Restore the most recent cache entry.
restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
restore-keys: |
triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
# We expect this cache key never to hit and for us to fall back
# unconditionally to the restore-key, so it doesn't actually matter
# what we put here (so long as it doesn't hit an existing key).
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directory
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -h -d 1 ~/.triton
mkdir -p ~/.ccache
du -h -d 1 ~/.ccache
- name: Update PATH
run: |
echo "$HOME/.local/bin" >> $GITHUB_PATH
Expand All @@ -539,7 +579,6 @@ jobs:
python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit pybind11
- name: Install Triton
env:
TRITON_BUILD_WITH_CCACHE: "true"
TRITON_BUILD_WITH_O1: "true"
# macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
Expand All @@ -548,7 +587,17 @@ jobs:
source ~/.venv/bin/activate
echo "PATH is '$PATH'"
cd python
python3 -m pip install --no-build-isolation .
ccache --zero-stats
python3 -m pip install -v --no-build-isolation .
# Print the ccache counters. The "Install Triton" step resets them with
# `ccache --zero-stats` right before its pip install.
- name: CCache Stats
run: ccache --print-stats
# Report disk usage, one directory level deep (`du -h -d 1`), for ~/.triton
# and ~/.ccache. Each directory is created first with `mkdir -p`, so `du` has
# an existing path to report on.
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
du -h -d 1 ~/.triton
mkdir -p ~/.ccache
du -h -d 1 ~/.ccache
- # If we're on branch `main`, save the ccache and Triton compilation artifacts
# to the cache so they can be used by other (non-main) CI runs.
#
Expand All @@ -558,14 +607,7 @@ jobs:
if: github.ref == 'refs/heads/main'
uses: actions/cache/save@v4
with:
path: ~/.triton/cache ~/.cache/ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
- name: Inspect cache directories
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache
path: |
~/.triton/cache
~/.ccache
key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
Loading

0 comments on commit b64be96

Please sign in to comment.