# .github/workflows/periodic.yml

name: periodic

on:
  schedule:
    # Several cron entries are registered so that a job can inspect
    # github.event.schedule and opt in to only a fraction of the scheduled runs.
    # Weekdays (1-5) fire more often than weekends (0,6).
    - cron: '45 0,8,16 * * 1-5'
    - cron: '45 4 * * 0,6'
    - cron: '45 4,12,20 * * 1-5'
    - cron: '45 12 * * 0,6'
    # 08:29 UTC, about 1:29am PDT; for mem leak check and rerun disabled tests.
    - cron: '29 8 * * *'
  push:
    branches:
      - release/*
    tags:
      - ciflow/periodic/*
  workflow_dispatch:

# Workflow-wide default: read-only token permissions. Jobs that need more
# (e.g. id-token: write) declare their own permissions block.
permissions: read-all

# A new run cancels an in-progress run that resolves to the same group key.
# The key is built from the workflow name, the PR number (or ref name), the
# commit SHA when the ref is a branch, whether the run was manually
# dispatched, whether it was scheduled, and the cron expression that fired.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}

jobs:
  # Calls the reusable llm_td_retrieval.yml workflow. Skipped unless the
  # repository owner is pytorch.
  llm-td:
    name: before-test
    if: github.repository_owner == 'pytorch'
    permissions:
      contents: read
      id-token: write
    uses: ./.github/workflows/llm_td_retrieval.yml

  # Calls the reusable target_determination.yml workflow; depends on llm-td.
  # Most test jobs in this workflow list this job in their needs.
  target-determination:
    name: before-test
    needs: [llm-td]
    permissions:
      contents: read
      id-token: write
    uses: ./.github/workflows/target_determination.yml

  # Calls the runner determinator workflow from pytorch/pytorch@main. Its
  # label-type output is used by later jobs as the prefix of their runner labels.
  get-label-type:
    name: get-label-type
    # Requires the pytorch owner; scheduled runs additionally require the
    # repository to be pytorch/pytorch itself.
    if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch'
    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
    with:
      curr_branch: ${{ github.head_ref || github.ref_name }}
      curr_ref_type: ${{ github.ref_type }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
      triggering_actor: ${{ github.triggering_actor }}

  # Build stage for linux-focal-cuda12.1-py3.10-gcc9, delegated to the reusable
  # _linux-build.yml workflow. The matching -test job reads this job's
  # docker-image and test-matrix outputs.
  linux-focal-cuda12_1-py3_10-gcc9-build:
    name: linux-focal-cuda12.1-py3.10-gcc9
    needs: [get-label-type]
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-cuda12.1-py3.10-gcc9
      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
      runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
      # Every runner label below is prefixed with the get-label-type output.
      test-matrix: |
        { include: [
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}
    secrets: inherit

  # Test stage for linux-focal-cuda12.1-py3.10-gcc9: runs _linux-test.yml with
  # the docker image and test matrix emitted by the build job, and also waits
  # on target-determination.
  linux-focal-cuda12_1-py3_10-gcc9-test:
    name: linux-focal-cuda12.1-py3.10-gcc9
    needs: [linux-focal-cuda12_1-py3_10-gcc9-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda12.1-py3.10-gcc9
      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}

  # Build stage for linux-focal-cuda12.4-py3.10-gcc9, delegated to the reusable
  # _linux-build.yml workflow. The matching -test job reads this job's
  # docker-image and test-matrix outputs.
  linux-focal-cuda12_4-py3_10-gcc9-build:
    name: linux-focal-cuda12.4-py3.10-gcc9
    needs: [get-label-type]
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9
      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
      runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
      # Every runner label below is prefixed with the get-label-type output.
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}
    secrets: inherit

  # Test stage for linux-focal-cuda12.4-py3.10-gcc9: runs _linux-test.yml with
  # the docker image and test matrix emitted by the build job, and also waits
  # on target-determination. Passes timeout-minutes: 360 to the test workflow.
  linux-focal-cuda12_4-py3_10-gcc9-test:
    name: linux-focal-cuda12.4-py3.10-gcc9
    needs: [linux-focal-cuda12_4-py3_10-gcc9-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9
      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
      timeout-minutes: 360

  # Build stage for linux-focal-cuda11.8-py3.9-gcc9 (single multigpu shard),
  # delegated to the reusable _linux-build.yml workflow. The matching -test job
  # reads this job's docker-image and test-matrix outputs.
  linux-focal-cuda11_8-py3_9-gcc9-build:
    name: linux-focal-cuda11.8-py3.9-gcc9
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-cuda11.8-py3.9-gcc9
      docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
      # Quoted so YAML keeps the value a string instead of resolving it to a
      # float; matches the quoted '7.5' used for the same input in this file.
      cuda-arch-list: '8.6'
      test-matrix: |
        { include: [
          { config: "multigpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
        ]}
      build-with-debug: false
    secrets: inherit

  # Test stage for linux-focal-cuda11.8-py3.9-gcc9: runs _linux-test.yml with
  # the docker image and test matrix emitted by the build job. This job depends
  # only on its build job (target-determination is not in its needs).
  linux-focal-cuda11_8-py3_9-gcc9-test:
    name: linux-focal-cuda11.8-py3.9-gcc9
    needs: [linux-focal-cuda11_8-py3_9-gcc9-build]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda11.8-py3.9-gcc9
      test-matrix: ${{ needs.linux-focal-cuda11_8-py3_9-gcc9-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda11_8-py3_9-gcc9-build.outputs.docker-image }}

  # Debug build stage for linux-focal-cuda11.8-py3.10-gcc9-debug: calls the
  # reusable _linux-build.yml workflow with build-with-debug enabled. The
  # matching -test job reads this job's docker-image and test-matrix outputs.
  linux-focal-cuda11_8-py3_10-gcc9-debug-build:
    name: linux-focal-cuda11.8-py3.10-gcc9-debug
    needs: [get-label-type]
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-cuda11.8-py3.10-gcc9-debug
      build-with-debug: true
      docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
      runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
      # Seven "default" shards, each tagged with the oncall:debug-build owner.
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
          { config: "default", shard: 2, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
          { config: "default", shard: 3, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
          { config: "default", shard: 4, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
          { config: "default", shard: 5, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
          { config: "default", shard: 6, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
          { config: "default", shard: 7, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },
        ]}
    secrets: inherit

  # Test stage for linux-focal-cuda11.8-py3.10-gcc9-debug: runs _linux-test.yml
  # with the docker image and test matrix emitted by the debug build job, and
  # also waits on target-determination.
  linux-focal-cuda11_8-py3_10-gcc9-debug-test:
    name: linux-focal-cuda11.8-py3.10-gcc9-debug
    needs: [linux-focal-cuda11_8-py3_10-gcc9-debug-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda11.8-py3.10-gcc9-debug
      test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-debug-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-debug-build.outputs.docker-image }}

  # Build stage for linux-focal-rocm6.2-py3.10, delegated to the reusable
  # _linux-build.yml workflow. The matching -test job reads this job's
  # docker-image and test-matrix outputs.
  linux-focal-rocm6_2-py3_10-build:
    name: linux-focal-rocm6.2-py3.10
    needs: [get-label-type]
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
      # The test shards use the fixed "linux.rocm.gpu" label; unlike the CUDA
      # jobs, no label-type prefix is applied to it.
      test-matrix: |
        { include: [
          { config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu", owners: ["module:rocm", "oncall:distributed"] },
          { config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu", owners: ["module:rocm", "oncall:distributed"] },
          { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu", owners: ["module:rocm", "oncall:distributed"] },
        ]}
    secrets: inherit

  # Test stage for linux-focal-rocm6.2-py3.10: runs the ROCm-specific
  # _rocm-test.yml workflow with the docker image and test matrix emitted by the
  # build job, and also waits on target-determination. Declares its own
  # permissions (id-token: write, contents: read).
  linux-focal-rocm6_2-py3_10-test:
    name: linux-focal-rocm6.2-py3.10
    needs: [linux-focal-rocm6_2-py3_10-build, target-determination]
    uses: ./.github/workflows/_rocm-test.yml
    permissions:
      contents: read
      id-token: write
    secrets: inherit
    with:
      build-environment: linux-focal-rocm6.2-py3.10
      test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}

  # Experimental split-build variant of the cuda12.1-py3.10-gcc9 build
  # (use_split_build: true). Currently disabled through `if: false`.
  linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build:
    name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
    if: false # See https://github.com/pytorch/pytorch/issues/138750
    needs: [get-label-type]
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-cuda12.1-py3.10-gcc9
      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
      runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
      use_split_build: true
      # Every runner label below is prefixed with the get-label-type output.
      test-matrix: |
        { include: [
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}
    secrets: inherit

  # Test stage for the cuda12.1-py3.10-gcc9 experimental split build. Its build
  # dependency is disabled with `if: false`.
  # NOTE(review): build-environment here carries the -experimental-split-build
  # suffix while the build job passes the name without it - confirm intended.
  linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build-test:
    name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
    needs: [linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build.outputs.docker-image }}


  # Experimental split-build variant of the cuda11.8-py3.9-gcc9 multigpu build
  # (use_split_build: true). Currently disabled through `if: false`.
  linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build:
    name: linux-focal-cuda11.8-py3.9-gcc9-experimental-split-build
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    if: false # See https://github.com/pytorch/pytorch/issues/138750
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      use_split_build: true
      build-environment: linux-focal-cuda11.8-py3.9-gcc9
      docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
      # Quoted so YAML keeps the value a string instead of resolving it to a
      # float; matches the quoted '7.5' used for the same input in this file.
      cuda-arch-list: '8.6'
      test-matrix: |
        { include: [
          { config: "multigpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
        ]}
      build-with-debug: false
    secrets: inherit

  # Test stage for the cuda11.8-py3.9-gcc9 experimental split build. Its build
  # dependency is disabled with `if: false`.
  # NOTE(review): build-environment here carries the -experimental-split-build
  # suffix while the build job passes the name without it - confirm intended.
  linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build-test:
    name: linux-focal-cuda11.8-py3.9-gcc9-experimental-split-build-test
    needs: [linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda11.8-py3.9-gcc9-experimental-split-build
      test-matrix: ${{ needs.linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build.outputs.docker-image }}

  # Experimental split-build variant for cuda11.8-py3.10-gcc9 with three
  # "distributed" shards (use_split_build: true). Currently disabled through
  # `if: false`.
  linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build:
    name: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
    if: false # See https://github.com/pytorch/pytorch/issues/138750
    needs: [get-label-type]
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-cuda11.8-py3.10-gcc9
      cuda-arch-list: "7.5"
      docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
      runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
      use_split_build: true
      # Every runner label below is prefixed with the get-label-type output.
      test-matrix: |
        { include: [
          { config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
          { config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
          { config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
        ]}
    secrets: inherit

  # Test stage for the cuda11.8-py3.10-gcc9 experimental split build. Its build
  # dependency is disabled with `if: false`. Passes timeout-minutes: 360 to the
  # test workflow.
  # NOTE(review): build-environment here carries the -experimental-split-build
  # suffix while the build job passes the name without it - confirm intended.
  linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build-test:
    name: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build-test
    needs: [linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
      test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
      timeout-minutes: 360

  # Build stage for linux-focal-cuda12.1-py3-gcc9-slow-gradcheck (eight
  # "default" shards owned by module:slowgradcheck), delegated to the reusable
  # _linux-build.yml workflow. The matching -test job reads this job's
  # docker-image and test-matrix outputs.
  linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build:
    name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
      # Quoted so YAML keeps the value a string instead of resolving it to a
      # float; matches the quoted '7.5' used for the same input in this file.
      cuda-arch-list: '8.6'
      # NOTE(review): these runner labels omit the get-label-type prefix that
      # the other CUDA matrices in this file apply - confirm this is intended.
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
          { config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
        ]}
    secrets: inherit

  # Test stage for linux-focal-cuda12.1-py3-gcc9-slow-gradcheck: runs
  # _linux-test.yml with the docker image and test matrix emitted by the build
  # job, and also waits on target-determination. Passes timeout-minutes: 300 to
  # the test workflow.
  linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test:
    name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
    needs: [linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build, target-determination]
    uses: ./.github/workflows/_linux-test.yml
    secrets: inherit
    with:
      timeout-minutes: 300
      build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
      test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
      docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}

  # Bazel build-and-test for linux-focal-cuda12.4-py3.10-gcc9, delegated to the
  # reusable _bazel-build-test.yml workflow. Both the `runner` input and the
  # matrix runner label are prefixed with the get-label-type output.
  linux-focal-cuda12_4-py3_10-gcc9-bazel-test:
    name: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
    needs: [get-label-type]
    uses: ./.github/workflows/_bazel-build-test.yml
    with:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
      # Kept as a quoted string so the version is not read as a float.
      cuda-version: '12.4'
      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
      runner: ${{ needs.get-label-type.outputs.label-type }}linux.large
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}
    secrets: inherit