diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000..c559d08f
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,11 @@
+# See https://help.github.com/articles/about-codeowners/ for syntax
+
+# Open Source Engineering will be the default owners for everything
+# in the repo. Unless a later match takes precedence,
+# @deepset-ai/open-source-engineering will be requested for review
+# when someone opens a pull request.
+* @deepset-ai/open-source-engineering
+
+# Documentation
+*.md @deepset-ai/documentation @deepset-ai/open-source-engineering
+releasenotes/notes/* @deepset-ai/documentation @deepset-ai/open-source-engineering
diff --git a/.github/actionlint.yml b/.github/actionlint.yml
new file mode 100644
index 00000000..60646952
--- /dev/null
+++ b/.github/actionlint.yml
@@ -0,0 +1,3 @@
+self-hosted-runner:
+  # Labels of self-hosted runner in array of string
+  labels: ["cml", "ubuntu-latest-4-cores"]
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..6778b049
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,6 @@
+version: 2
+updates:
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'daily'
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 00000000..c6a58287
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,25 @@
+### Related Issues
+
+- fixes #issue-number
+
+### Proposed Changes:
+
+
+
+
+### How did you test it?
+
+
+
+### Notes for the reviewer
+
+
+
+### Checklist
+
+- I have read the [contributors guidelines](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md) and the [code of conduct](https://github.com/deepset-ai/haystack/blob/main/code_of_conduct.txt)
+- I have updated the related issue with new insights and changes
+- I added unit tests and updated the docstrings
+- I've used one of the [conventional commit types](https://www.conventionalcommits.org/en/v1.0.0/) for my PR title: `fix:`, `feat:`, `build:`, `chore:`, `ci:`, `docs:`, `style:`, `refactor:`, `perf:`, `test:`.
+- I documented my code
+- I ran [pre-commit hooks](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md#installation) and fixed any issue
diff --git a/.github/workflows/docstrings_linting.yml b/.github/workflows/docstrings_linting.yml
new file mode 100644
index 00000000..ba530e53
--- /dev/null
+++ b/.github/workflows/docstrings_linting.yml
@@ -0,0 +1,20 @@
+name: run docstrings linting
+
+on:
+  push:
+    branches:
+      - docstrings-linting
+
+jobs:
+  docstrings-linting:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Hatch
+        run: pip install hatch=="1.9.3"
+
+      - name: ruff docstrings linting
+        run: hatch run ruff check haystack-experimental
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
new file mode 100644
index 00000000..75b76f61
--- /dev/null
+++ b/.github/workflows/linting.yml
@@ -0,0 +1,84 @@
+# If you change this name also do it in linting_skipper.yml and ci_metrics.yml
+name: Linting
+
+on:
+  pull_request:
+    paths:
+      - "haystack-experimental/**/*.py"
+      - "test/**/*.py"
+      - "pyproject.toml"
+
+env:
+  PYTHON_VERSION: "3.8"
+  HATCH_VERSION: "1.9.3"
+
+jobs:
+  license-header:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Check License Header
+        run: docker run --rm -v "$(pwd):/github/workspace" ghcr.io/korandoru/hawkeye check
+
+  mypy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # With the default value of 1, there are corner cases where tj-actions/changed-files
+          # fails with a `no merge base` error
+          fetch-depth: 0
+
+      - name: Get changed files
+        id: files
+        uses: tj-actions/changed-files@v44
+        with:
+          files: |
+            **/*.py
+          files_ignore: |
+            test/**
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install Hatch
+        run: pip install hatch==${{ env.HATCH_VERSION }}
+
+      - name: Mypy
+        if: steps.files.outputs.any_changed == 'true'
+        run: |
+          mkdir .mypy_cache
+          hatch run test:types ${{ steps.files.outputs.all_changed_files }}
+
+  pylint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # With the default value of 1, there are corner cases where tj-actions/changed-files
+          # fails with a `no merge base` error
+          fetch-depth: 0
+
+      - name: Get changed files
+        id: files
+        uses: tj-actions/changed-files@v44
+        with:
+          files: |
+            haystack-experimental/**/*.py
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install Hatch
+        run: pip install hatch==${{ env.HATCH_VERSION }}
+
+      - name: Pylint
+        if: steps.files.outputs.any_changed == 'true'
+        run: |
+          hatch run test:lint ${{ steps.files.outputs.all_changed_files }}
diff --git a/.github/workflows/linting_skipper.yml b/.github/workflows/linting_skipper.yml
new file mode 100644
index 00000000..a48767b8
--- /dev/null
+++ b/.github/workflows/linting_skipper.yml
@@ -0,0 +1,29 @@
+# If you change this name also do it in linting.yml and ci_metrics.yml
+name: Linting
+
+on:
+  pull_request:
+    paths-ignore:
+      # Keep the list in sync with the paths defined in the `linting.yml` workflow
+      - "haystack-experimental/**/*.py"
+      - "test/**/*.py"
+      - "pyproject.toml"
+
+jobs:
+  license-header:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip license header check
+        run: echo "Skipped license header check"
+
+  mypy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip mypy
+        run: echo "Skipped mypy"
+
+  pylint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip pylint
+        run: echo "Skipped pylint"
diff --git a/.github/workflows/project.yml b/.github/workflows/project.yml
new file mode 100644
index 00000000..62730b7f
--- /dev/null
+++ b/.github/workflows/project.yml
@@ -0,0 +1,16 @@
+name: Track issues with Github project
+
+on:
+  issues:
+    types:
+      - opened
+
+jobs:
+  add-to-project:
+    name: Add new issues to project for triage
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/add-to-project@v1.0.1
+        with:
+          project-url: https://github.com/orgs/deepset-ai/projects/5
+          github-token: ${{ secrets.GH_PROJECT_PAT }}
diff --git a/.github/workflows/pypi_release.yml b/.github/workflows/pypi_release.yml
new file mode 100644
index 00000000..a0ffefd1
--- /dev/null
+++ b/.github/workflows/pypi_release.yml
@@ -0,0 +1,42 @@
+name: Project release on PyPi
+
+on:
+  push:
+    tags:
+      - "v[0-9]+.[0-9]+.[0-9]+*"
+      # We must not release versions tagged with -rc0 suffix
+      - "!v[0-9]+.[0-9]+.[0-9]+-rc0"
+
+env:
+  HATCH_VERSION: "1.9.3"
+
+jobs:
+  release-on-pypi:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Hatch
+        run: pip install hatch==${{ env.HATCH_VERSION }}
+
+      - name: Build Haystack Experimental
+        run: hatch build
+
+      - name: Publish on PyPi
+        env:
+          HATCH_INDEX_USER: __token__
+          HATCH_INDEX_AUTH: ${{ secrets.HAYSTACK_AI_PYPI_TOKEN }}
+        run: hatch publish -y
+
+      - name: Notify Slack
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
+          VERSION: ${{ github.ref_name }}
+        if: always()
+        uses: act10ns/slack@v2
+        with:
+          status: ${{ job.status }}
+          channel: "#haystack-notifications"
+          config: .github/config/pypi-release-slack-notification.yml
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 00000000..4ac6c92f
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,15 @@
+name: 'Stalebot'
+on:
+  schedule:
+    - cron: '30 1 * * *'
+
+jobs:
+  makestale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v9
+        with:
+          any-of-labels: 'proposal,community-triage'
+          stale-pr-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
+          days-before-stale: 30
+          days-before-close: 10
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..b4f4ab12
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,376 @@
+# If you change this name also do it in tests_skipper_trigger.yml and ci_metrics.yml
+name: Tests
+
+on:
+  schedule:
+    - cron: "0 0 * * *"
+  workflow_dispatch: # Activate this workflow manually
+  push:
+    branches:
+      - main
+      # release branches have the form v1.9.x
+      - "v[0-9].*[0-9].x"
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+    paths:
+      # Keep the list in sync with the paths defined in the `tests_skipper_trigger.yml` workflow
+      - "haystack-experimental/**/*.py"
+      - "test/**/*.py"
+      - "pyproject.toml"
+
+env:
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }}
+  CORE_AZURE_CS_API_KEY: ${{ secrets.CORE_AZURE_CS_API_KEY }}
+  AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+  AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  HF_API_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
+  PYTHON_VERSION: "3.8"
+  HATCH_VERSION: "1.9.3"
+
+jobs:
+  black:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install Hatch
+        run: pip install hatch==${{ env.HATCH_VERSION }}
+
+      - name: Check status
+        run: hatch run default:format-check
+
+      - name: Calculate alert data
+        id: calculator
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+  install-dependencies:
+    name: Install and cache ${{ matrix.os }} dependencies
+    needs: black
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-12, windows-latest]
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install Hatch
+        run: pip install hatch==${{ env.HATCH_VERSION }}
+
+      - name: Install dependencies
+        # To actually install and sync the dependencies
+        run: hatch run test:pip list
+
+      - uses: actions/cache@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
+
+  unit-tests:
+    name: Unit / ${{ matrix.os }}
+    needs: install-dependencies
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macos-12
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Restore Python dependencies
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
+
+      - name: Run
+        run: hatch run test:unit
+
+      - name: Coveralls
+        # We upload only coverage for ubuntu as handling both os
+        # complicates the workflow too much for little to no gain
+        if: matrix.os == 'ubuntu-latest'
+        uses: coverallsapp/github-action@v2
+        with:
+          path-to-lcov: coverage.xml
+
+      - name: Calculate alert data
+        id: calculator
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+      - name: Nightly - run unit tests with Haystack main branch
+        if: github.event_name == 'schedule'
+        id: nightly-haystack-main
+        run: |
+          hatch run pip install git+https://github.com/deepset-ai/haystack.git
+          hatch run test:unit
+
+      - name: Send event to Datadog for nightly failures
+        if: failure() && github.event_name == 'schedule'
+        uses: ./.github/actions/send_failure
+        with:
+          # Name the failing step: the nightly run against Haystack main, or the regular tests
+          title: |
+            core-integrations failure:
+            ${{ (steps.nightly-haystack-main.conclusion == 'failure') && 'nightly-haystack-main' || 'tests' }}
+            - ${{ github.workflow }}
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+
+  integration-tests-linux:
+    name: Integration / ubuntu-latest
+    needs: unit-tests
+    runs-on: ubuntu-latest
+    services:
+      tika:
+        image: apache/tika:2.9.0.0
+        ports:
+          - 9998:9998
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install ffmpeg  # for local Whisper tests
+
+      - name: Restore Python dependencies
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
+
+      - name: Run
+        run: hatch run test:integration
+
+      - name: Calculate alert data
+        id: calculator
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+  integration-tests-macos:
+    name: Integration / macos-12
+    needs: unit-tests
+    runs-on: macos-12
+    env:
+      HAYSTACK_MPS_ENABLED: "false"
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Install dependencies
+        run: |
+          brew install ffmpeg  # for local Whisper tests
+
+      - name: Restore Python dependencies
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
+
+      - name: Run
+        run: hatch run test:integration-mac
+
+      - name: Calculate alert data
+        id: calculator
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+  integration-tests-windows:
+    name: Integration / windows-latest
+    needs: unit-tests
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "${{ env.PYTHON_VERSION }}"
+
+      - name: Restore Python dependencies
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: pip-${{ runner.os }}-${{ github.run_id }}-${{ hashFiles('pyproject.toml') }}
+
+      - name: Run
+        run: hatch run test:integration-windows
+
+      - name: Calculate alert data
+        id: calculator
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+  trigger-catch-all:
+    name: Tests completed
+    # This job will be executed only after all the other tests
+    # are successful.
+    # This way we'll be able to mark only this test as required
+    # and skip it accordingly.
+    needs:
+      - integration-tests-linux
+      - integration-tests-macos
+      - integration-tests-windows
+    uses: ./.github/workflows/tests_skipper_workflow.yml
+    with:
+      tests_were_skipped: false
diff --git a/.github/workflows/tests_skipper_trigger.yml b/.github/workflows/tests_skipper_trigger.yml
new file mode 100644
index 00000000..f492d9e9
--- /dev/null
+++ b/.github/workflows/tests_skipper_trigger.yml
@@ -0,0 +1,46 @@
+# If you change this name also do it in tests.yml and ci_metrics.yml
+name: Tests
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+    paths-ignore:
+      # we skip the tests unless the code changes. The problem is that GitHub will run the check anyway if any other
+      # file outside the code changed (e.g. the release notes). Hence, we need a second filter down below.
+      # keep the list in sync with the paths defined in the `tests.yml` workflow
+      - "haystack-experimental/**/*.py"
+      - "test/**/*.py"
+
+jobs:
+  check_if_changed:
+    name: Check if changed
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: read
+    outputs:
+      code_changes: ${{ steps.changes.outputs.code_changes }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Check for changed code
+        id: changes
+        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36
+        with:
+          # keep the list in sync with the paths defined in the `tests.yml` workflow
+          filters: |
+            code_changes:
+              - haystack-experimental/**/*.py
+              - test/**/*.py
+              - "pyproject.toml"
+
+  trigger-catch-all:
+    name: Tests completed
+    # Don't run this check if the PR contains both code and non-code changes (e.g. release notes)
+    needs: check_if_changed
+    if: needs.check_if_changed.outputs.code_changes == 'false'
+    uses: ./.github/workflows/tests_skipper_workflow.yml
+    with:
+      tests_were_skipped: true
diff --git a/.github/workflows/tests_skipper_workflow.yml b/.github/workflows/tests_skipper_workflow.yml
new file mode 100644
index 00000000..4554c827
--- /dev/null
+++ b/.github/workflows/tests_skipper_workflow.yml
@@ -0,0 +1,26 @@
+# If you change this name also do it in tests.yml and ci_metrics.yml
+# We use a separate workflow to skip the tests if the PR contains both code and non-code changes (e.g. release notes).
+# Skipping the job unfortunately doesn't work because GitHub will treat these jobs as successful even if they are
+# skipped. Hence, we need to revert to a separate workflow.
+name: Tests
+on:
+  workflow_call:
+    inputs:
+      tests_were_skipped:
+        type: boolean
+        required: true
+
+jobs:
+  catch-all:
+    # Don't run this check if the PR contains both code and non-code changes (e.g. release notes)
+    name: Mark tests as completed
+    runs-on: ubuntu-latest
+    steps:
+      # A reusable workflow receives its parameters through the `inputs` context;
+      # `github.event` still holds the caller's event payload, which has no `inputs`.
+      - name: Skip tests
+        if: ${{ inputs.tests_were_skipped }}
+        run: echo "Skipped!"
+      - name: Tests completed successfully
+        if: ${{ !inputs.tests_were_skipped }}
+        run: echo "Tests completed!"
diff --git a/.github/workflows/workflows_linting.yml b/.github/workflows/workflows_linting.yml
new file mode 100644
index 00000000..6fe24e15
--- /dev/null
+++ b/.github/workflows/workflows_linting.yml
@@ -0,0 +1,20 @@
+name: Github workflows linter
+
+on:
+  pull_request:
+    paths:
+      # The trailing glob is required: a bare directory path never matches the files inside it
+      - ".github/workflows/**"
+
+jobs:
+  lint-workflows:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install actionlint
+        run: go install github.com/rhysd/actionlint/cmd/actionlint@latest
+
+      - name: Run actionlint
+        run: actionlint