Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(rfc) infra: enable running subset of extended tests #26594

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/scripts/check_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ def dependents_graph() -> dict:

# load extended deps from extended_testing_deps.txt
package_path = Path(path).parent
extended_requirement_path = package_path / "extended_testing_deps.txt"
extended_requirement_path = (
package_path / "extended_dependencies" / "extended_testing_deps.txt"
)
if extended_requirement_path.exists():
with open(extended_requirement_path, "r") as f:
extended_deps = f.read().splitlines()
Expand Down
73 changes: 73 additions & 0 deletions .github/workflows/_extended_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
name: Extended tests

# Manually dispatched workflow: installs one of the extended dependency files
# for the selected package, then runs that package's unit and integration tests.
on:
  workflow_dispatch:
    inputs:
      working-directory:
        required: true
        type: string
        default: "libs/community"
      python-version:
        required: true
        type: string
        description: "Python version to use"
        default: "3.11"
      extended-deps-file:
        required: true
        type: choice
        description: "File to install extended dependencies from"
        options:
          - extended_testing_deps.txt
          - pdf_loader_deps.txt
          - other_deps.txt

env:
  POETRY_VERSION: "1.7.1"

jobs:
  build:
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
    name: Python ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ inputs.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: core

      - name: Install extended dependencies
        shell: bash
        run: |
          poetry install --with test,test_integration
          poetry run pip install uv
          poetry run uv pip install -r extended_dependencies/${{ inputs.extended-deps-file }}

      # The default working-directory is "libs/community" (no trailing slash),
      # so match the exact value as well as any path nested beneath it.
      - name: Install deps outside pyproject
        if: ${{ inputs.working-directory == 'libs/community' || startsWith(inputs.working-directory, 'libs/community/') }}
        shell: bash
        run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"

      - name: Run extended tests
        shell: bash
        run: |
          make test
          make integration_tests

      - name: Ensure the tests did not create any additional files
        shell: bash
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'
65 changes: 20 additions & 45 deletions .github/workflows/_integration_test.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Ignore the changes to this file. It is temporarily hijacked so that the new
# workflow's workflow_dispatch trigger can be tested from this branch.

name: Integration tests

on:
Expand All @@ -6,10 +9,20 @@ on:
working-directory:
required: true
type: string
default: "libs/community"
python-version:
required: true
type: string
description: "Python version to use"
default: "3.11"
extended-deps-file:
required: true
type: choice
description: "File to install extended dependencies from"
options:
- extended_testing_deps.txt
- pdf_loader_deps.txt
- other_deps.txt

env:
POETRY_VERSION: "1.7.1"
Expand All @@ -32,60 +45,22 @@ jobs:
working-directory: ${{ inputs.working-directory }}
cache-key: core

- name: Install dependencies
- name: Install extended dependencies
shell: bash
run: poetry install --with test,test_integration
run: |
poetry install --with test,test_integration
poetry run pip install uv
poetry run uv pip install -r extended_dependencies/${{ inputs.extended-deps-file }}

- name: Install deps outside pyproject
if: ${{ startsWith(inputs.working-directory, 'libs/community/') }}
shell: bash
run: poetry run pip install "boto3<2" "google-cloud-aiplatform<2"

- name: 'Authenticate to Google Cloud'
id: 'auth'
uses: google-github-actions/auth@v2
with:
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'

- name: Run integration tests
- name: Run extended tests
shell: bash
env:
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
ES_URL: ${{ secrets.ES_URL }}
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
run: |
make test
make integration_tests

- name: Ensure the tests did not create any additional files
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check_diffs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ jobs:
echo "Running extended tests, installing dependencies with poetry..."
poetry install --with test
poetry run pip install uv
poetry run uv pip install -r extended_testing_deps.txt
poetry run uv pip install -r extended_dependencies/extended_testing_deps.txt

- name: Run extended tests
run: make extended_tests
Expand Down
2 changes: 1 addition & 1 deletion libs/community/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ test tests:
poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)

integration_tests:
poetry run pytest $(TEST_FILE)
poetry run pytest -m runs $(TEST_FILE)

test_watch:
poetry run ptw --disable-socket --allow-unix-socket --snapshot-update --now . -- -vv tests/unit_tests
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-r pdf_loader_deps.txt
-r other_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,13 @@ openapi-pydantic>=0.3.2,<0.4
oracle-ads>=2.9.1,<3
oracledb>=2.2.0,<3
pandas>=2.0.1,<3
pdfminer-six>=20221105,<20240706
pgvector>=0.1.6,<0.2
praw>=7.7.1,<8
premai>=0.3.25,<0.4
psychicapi>=0.8.0,<0.9
pydantic>=2.7.4,<3
py-trello>=0.19.0,<0.20
pyjwt>=2.8.0,<3
pymupdf>=1.22.3,<2
pypdf>=3.4.0,<5
pypdfium2>=4.10.0,<5
pyspark>=3.4.0,<4
rank-bm25>=0.2.2,<0.3
rapidfuzz>=3.1.1,<4
Expand Down
4 changes: 4 additions & 0 deletions libs/community/extended_dependencies/pdf_loader_deps.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pdfminer-six>=20221105,<20240706
pymupdf>=1.22.3,<2
pypdf>=3.4.0,<5
pypdfium2>=4.10.0,<5
1 change: 1 addition & 0 deletions libs/community/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
markers = [
"requires: mark tests as requiring a specific library",
"scheduled: mark tests to run in scheduled testing",
"runs: mark tests to run in CI",
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
Expand Down
87 changes: 86 additions & 1 deletion libs/community/tests/integration_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Getting the absolute path of the current file's directory
from importlib import util
import os
from typing import Dict, Sequence

import pytest
from pytest import Config, Function, Parser

# Getting the absolute path of the current file's directory

Check failure on line 8 in libs/community/tests/integration_tests/conftest.py

View workflow job for this annotation

GitHub Actions / cd libs/community / make lint #3.12

Ruff (I001)

tests/integration_tests/conftest.py:1:1: I001 Import block is un-sorted or un-formatted

Check failure on line 8 in libs/community/tests/integration_tests/conftest.py

View workflow job for this annotation

GitHub Actions / cd libs/community / make lint #3.9

Ruff (I001)

tests/integration_tests/conftest.py:1:1: I001 Import block is un-sorted or un-formatted
ABS_PATH = os.path.dirname(os.path.abspath(__file__))

# Getting the absolute path of the project's root directory
Expand All @@ -17,3 +22,83 @@


_load_env()

def pytest_addoption(parser: Parser) -> None:
    """Register the ``--only-extended`` and ``--only-core`` boolean flags."""
    # Each entry is (flag name, help text); both flags are plain switches.
    switches = (
        (
            "--only-extended",
            "Only run extended tests. Does not allow skipping any extended tests.",
        ),
        (
            "--only-core",
            "Only run core tests. Never runs any extended tests.",
        ),
    )
    for flag, description in switches:
        parser.addoption(flag, action="store_true", help=description)


def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None:
    """Apply the custom ``requires`` marker to the collected tests.

    A test decorated with ``requires`` names one or more packages it needs:

    .. code-block:: python

        @pytest.mark.requires("package1", "package2")
        def test_something():
            ...

    How each collected test is treated:

    * No ``requires`` marker: skipped when ``--only-extended`` is given,
      otherwise left untouched.
    * ``requires`` marker and ``--only-core``: always skipped.
    * ``requires`` marker otherwise: if a listed package cannot be found, the
      test is skipped, or the run fails outright when ``--only-extended`` is
      given.

    Raises:
        ValueError: If both ``--only-extended`` and ``--only-core`` are set.
    """
    extended_only = config.getoption("--only-extended") or False
    core_only = config.getoption("--only-core") or False

    if extended_only and core_only:
        raise ValueError("Cannot specify both `--only-extended` and `--only-core`.")

    # Cache of package name -> "is it installed?", so `util.find_spec` runs at
    # most once per package across the whole collection.
    availability: Dict[str, bool] = {}

    for test_item in items:
        marker = test_item.get_closest_marker("requires")

        if marker is None:
            # Unmarked tests are not extended tests.
            if extended_only:
                test_item.add_marker(
                    pytest.mark.skip(reason="Skipping not an extended test.")
                )
            continue

        if core_only:
            test_item.add_marker(pytest.mark.skip(reason="Skipping not a core test."))
            continue

        for pkg in marker.args:
            if pkg not in availability:
                # Any failure while locating the package counts as "missing".
                try:
                    found = util.find_spec(pkg) is not None
                except Exception:
                    found = False
                availability[pkg] = found

            if availability[pkg]:
                continue

            if not extended_only:
                # One missing package is enough: mark the test skipped and
                # stop checking the remaining requirements.
                test_item.add_marker(pytest.mark.skip(reason=f"Requires pkg: `{pkg}`"))
                break

            pytest.fail(
                f"Package `{pkg}` is not installed but is required for "
                f"extended tests. Please install the given package and "
                f"try again.",
            )
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ def test_pdfminer_pdf_as_html_loader() -> None:
assert len(docs) == 1


@pytest.mark.runs
@pytest.mark.requires("pypdf")
def test_pypdf_loader() -> None:
"""Test PyPDFLoader."""
file_path = Path(__file__).parent.parent / "examples/hello.pdf"
Expand All @@ -101,6 +103,8 @@ def test_pypdf_loader() -> None:
assert len(docs) == 16


@pytest.mark.runs
@pytest.mark.requires("pypdf")
def test_pypdf_loader_with_layout() -> None:
"""Test PyPDFLoader with layout mode."""
file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"
Expand Down
Loading