Skip to content

Commit

Permalink
more standardize (#158)
Browse files Browse the repository at this point in the history
  • Loading branch information
efriis authored Apr 5, 2024
1 parent 8c6a8da commit dc1d937
Show file tree
Hide file tree
Showing 36 changed files with 524 additions and 1,743 deletions.
48 changes: 48 additions & 0 deletions .github/scripts/check_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json
import sys
from typing import Dict

LIB_DIRS = ["libs/weaviate"]

if __name__ == "__main__":
files = sys.argv[1:]

dirs_to_run: Dict[str, set] = {
"lint": set(),
"test": set(),
}

if len(files) == 300:
# max diff length is 300 files - there are likely files missing
raise ValueError("Max diff reached. Please manually run CI on changed libs.")

for file in files:
if any(
file.startswith(dir_)
for dir_ in (
".github/workflows",
".github/tools",
".github/actions",
".github/scripts/check_diff.py",
)
):
# add all LANGCHAIN_DIRS for infra changes
dirs_to_run["test"].update(LIB_DIRS)

if any(file.startswith(dir_) for dir_ in LIB_DIRS):
for dir_ in LIB_DIRS:
if file.startswith(dir_):
dirs_to_run["test"].add(dir_)
elif file.startswith("libs/"):
raise ValueError(
f"Unknown lib: {file}. check_diff.py likely needs "
"an update for this new library!"
)

outputs = {
"dirs-to-lint": list(dirs_to_run["lint"] | dirs_to_run["test"]),
"dirs-to-test": list(dirs_to_run["test"]),
}
for key, value in outputs.items():
json_output = json.dumps(value)
print(f"{key}={json_output}") # noqa: T201
65 changes: 65 additions & 0 deletions .github/scripts/get_min_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import sys

import tomllib
from packaging.version import parse as parse_version
import re

MIN_VERSION_LIBS = ["langchain-core"]


def get_min_version(version: str) -> str:
# case ^x.x.x
_match = re.match(r"^\^(\d+(?:\.\d+){0,2})$", version)
if _match:
return _match.group(1)

# case >=x.x.x,<y.y.y
_match = re.match(r"^>=(\d+(?:\.\d+){0,2}),<(\d+(?:\.\d+){0,2})$", version)
if _match:
_min = _match.group(1)
_max = _match.group(2)
assert parse_version(_min) < parse_version(_max)
return _min

# case x.x.x
_match = re.match(r"^(\d+(?:\.\d+){0,2})$", version)
if _match:
return _match.group(1)

raise ValueError(f"Unrecognized version format: {version}")


def get_min_version_from_toml(toml_path: str):
# Parse the TOML file
with open(toml_path, "rb") as file:
toml_data = tomllib.load(file)

# Get the dependencies from tool.poetry.dependencies
dependencies = toml_data["tool"]["poetry"]["dependencies"]

# Initialize a dictionary to store the minimum versions
min_versions = {}

# Iterate over the libs in MIN_VERSION_LIBS
for lib in MIN_VERSION_LIBS:
# Check if the lib is present in the dependencies
if lib in dependencies:
# Get the version string
version_string = dependencies[lib]

# Use parse_version to get the minimum supported version from version_string
min_version = get_min_version(version_string)

# Store the minimum version in the min_versions dictionary
min_versions[lib] = min_version

return min_versions


# Get the TOML file path from the command line argument
toml_file = sys.argv[1]

# Call the function to get the minimum versions
min_versions = get_min_version_from_toml(toml_file)

print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()]))
39 changes: 39 additions & 0 deletions .github/workflows/_codespell.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
name: make spell_check

on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"

permissions:
contents: read

jobs:
codespell:
name: (Check for spelling errors)
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install Dependencies
run: |
pip install toml
- name: Extract Ignore Words List
working-directory: ${{ inputs.working-directory }}
run: |
# Use a Python script to extract the ignore words list from pyproject.toml
python ../../.github/workflows/extract_ignored_words_list.py
id: extract_ignore_words

- name: Codespell
uses: codespell-project/actions-codespell@v2
with:
skip: guide_imports.json
ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}
57 changes: 57 additions & 0 deletions .github/workflows/_compile_integration_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: compile-integration-test

on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"

env:
POETRY_VERSION: "1.7.1"

jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: "poetry run pytest -m compile tests/integration_tests #${{ matrix.python-version }}"
steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: compile-integration

- name: Install integration dependencies
shell: bash
run: poetry install --with=test_integration,test

- name: Check integration tests compile
shell: bash
run: poetry run pytest -m compile tests/integration_tests

- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
102 changes: 102 additions & 0 deletions .github/workflows/_lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
name: lint

on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"

env:
POETRY_VERSION: "1.7.1"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}

# This env var allows us to get inline annotations when ruff has complaints.
RUFF_OUTPUT_FORMAT: github

jobs:
build:
name: "make lint #${{ matrix.python-version }}"
runs-on: ubuntu-latest
strategy:
matrix:
# Only lint on the min and max supported Python versions.
# It's extremely unlikely that there's a lint issue on any version in between
# that doesn't show up on the min or max versions.
#
# GitHub rate-limits how many jobs can be running at any one time.
# Starting new jobs is also relatively slow,
# so linting on fewer versions makes CI faster.
python-version:
- "3.8"
- "3.11"
steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: lint-with-extras

- name: Check Poetry File
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry check
- name: Check lock file
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry lock --check
- name: Install dependencies
# Also installs dev/lint/test/typing dependencies, to ensure we have
# type hints for as many of our libraries as possible.
# This helps catch errors that require dependencies to be spotted, for example:
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
#
# If you change this configuration, make sure to change the `cache-key`
# in the `poetry_setup` action above to stop using the old cache.
# It doesn't matter how you change it, any change will cause a cache-bust.
working-directory: ${{ inputs.working-directory }}
run: |
poetry install --with lint,typing
- name: Get .mypy_cache to speed up mypy
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
with:
path: |
${{ env.WORKDIR }}/.mypy_cache
key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }}


- name: Analysing the code with our lint
working-directory: ${{ inputs.working-directory }}
run: |
make lint_package
- name: Install unit+integration test dependencies
working-directory: ${{ inputs.working-directory }}
run: |
poetry install --with test,test_integration
- name: Get .mypy_cache_test to speed up mypy
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
with:
path: |
${{ env.WORKDIR }}/.mypy_cache_test
key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', inputs.working-directory)) }}

- name: Analysing the code with our lint
working-directory: ${{ inputs.working-directory }}
run: |
make lint_tests
58 changes: 58 additions & 0 deletions .github/workflows/_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: test

on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"

env:
POETRY_VERSION: "1.7.1"

jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: "make test #${{ matrix.python-version }}"
steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: core

- name: Install dependencies
shell: bash
run: poetry install --with test

- name: Run core tests
shell: bash
run: |
make test
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
Loading

0 comments on commit dc1d937

Please sign in to comment.