From f7f8f95500f3b88788c47a862fb04d5d455ee9eb Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 26 Sep 2024 22:08:49 -0700 Subject: [PATCH] Add workflows --- .github/actions/poetry_setup/action.yml | 88 +++++++++++++++++++++++++ .github/workflows/py-baseline.yml | 37 +++++++++++ .github/workflows/py-bench.yml | 8 +-- .github/workflows/python_test.yml | 16 ++--- python/Makefile | 15 ++++- python/bench/__main__.py | 49 ++++++-------- python/bench/create_run_tree.py | 9 +++ python/bench/dumps_json.py | 17 ++++- 8 files changed, 190 insertions(+), 49 deletions(-) create mode 100644 .github/actions/poetry_setup/action.yml create mode 100644 .github/workflows/py-baseline.yml create mode 100644 python/bench/create_run_tree.py diff --git a/.github/actions/poetry_setup/action.yml b/.github/actions/poetry_setup/action.yml new file mode 100644 index 000000000..df04e1e71 --- /dev/null +++ b/.github/actions/poetry_setup/action.yml @@ -0,0 +1,88 @@ +# An action for setting up poetry install with caching. +# Using a custom action since the default action does not +# take poetry install groups into account. +# Action code from: +# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236 +name: poetry-install-with-caching +description: Poetry install with support for caching of dependency groups. + +inputs: + python-version: + description: Python version, supporting MAJOR.MINOR only + required: true + + poetry-version: + description: Poetry version + required: true + + cache-key: + description: Cache key to use for manual handling of caching + required: true + +runs: + using: composite + steps: + - uses: actions/setup-python@v5 + name: Setup python ${{ inputs.python-version }} + id: setup-python + with: + python-version: ${{ inputs.python-version }} + + - uses: actions/cache@v3 + id: cache-bin-poetry + name: Cache Poetry binary - Python ${{ inputs.python-version }} + env: + SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1" + with: + path: | + /opt/pipx/venvs/poetry + # This step caches the poetry installation, so make sure it's keyed on the poetry version as well. + key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }} + + - name: Refresh shell hashtable and fixup softlinks + if: steps.cache-bin-poetry.outputs.cache-hit == 'true' + shell: bash + env: + POETRY_VERSION: ${{ inputs.poetry-version }} + PYTHON_VERSION: ${{ inputs.python-version }} + run: | + set -eux + + # Refresh the shell hashtable, to ensure correct `which` output. + hash -r + + # `actions/cache@v3` doesn't always seem able to correctly unpack softlinks. + # Delete and recreate the softlinks pipx expects to have. + rm /opt/pipx/venvs/poetry/bin/python + cd /opt/pipx/venvs/poetry/bin + ln -s "$(which "python$PYTHON_VERSION")" python + chmod +x python + cd /opt/pipx_bin/ + ln -s /opt/pipx/venvs/poetry/bin/poetry poetry + chmod +x poetry + + # Ensure everything got set up correctly. + /opt/pipx/venvs/poetry/bin/python --version + /opt/pipx_bin/poetry --version + + - name: Install poetry + if: steps.cache-bin-poetry.outputs.cache-hit != 'true' + shell: bash + env: + POETRY_VERSION: ${{ inputs.poetry-version }} + PYTHON_VERSION: ${{ inputs.python-version }} + # Install poetry using the python version installed by setup-python step. + run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose + + - name: Restore pip and poetry cached dependencies + uses: actions/cache@v3 + env: + SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4" + with: + path: | + ~/.cache/pip + ~/.cache/pypoetry/virtualenvs + ~/.cache/pypoetry/cache + ~/.cache/pypoetry/artifacts + ./.venv + key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles('./poetry.lock') }} diff --git a/.github/workflows/py-baseline.yml b/.github/workflows/py-baseline.yml new file mode 100644 index 000000000..4b1998847 --- /dev/null +++ b/.github/workflows/py-baseline.yml @@ -0,0 +1,37 @@ +name: py-baseline + +on: + workflow_dispatch: + push: + branches: [main] + paths: + - "python/langsmith/**" + +env: + POETRY_VERSION: "1.7.1" + +jobs: + benchmark: + runs-on: ubuntu-latest + defaults: + run: + working-directory: python + steps: + - uses: actions/checkout@v4 + - run: SHA=$(git rev-parse HEAD) && echo "SHA=$SHA" >> $GITHUB_ENV + - name: Set up Python 3.11 + Poetry ${{ env.POETRY_VERSION }} + uses: "./.github/actions/poetry_setup" + with: + python-version: "3.11" + poetry-version: ${{ env.POETRY_VERSION }} + cache-key: py-benchi + - name: Install dependencies + run: poetry install --with dev + - name: Run benchmarks + run: OUTPUT=out/benchmark-baseline.json make -s benchmark + - name: Save outputs + uses: actions/cache/save@v4 + with: + key: ${{ runner.os }}-benchmark-baseline-${{ env.SHA }} + path: | + python/out/benchmark-baseline.json diff --git a/.github/workflows/py-bench.yml b/.github/workflows/py-bench.yml index 38d80aa73..20ce118d1 100644 --- a/.github/workflows/py-bench.yml +++ b/.github/workflows/py-bench.yml @@ -1,9 +1,9 @@ -name: bench +name: py-bench on: pull_request: paths: - - "libs/**" + - "python/langsmith/**" env: POETRY_VERSION: "1.7.1" @@ -26,7 +26,7 @@ jobs: with: python-version: "3.11" poetry-version: ${{ env.POETRY_VERSION }} - cache-key: bench + cache-key: py-bench - name: Install dependencies run: poetry install --with dev - name: Download baseline @@ -37,7 +37,7 @@ jobs: ${{ runner.os }}-benchmark-baseline- fail-on-cache-miss: true path: | - libs/langgraph/out/benchmark-baseline.json + python/out/benchmark-baseline.json - name: Run benchmarks id: benchmark run: | diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml index 98020f18e..d207b112b 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -29,18 +29,12 @@ jobs: working-directory: python steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v5 - name: Setup python ${{ matrix.python-version }} - id: setup-python + - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }} + uses: "./.github/actions/poetry_setup" with: python-version: ${{ matrix.python-version }} - cache: "pip" - - name: Install poetry - shell: bash - env: - PYTHON_VERSION: ${{ matrix.python-version }} - # Install poetry using the python version installed by setup-python step. - run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose + poetry-version: ${{ env.POETRY_VERSION }} + cache-key: build-and-test - name: Install dependencies run: | poetry install --with dev,lint @@ -51,4 +45,4 @@ jobs: run: make lint - name: Run Unit tests ${{ matrix.python-version }} run: make tests - shell: bash \ No newline at end of file + shell: bash diff --git a/python/Makefile b/python/Makefile index e40f8944b..4ef65ad35 100644 --- a/python/Makefile +++ b/python/Makefile @@ -1,4 +1,17 @@ -.PHONY: tests lint format build publish doctest integration_tests integration_tests_fast evals +.PHONY: tests lint format build publish doctest integration_tests integration_tests_fast evals benchmark benchmark-fast + + +OUTPUT ?= out/benchmark.json + +benchmark: + mkdir -p out + rm -f $(OUTPUT) + poetry run python -m bench -o $(OUTPUT) --rigorous + +benchmark-fast: + mkdir -p out + rm -f $(OUTPUT) + poetry run python -m bench -o $(OUTPUT) --fast tests: PYTHONDEVMODE=1 PYTHONASYNCIODEBUG=1 poetry run python -m pytest --disable-socket --allow-unix-socket -n auto --durations=10 tests/unit_tests diff --git a/python/bench/__main__.py b/python/bench/__main__.py index e924030ce..1efc071ef 100644 --- a/python/bench/__main__.py +++ b/python/bench/__main__.py @@ -1,11 +1,10 @@ -import random -from uuid import uuid4 - from pyperf._runner import Runner + +from bench.create_run_tree import create_run_trees from bench.dumps_json import ( - create_nested_instance, DeeplyNestedModel, DeeplyNestedModelV1, + create_nested_instance, ) from langsmith.client import _dumps_json @@ -17,46 +16,36 @@ def __init__(self): benchmarks = ( ( - "dumps_dataclass_nested_200x10", - lambda x: _dumps_json({"input": x}), - create_nested_instance(200, 100), - ), - ( - "dumps_pydantic_nested_200x10", - lambda x: _dumps_json({"input": x}), - create_nested_instance(200, 100, branch_constructor=DeeplyNestedModel), + "create_20_000_run_trees", + create_run_trees, + 20_000, ), ( - "dumps_pydanticv1_nested_200x10", + "dumps_class_nested_py_branch_and_leaf_200x250", lambda x: _dumps_json({"input": x}), - create_nested_instance(200, 100, branch_constructor=DeeplyNestedModelV1), + create_nested_instance( + 200, 250, branch_constructor=MyClass, leaf_constructor=MyClass + ), ), - # Add random python class at the leaf ( - "dumps_dataclass_nested_py_leaf_200x10", + "dumps_class_nested_py_leaf_200x250", lambda x: _dumps_json({"input": x}), - create_nested_instance(200, 100, leaf_constructor=MyClass), + create_nested_instance(200, 250, leaf_constructor=MyClass), ), ( - "dumps_pydantic_nested_py_leaf_200x10", + "dumps_dataclass_nested_200x250", lambda x: _dumps_json({"input": x}), - create_nested_instance( - 200, 100, branch_constructor=DeeplyNestedModel, leaf_constructor=MyClass - ), + create_nested_instance(200, 250), ), ( - "dumps_pydanticv1_nested_py_leaf_200x10", + "dumps_pydantic_nested_200x250", lambda x: _dumps_json({"input": x}), - create_nested_instance( - 200, 100, branch_constructor=DeeplyNestedModelV1, leaf_constructor=MyClass - ), + create_nested_instance(200, 250, branch_constructor=DeeplyNestedModel), ), ( - "dumps_class_nested_py_leaf_200x10", + "dumps_pydanticv1_nested_200x250", lambda x: _dumps_json({"input": x}), - create_nested_instance( - 200, 100, branch_constructor=MyClass, leaf_constructor=MyClass - ), + create_nested_instance(200, 250, branch_constructor=DeeplyNestedModelV1), ), ) @@ -64,4 +53,4 @@ def __init__(self): r = Runner() for name, fn, input_ in benchmarks: - r.bench_func(name, fn, input) + r.bench_func(name, fn, input_) diff --git a/python/bench/create_run_tree.py b/python/bench/create_run_tree.py new file mode 100644 index 000000000..29cc84f44 --- /dev/null +++ b/python/bench/create_run_tree.py @@ -0,0 +1,9 @@ +from unittest.mock import patch + +from langsmith import RunTree + + +def create_run_trees(N: int): + with patch("langsmith.client.requests.Session", autospec=True): + for i in range(N): + RunTree(name=str(i)).post() diff --git a/python/bench/dumps_json.py b/python/bench/dumps_json.py index b015a8d04..afbe603a4 100644 --- a/python/bench/dumps_json.py +++ b/python/bench/dumps_json.py @@ -1,4 +1,7 @@ +import uuid from dataclasses import dataclass, field +from datetime import datetime +from decimal import Decimal from typing import Any, Callable, Dict, Optional from pydantic import BaseModel, Field @@ -7,7 +10,17 @@ def _default(): - return {"some_val": "😈"} + + return { + "some_val": "😈", + "uuid_val": uuid.uuid4(), + "datetime_val": datetime.now(), + "list_val": [1, 2, 3], + "decimal_val": Decimal("3.14"), + "set_val": {1, 2, 3}, + "tuple_val": (4, 5, 6), + "bytes_val": b"hello world", + } @dataclass @@ -18,12 +31,10 @@ class DeeplyNested: class DeeplyNestedModel(BaseModel): - vals: Dict[str, Any] = Field(default_factory=_default) class DeeplyNestedModelV1(BaseModelV1): - vals: Dict[str, Any] = FieldV1(default_factory=_default)