Merge branch 'main' into dev

integrate main
sotopia-lab · Jul 12, 2024 · 1ee1232 · 1ee1232
2 parents e753eb1 + 28f053a
commit 1ee1232
Show file tree

Hide file tree

Showing 51 changed files with 2,789 additions and 1,370 deletions.
diff --git a/.github/.codecov.yml b/.github/.codecov.yml
@@ -0,0 +1,23 @@
+codecov:
+  notify:
+    wait_for_ci: true  # presumably holds Codecov notifications until CI finishes — confirm against the Codecov YAML reference
+
+coverage:
+  status:
+    patch:
+      default:
+        threshold: 100% # allow patch coverage to be lower than project coverage by any amount
+    project:
+      default:
+        threshold: 5% # allow project coverage to drop at most 5%
+
+comment:                  # this is a top-level key
+  layout: " diff, flags, files"  # NOTE(review): the leading space inside the quotes is part of the value — confirm it is intentional
+  behavior: default
+  require_changes: false  # if true: only post the comment if coverage changes
+  require_base: false        # [true :: must have a base report to post]
+  require_head: true       # [true :: must have a head report to post]
+  hide_project_coverage: false # [true :: only show coverage on the git diff]
+
+github_checks:
+    annotations: false  # NOTE(review): indented 4 spaces while the rest of this file uses 2; valid YAML but inconsistent
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "pip" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "daily" # How often Dependabot checks for version updates
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -4,6 +4,7 @@
 #
 name: Deploy Next.js site to Pages
 
+
 on:
   # Runs on pushes targeting the default branch
   push:

diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
@@ -1,5 +1,19 @@
 name: Mypy
-on: [push]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+on:
+  push:
+    branches:
+      - main
+      - release
+      - dev
+  pull_request:
+    branches:
+      - main
+      - release
 
 jobs:
   Static-Type-Checking:

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -1,5 +1,9 @@
 name: pre-commit
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
 on:
   pull_request:
   push:

diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -8,6 +8,10 @@
 
 name: Upload Python Package
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
 on:
   release:
     types: [published]

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,21 +1,43 @@
 name: Pytest
-on: [push]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+on:
+  push:
+    branches:
+      - main
+      - release
+      - dev
+  pull_request:
+    branches:
+      - main
+      - release
 
 jobs:
   Pytest:
-    runs-on: ubuntu-latest
     strategy:
       max-parallel: 5
+      matrix:
+        os: [ubuntu-latest, macos-13]
+
+    runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python 3.11
       uses: actions/setup-python@v4
       with:
         python-version: 3.11.2
+    - name: Set up Docker
+      if: runner.os == 'Linux'  # runner.os is the OS family (Linux/Windows/macOS), never a runner label such as 'ubuntu-latest'
+      uses: docker-practice/actions-setup-docker@master
+      timeout-minutes: 12
+    - name: Install Poetry
+      uses: abatilo/actions-poetry@v2
     - name: Install dependencies
       run: |
-        curl -sSL https://install.python-poetry.org | python3
         poetry lock
         poetry install --with test -E chat
     - name: Test with pytest
@@ -24,4 +46,8 @@ jobs:
         REDIS_OM_URL: ${{ secrets.REDIS_OM_URL }}
         TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
       run: |
-        poetry run pytest
+        poetry run pytest --cov=. --cov-report=xml
+    - name: Upload coverage report to Codecov
+      uses: codecov/codecov-action@v4.0.1  # NOTE(review): ref was garbled by e-mail obfuscation in the scrape; v4.0.1 assumed — confirm the tag
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -137,7 +137,7 @@ data/*
 deprecated/*
 
 *.csv
-
+*.jsonl
 #backup
 backup/*
 
@@ -161,3 +161,5 @@ backup/*
 node_modules/*
 docs/.next/*
 docs/node_modules/*
+
+redis-data/*
diff --git a/README.md b/README.md
@@ -2,23 +2,28 @@
   <img src="figs/title.png" style="width: 100%;" alt="sotopia"></img>
 </div>
 
-# Sotopia: an Open-ended Social Learning Environment
+<h1 align="center">Sotopia: an Open-ended Social Learning Environment</h1>
+
+<div align="center">
+
+[![pypi](https://img.shields.io/pypi/v/sotopia.svg)](https://pypi.python.org/pypi/sotopia)
+[![versions](https://img.shields.io/pypi/pyversions/sotopia.svg)](https://github.com/sotopia-lab/sotopia)
+[![CI](https://img.shields.io/github/actions/workflow/status/sotopia-lab/sotopia/tests.yml?branch=main&logo=github&label=CI)](https://github.com/sotopia-lab/sotopia/actions?query=branch%3Amain)
+[![codecov](https://codecov.io/github/sotopia-lab/sotopia/graph/badge.svg?token=00LRQFX0QR)](https://codecov.io/github/sotopia-lab/sotopia)
+[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14hJOfzpA37PRUzdlFgiqVzUGIhhngqnz?usp=sharing)
+
 [![Project Page](https://img.shields.io/badge/Project-Page-green.svg)](https://www.sotopia.world/projects/sotopia)
 [![Paper PDF](https://img.shields.io/badge/Paper-PDF-red.svg)](https://arxiv.org/abs/2310.11667)
-[![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://www.python.org/downloads/release/python-3109/)
-[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/)
-<a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
-[![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
-[![bear-ified](https://raw.githubusercontent.com/beartype/beartype-assets/main/badge/bear-ified.svg)](https://beartype.readthedocs.io)
-[![Github Action](https://github.com/XuhuiZhou/sotopia/actions/workflows/tests.yml/badge.svg?branch=main)]()
-[![Github Action](https://github.com/XuhuiZhou/sotopia/actions/workflows/pre-commit.yml/badge.svg?branch=main)]()
 [![Dataset](https://img.shields.io/badge/%F0%9F%A4%97-Sotopia%20Dataset-yellow)](https://huggingface.co/datasets/cmu-lti/sotopia)
 [![Demo](https://img.shields.io/badge/%F0%9F%A4%97-Sotopia%20Demo-orange)](https://huggingface.co/spaces/cmu-lti/sotopia-space/)
 
+
+
+</div>
+
 ## News
 
-* [05/2024] Sotopia will be presented at ICLR 2024 as a spotlight ⭐!
-* [05/2024] We release [a simple tutorial](https://colab.research.google.com/drive/14hJOfzpA37PRUzdlFgiqVzUGIhhngqnz?usp=sharing) for you to run sotopia end-to-end on google colab.
+* [05/2024] Sotopia was presented at ICLR 2024 as a spotlight ⭐!
 
 
 ## Introduction
@@ -37,6 +42,9 @@ Sotopia is an open-ended social learning environment that allows agents to inter
 }
 ```
 
+## Help
+See [documentation](https://docs.sotopia.world) for more details.
+
 
 ## Get started
 ### Use on Google Colab
@@ -74,7 +82,7 @@ For some experiments, TogetherAI key is required to run the code. Please set the
 conda env config vars set TOGETHER_API_KEY=your_key
 ```
 
-A redis-stack server is required to run the code. Please follow the [instruction](https://redis.io/docs/stack/get-started/install/docker/) to start a redis-stack server or use an existing server. You can also check [Q&A](/docs/all_the_issues.md) to initiate the redis server with the Sotopia data.
+A redis-stack server is required to run the code. Please follow the [instructions](https://redis.io/docs/stack/get-started/install/docker/) to start a redis-stack server or use an existing server. You can also check [Troubleshooting](/docs/troubleshooting.md) to initialize the redis server with the Sotopia data.
 
 The `REDIS_OM_URL` needs to be set before loading and saving agents:
 ```bash
@@ -148,7 +156,7 @@ To run a large batch of environments, you can change the `ENV_IDS` parameter in
 ## Getting access to your simulation
 After running experiments, you can go to the `examples/redis_stats.ipynb` notebook to check the existing episodes (Episode Log section), as well as calculate the performance.
 
-For the original Sotopia simulation in our paper's experiments, you can find how to get them in the [Q&A](/docs/all_the_issues.md) section in the `./docs` folder.
+To obtain the original Sotopia simulations from our paper's experiments, see the [Troubleshooting](/docs/troubleshooting.md) page in the `./docs` folder.
 
 ## Adding new characters and environments
 You can use the following function with the `**kwargs` being the properties of the `AgentProfile` class. This is the same for the scenarios/environments.

diff --git a/docs/pages/_meta.json b/docs/pages/_meta.json
@@ -7,6 +7,14 @@
         "title": "Documentation",
         "type": "menu",
         "items": {
+            "agents": {
+                "title": "Agents",
+                "href": "/agents"
+            },
+            "environments": {
+                "title": "Environments",
+                "href": "/environments"
+            },
             "examples": {
                 "title": "Examples",
                 "href": "/examples"

diff --git a/docs/pages/agents.md b/docs/pages/agents.md
diff --git a/docs/pages/benchmark.md b/docs/pages/benchmark.md
@@ -0,0 +1,11 @@
+# Benchmark your model as a social agent in Sotopia
+
+```
+sotopia_benchmark --model=<your_model_name>
+```
+or
+
+```
+python sotopia/benchmark/cli.py --model=<your_model_name>
+```
+Currently, this script runs over 100 simulations on the Sotopia Hard tasks, and the partner model is fixed to `meta-llama/Llama-3-70b-chat-hf`.
diff --git a/docs/pages/environments.md b/docs/pages/environments.md
diff --git a/docs/pages/index.mdx b/docs/pages/index.mdx
@@ -107,7 +107,7 @@ Redis stack is a required dependency for using Sotopia. There are two ways to se
       <AccordionItem value="item-1">
         <AccordionTrigger>Docker is my thing.</AccordionTrigger>
         <AccordionContent>
-        Please follow the [instruction](https://redis.io/docs/stack/get-started/install/docker/) to start a redis-stack server or use an existing server. You can also check [Q&A](/docs/all_the_issues.md) to initiate the redis server with the Sotopia data.
+        Please follow the [instructions](https://redis.io/docs/stack/get-started/install/docker/) to start a redis-stack server or use an existing server. You can also check [Troubleshooting](/docs/troubleshooting.md) to initialize the redis server with the Sotopia data.
 
         The `REDIS_OM_URL` needs to be set before loading and saving agents:
         ```bash

diff --git a/docs/pages/all_the_issues.md → docs/pages/troubleshooting.md b/docs/pages/all_the_issues.md → docs/pages/troubleshooting.md
@@ -1,4 +1,4 @@
-# Q&A
+# Troubleshooting
 ## Missing episodes
 
 Large batch size may cause some episodes to be skipped. This is due to the fact that the server may not be able to handle the load. Try reducing the batch size. But you can also use the script in `examples/fix_missing_episodes.py` to fix the missing episodes.

diff --git a/examples/experiment_eval.py b/examples/experiment_eval.py
@@ -19,8 +19,10 @@
     EpisodeLog,
 )
 from sotopia.envs.evaluators import (
+    EvaluationForTwoAgents,
     ReachGoalLLMEvaluator,
     RuleBasedTerminatedEvaluator,
+    SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
 from sotopia.generation_utils.generate import LLM_Name
@@ -143,7 +145,10 @@ def _iterate_env_agent_combo_not_in_db(
                     RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
                 ],
                 terminal_evaluators=[
-                    ReachGoalLLMEvaluator(model_names["env"]),
+                    ReachGoalLLMEvaluator(
+                        model_names["env"],
+                        EvaluationForTwoAgents[SotopiaDimensions],
+                    ),
                 ],
             )
             agent_profiles = [AgentProfile.get(id) for id in agent_ids]

diff --git a/examples/fix_missing_episodes.py b/examples/fix_missing_episodes.py
@@ -19,8 +19,10 @@
     EnvironmentProfile,
 )
 from sotopia.envs.evaluators import (
+    EvaluationForTwoAgents,
     ReachGoalLLMEvaluator,
     RuleBasedTerminatedEvaluator,
+    SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
 from sotopia.generation_utils.generate import LLM_Name
@@ -227,7 +229,10 @@ def yield_env_agent_combo(
                 RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
             ],
             terminal_evaluators=[
-                ReachGoalLLMEvaluator(model_names["env"]),
+                ReachGoalLLMEvaluator(
+                    model_names["env"],
+                    EvaluationForTwoAgents[SotopiaDimensions],
+                ),
             ],
         )
         agent_profiles = [AgentProfile.get(id) for id in (agent_id1, agent_id2)]

diff --git a/examples/fix_missing_episodes_with_tag.py b/examples/fix_missing_episodes_with_tag.py
@@ -35,8 +35,10 @@
     EnvironmentProfile,
 )
 from sotopia.envs.evaluators import (
+    EvaluationForTwoAgents,
     ReachGoalLLMEvaluator,
     RuleBasedTerminatedEvaluator,
+    SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
 from sotopia.generation_utils.generate import LLM_Name
@@ -325,7 +327,10 @@ def yield_env_agent_combo(
                 RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
             ],
             terminal_evaluators=[
-                ReachGoalLLMEvaluator(model_names["env"]),
+                ReachGoalLLMEvaluator(
+                    model_names["env"],
+                    EvaluationForTwoAgents[SotopiaDimensions],
+                ),
             ],
         )
         agent_profiles = [AgentProfile.get(id) for id in (agent_id1, agent_id2)]

diff --git a/examples/generate_specific_envs.py b/examples/generate_specific_envs.py
@@ -12,7 +12,7 @@
 import numpy as np
 from datasets import DatasetDict, load_dataset
 
-from sotopia.generation_utils.generate import StrOutputParser, generate
+from sotopia.generation_utils.generate import StrOutputParser, agenerate
 
 
 async def generate_mutual_friend_envs() -> tuple[str, list[str]]:
@@ -78,7 +78,7 @@ async def generate_craigslist_bargains_envs() -> tuple[str, list[str]]:
     all_data = craigslist_bargains_dataset["train"]
     # sample one datum from all data
     datum = np.random.choice(all_data)
-    scenario = generate(
+    scenario = await agenerate(
         model_name="gpt-4",
         template="The following sentence is automatically generated with the following"
         'template: "One person is selling <item> for <price>, another person is'
@@ -100,7 +100,7 @@ async def generate_craigslist_bargains_envs() -> tuple[str, list[str]]:
             datum["agent_info"]["Target"][i] = datum["items"]["Price"][0] / (
                 1 + markup_ratio
             )
-        goal = generate(
+        goal = await agenerate(
             model_name="gpt-4",
             template="The following sentence is automatically generated with the following"
             'template: "You want to <role> this item. Your target price '