Skip to content

Commit

Permalink
Don't enable scheduler thread on LLM tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
notsyncing committed Dec 14, 2024
1 parent 4a7e9e9 commit aeb5569
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 7 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ See [CHANGELOG](./CHANGELOG.md) for more details.
|Chat|Basic chat completion|☑️|☑️|Text generation works; some advanced parameters (such as `frequency_penalty`, `n`, `logprobs`, etc.) are not implemented yet|
|Chat|Seeding||||
|Chat|Streaming response||||
|Chat|Image input|||InternVL2 supported|
|Chat|Image input|||InternVL2.5 supported|
|Embeddings|Create embeddings||☑️|`encoding_format` not implemented yet|

### Backend-agnostic features
Expand Down Expand Up @@ -63,7 +63,7 @@ See [CHANGELOG](./CHANGELOG.md) for more details.
|Model|Repository|Device|Backend|Remarks|
|-----|----------|------|-------|-------|
|CodeQwen1.5-7B|https://huggingface.co/Qwen/CodeQwen1.5-7B|Intel GPU|IPEX-LLM, OpenVINO||
|InternVL2-8B|https://huggingface.co/OpenGVLab/InternVL2-8B|Intel GPU|IPEX-LLM|Image input supported|
|InternVL2.5-8B|https://huggingface.co/OpenGVLab/InternVL2_5-8B|Intel GPU|IPEX-LLM|Image input supported|
|bge-m3|https://huggingface.co/BAAI/bge-m3|Intel GPU, CPU|OpenVINO|Accuracy may decrease if quantized to int8|
|Qwen2-7B-Instruct|https://huggingface.co/Qwen/Qwen2-7B-Instruct|Intel GPU|IPEX-LLM|Tool calling supported|
|Qwen2.5-Coder-7B-Instruct|https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct|Intel GPU|IPEX-LLM||
Expand Down
17 changes: 14 additions & 3 deletions src/azarrot/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ class Server:
chat_thread_manager: ChatThreadManager
api: FastAPI

enable_schedule_thread: bool = True

_uvicorn_server: uvicorn.Server | None = None
_running: bool = False
_schedule_thread: Thread | None = None
Expand All @@ -181,8 +183,11 @@ def start(self) -> None:

self._running = True

self._schedule_thread = Thread(target=self.__schedule_loop)
self._schedule_thread.start()
if self.enable_schedule_thread:
self._schedule_thread = Thread(target=self.__schedule_loop)
self._schedule_thread.start()
else:
log.info("Schedule thread disabled.")

self.vector_store_worker.start()

Expand Down Expand Up @@ -219,7 +224,11 @@ def __schedule_loop(self) -> None:
time.sleep(1)


def create_server(config: ServerConfig | None = None, enable_backends: list[type[BaseBackend]] | None = None) -> Server:
def create_server(
config: ServerConfig | None = None,
enable_backends: list[type[BaseBackend]] | None = None,
enable_schedule_thread: bool = True
) -> Server:
log.info("Azarrot is initializing...")

if config is None:
Expand Down Expand Up @@ -294,6 +303,8 @@ def create_server(config: ServerConfig | None = None, enable_backends: list[type
vector_store_worker=vector_store_worker,
chat_thread_manager=chat_thread_manager,
api=api,

enable_schedule_thread=enable_schedule_thread
)


Expand Down
1 change: 1 addition & 0 deletions tests/integration/ipex_llm/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def ipex_llm_server() -> Generator[Server, Any, Any]:
server = create_server(
config=ServerConfig(models_dir=tmp_path / "models", working_dir=tmp_path / "working"),
enable_backends=[IPEXLLMBackend],
enable_schedule_thread=False
)

environ[ENV_AZARROT_TEST_MODE] = "True"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from azarrot.backends.ipex_llm_backend import BACKEND_ID_IPEX_LLM
from azarrot.server import Server

INTERNVL2_CHAT_MODEL = "OpenGVLab/InternVL2-8B"
INTERNVL2_CHAT_MODEL = "OpenGVLab/InternVL2_5-8B"


def test_internvl2_hello(ipex_llm_server: Server) -> None:
Expand Down Expand Up @@ -85,4 +85,4 @@ def test_internvl2_image_input(ipex_llm_server: Server) -> None:
result = completion.choices[0].message
assert result is not None
assert result.content is not None
assert result.content.find("小熊猫") >= 0
assert result.content.find("小红熊猫") >= 0
1 change: 1 addition & 0 deletions tests/integration/openvino/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def openvino_server() -> Generator[Server, Any, Any]:
server = create_server(
config=ServerConfig(models_dir=tmp_path / "models", working_dir=tmp_path / "working"),
enable_backends=[OpenVINOBackend],
enable_schedule_thread=False
)

environ[ENV_AZARROT_TEST_MODE] = "True"
Expand Down

0 comments on commit aeb5569

Please sign in to comment.