Don't enable the scheduler thread in LLM tests.

notsyncing · Dec 14, 2024 · aeb5569
1 parent 4a7e9e9
commit aeb5569
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -33,7 +33,7 @@ See [CHANGELOG](./CHANGELOG.md) for more details.
 |Chat|Basic chat completion|☑️|☑️|Text generation works, some advanced parameters (like `frequency_penalty`, `n`, `logprobs`, etc) not implemented yet|
 |Chat|Seeding|✅|✅||
 |Chat|Streaming response|✅|✅||
-|Chat|Image input|✅|❌|InternVL2 supported|
+|Chat|Image input|✅|❌|InternVL2.5 supported|
 |Embeddings|Create embeddings|❌|☑️|`encoding_format` not implemented yet|
 
 ### Backend-agnostic features
@@ -63,7 +63,7 @@ See [CHANGELOG](./CHANGELOG.md) for more details.
 |Model|Repository|Device|Backend|Remarks|
 |-----|----------|------|-------|-------|
 |CodeQwen1.5-7B|https://huggingface.co/Qwen/CodeQwen1.5-7B|Intel GPU|IPEX-LLM, OpenVINO||
-|InternVL2-8B|https://huggingface.co/OpenGVLab/InternVL2-8B|Intel GPU|IPEX-LLM|Image input supported|
+|InternVL2.5-8B|https://huggingface.co/OpenGVLab/InternVL2_5-8B|Intel GPU|IPEX-LLM|Image input supported|
 |bge-m3|https://huggingface.co/BAAI/bge-m3|Intel GPU, CPU|OpenVINO|Accuracy may decrease if quantized to int8|
 |Qwen2-7B-Instruct|https://huggingface.co/Qwen/Qwen2-7B-Instruct|Intel GPU|IPEX-LLM|Tool calling supported|
 |Qwen2.5-Coder-7B-Instruct|https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct|Intel GPU|IPEX-LLM||

diff --git a/src/azarrot/server.py b/src/azarrot/server.py
@@ -171,6 +171,8 @@ class Server:
     chat_thread_manager: ChatThreadManager
     api: FastAPI
 
+    enable_schedule_thread: bool = True
+
     _uvicorn_server: uvicorn.Server | None = None
     _running: bool = False
     _schedule_thread: Thread | None = None
@@ -181,8 +183,11 @@ def start(self) -> None:
 
         self._running = True
 
-        self._schedule_thread = Thread(target=self.__schedule_loop)
-        self._schedule_thread.start()
+        if self.enable_schedule_thread:
+            self._schedule_thread = Thread(target=self.__schedule_loop)
+            self._schedule_thread.start()
+        else:
+            log.info("Schedule thread disabled.")
 
         self.vector_store_worker.start()
 
@@ -219,7 +224,11 @@ def __schedule_loop(self) -> None:
             time.sleep(1)
 
 
-def create_server(config: ServerConfig | None = None, enable_backends: list[type[BaseBackend]] | None = None) -> Server:
+def create_server(
+    config: ServerConfig | None = None,
+    enable_backends: list[type[BaseBackend]] | None = None,
+    enable_schedule_thread: bool = True
+) -> Server:
     log.info("Azarrot is initializing...")
 
     if config is None:
@@ -294,6 +303,8 @@ def create_server(config: ServerConfig | None = None, enable_backends: list[type
         vector_store_worker=vector_store_worker,
         chat_thread_manager=chat_thread_manager,
         api=api,
+
+        enable_schedule_thread=enable_schedule_thread
     )
 
 

diff --git a/tests/integration/ipex_llm/conftest.py b/tests/integration/ipex_llm/conftest.py
@@ -24,6 +24,7 @@ def ipex_llm_server() -> Generator[Server, Any, Any]:
     server = create_server(
         config=ServerConfig(models_dir=tmp_path / "models", working_dir=tmp_path / "working"),
         enable_backends=[IPEXLLMBackend],
+        enable_schedule_thread=False
     )
 
     environ[ENV_AZARROT_TEST_MODE] = "True"

diff --git a/tests/integration/ipex_llm/test_internvl2.py b/tests/integration/ipex_llm/test_internvl2_5.py
@@ -3,7 +3,7 @@
 from azarrot.backends.ipex_llm_backend import BACKEND_ID_IPEX_LLM
 from azarrot.server import Server
 
-INTERNVL2_CHAT_MODEL = "OpenGVLab/InternVL2-8B"
+INTERNVL2_CHAT_MODEL = "OpenGVLab/InternVL2_5-8B"
 
 
 def test_internvl2_hello(ipex_llm_server: Server) -> None:
@@ -85,4 +85,4 @@ def test_internvl2_image_input(ipex_llm_server: Server) -> None:
     result = completion.choices[0].message
     assert result is not None
     assert result.content is not None
-    assert result.content.find("小熊猫") >= 0
+    assert result.content.find("小红熊猫") >= 0
diff --git a/tests/integration/openvino/conftest.py b/tests/integration/openvino/conftest.py
@@ -24,6 +24,7 @@ def openvino_server() -> Generator[Server, Any, Any]:
     server = create_server(
         config=ServerConfig(models_dir=tmp_path / "models", working_dir=tmp_path / "working"),
         enable_backends=[OpenVINOBackend],
+        enable_schedule_thread=False
     )
 
     environ[ENV_AZARROT_TEST_MODE] = "True"