diff --git a/ochat/serving/openai_api_protocol.py b/ochat/serving/openai_api_protocol.py index 139e058..bfe5750 100644 --- a/ochat/serving/openai_api_protocol.py +++ b/ochat/serving/openai_api_protocol.py @@ -59,7 +59,7 @@ class ChatCompletionRequest(BaseModel): top_p: Optional[float] = 1.0 n: Optional[int] = 1 max_tokens: Optional[int] = 768 - stop: Optional[Union[str, List[str]]] = Field(default_factory=list) + stop: Optional[Union[str, List[str]]] = None stream: Optional[bool] = False presence_penalty: Optional[float] = 0.0 frequency_penalty: Optional[float] = 0.0 diff --git a/ochat/serving/openai_api_server.py b/ochat/serving/openai_api_server.py index 4645c67..b92c717 100644 --- a/ochat/serving/openai_api_server.py +++ b/ochat/serving/openai_api_server.py @@ -160,7 +160,7 @@ async def create_chat_completion(raw_request: Request): frequency_penalty=request.frequency_penalty, temperature=request.temperature, top_p=request.top_p, - stop=[model.eot_token], + stop=request.stop, max_tokens=request.max_tokens ) except ValueError as e: