-
Notifications
You must be signed in to change notification settings - Fork 416
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix bugs in qwen2vl's openai-compatible client
- Loading branch information
Showing
7 changed files
with
101 additions
and
108 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,59 @@ | ||
import copy | ||
import logging | ||
import os | ||
from typing import Iterator, List | ||
from pprint import pformat | ||
from typing import List | ||
|
||
from qwen_agent.llm import ModelServiceError | ||
from qwen_agent.llm.base import register_llm | ||
from qwen_agent.llm.oai import TextChatAtOAI | ||
from qwen_agent.llm.schema import Message | ||
from qwen_agent.llm.schema import ContentItem, Message | ||
from qwen_agent.log import logger | ||
from qwen_agent.utils.utils import encode_image_as_base64 | ||
|
||
|
||
def _convert_local_images_to_base64(messages: List[Message]) -> List[Message]:
    """Return the messages with images that exist on local disk re-encoded.

    Messages whose content is a plain string are passed through as the same
    object. Messages whose content is a list are deep-copied first, so the
    caller's messages are never modified.

    For every image item in a list-content message, an optional leading
    ``file://`` is stripped from the value. If the remainder is not an
    ``http://``, ``https://`` or ``data:`` reference and names an existing
    path, the item's ``image`` attribute is replaced with the result of
    ``encode_image_as_base64``. Any other image value is left untouched
    (including its original ``file://`` prefix).

    Args:
        messages: The messages to scan.

    Returns:
        A new list holding the pass-through and converted messages, in order.
    """
    remote_prefixes = ('http://', 'https://', 'data:')
    converted = []
    for message in messages:
        if not isinstance(message.content, list):
            # Anything that is not a list of content items must be plain text.
            assert isinstance(message.content, str)
            converted.append(message)
            continue

        # Work on a private copy so the caller's message keeps its image value.
        message = copy.deepcopy(message)
        for part in message.content:
            kind, value = part.get_type_and_value()
            if kind != 'image':
                continue
            path = value[len('file://'):] if value.startswith('file://') else value
            if path.startswith(remote_prefixes):
                continue
            if os.path.exists(path):
                part.image = encode_image_as_base64(path, max_short_side_length=1080)
        converted.append(message)
    return converted
|
||
|
||
@register_llm('qwenvl_oai')
class QwenVLChatAtOAI(TextChatAtOAI):
    """Chat model registered as ``'qwenvl_oai'`` that adds image handling to ``TextChatAtOAI``."""

    @property
    def support_multimodal_input(self) -> bool:
        """Always ``True``: this model accepts multimodal input."""
        return True

    def _chat_stream(
        self,
        messages: List[Message],
        delta_stream: bool,
        generate_cfg: dict,
    ) -> Iterator[List[Message]]:
        """Run ``_convert_local_images_to_base64`` on the messages, then delegate to the parent ``_chat_stream``."""
        prepared = _convert_local_images_to_base64(messages)
        return super()._chat_stream(
            messages=prepared,
            delta_stream=delta_stream,
            generate_cfg=generate_cfg,
        )

    def _chat_no_stream(
        self,
        messages: List[Message],
        generate_cfg: dict,
    ) -> List[Message]:
        """Run ``_convert_local_images_to_base64`` on the messages, then delegate to the parent ``_chat_no_stream``."""
        prepared = _convert_local_images_to_base64(messages)
        return super()._chat_no_stream(messages=prepared, generate_cfg=generate_cfg)

    @staticmethod
    def convert_messages_to_dicts(messages: List[Message]) -> List[dict]:
        """Turn messages into dicts whose ``content`` is a list of typed parts.

        Each message is dumped with ``model_dump()`` and its ``content`` is
        replaced by a list built from its items:

        * text items become ``{'type': 'text', 'text': ...}``;
        * image items become ``{'type': 'image_url', 'image_url': {'url': ...}}``.

        Items of any other type produce no entry. String content is treated
        as a single text item.

        For image values, an optional leading ``file://`` is stripped. A value
        that is not an ``http://``, ``https://`` or ``data:`` reference is
        taken as a local path and replaced with the result of
        ``encode_image_as_base64``.

        When DEBUG logging is enabled, the result is logged with every
        ``data:`` URL shortened to its first 64 characters plus ``'...'``.

        Args:
            messages: The messages to convert.

        Returns:
            One dict per input message, in order.

        Raises:
            ModelServiceError: If a local image path does not exist.
        """
        remote_prefixes = ('http://', 'https://', 'data:')
        dict_messages = []

        for message in messages:
            parts = message.content
            if isinstance(parts, str):
                # Normalise plain-text content to a one-item list.
                parts = [ContentItem(text=parts)]
            assert isinstance(parts, list)

            payload = []
            for part in parts:
                kind, value = part.get_type_and_value()
                if kind == 'text':
                    payload.append({'type': 'text', 'text': value})
                elif kind == 'image':
                    url = value[len('file://'):] if value.startswith('file://') else value
                    if not url.startswith(remote_prefixes):
                        # Not a remote or inline reference, so it must be a local file.
                        if not os.path.exists(url):
                            raise ModelServiceError(f'Local image "{url}" does not exist.')
                        url = encode_image_as_base64(url, max_short_side_length=1080)
                    payload.append({'type': 'image_url', 'image_url': {'url': url}})

            entry = message.model_dump()
            entry['content'] = payload
            dict_messages.append(entry)

        if logger.isEnabledFor(logging.DEBUG):
            # Log a copy with shortened data URLs; the returned dicts stay intact.
            preview = copy.deepcopy(dict_messages)
            for entry in preview:
                for part in entry['content']:
                    url = part.get('image_url', {}).get('url', '')
                    if url.startswith('data:'):
                        part['image_url']['url'] = url[:64] + '...'
            logger.debug(f'LLM Input:\n{pformat(preview, indent=2)}')

        return dict_messages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters