Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

judge keyboard using adb #69

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Mobile-Agent-v2/MobileAgent/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,18 @@ def back(adb_path):

def home(adb_path):
    """Ask the device to start the MAIN/HOME intent through adb.

    ``adb_path`` is prepended as-is to the command string, which is executed
    by the host shell. The command output is captured but not inspected, and
    nothing is returned.
    """
    home_intent_args = " shell am start -a android.intent.action.MAIN -c android.intent.category.HOME"
    subprocess.run(adb_path + home_intent_args, shell=True, text=True, capture_output=True)

def get_all_input_method(adb_path):
    """Return the raw stdout of ``ime list -a`` run on the device via adb.

    ``adb_path`` is prepended as-is to the command string, which is executed
    by the host shell. The text is returned unmodified (no stripping); the
    exit status and stderr are not checked.
    """
    ime_list_command = adb_path + " shell ime list -a"
    return subprocess.run(
        ime_list_command,
        shell=True,
        text=True,
        capture_output=True,
    ).stdout

def get_current_input_method(adb_path):
    """Return the device's ``default_input_method`` secure setting via adb.

    ``adb_path`` is prepended as-is to the command string, which is executed
    by the host shell. The captured stdout is returned with surrounding
    whitespace removed; the exit status and stderr are not checked.
    """
    query_command = adb_path + " shell settings get secure default_input_method"
    completed = subprocess.run(query_command, shell=True, text=True, capture_output=True)
    raw_output = completed.stdout
    return raw_output.strip()

def set_input_method(adb_path, package="com.android.adbkeyboard/.AdbIME"):
    """Write ``package`` into the device's ``default_input_method`` setting.

    ``adb_path`` and ``package`` are concatenated as-is around the
    ``settings put secure`` command, which is executed by the host shell.
    The command output is captured but not inspected, and nothing is
    returned, so callers cannot tell from this call whether it succeeded.
    """
    settings_put_args = " shell settings put secure default_input_method "
    subprocess.run(
        adb_path + settings_put_args + package,
        shell=True,
        text=True,
        capture_output=True,
    )
19 changes: 8 additions & 11 deletions Mobile-Agent-v2/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from MobileAgent.api import inference_chat
from MobileAgent.text_localization import ocr
from MobileAgent.icon_localization import det
from MobileAgent.controller import get_screenshot, tap, slide, type, back, home
from MobileAgent.controller import get_screenshot, tap, slide, type, back, home, get_all_input_method, get_current_input_method, set_input_method
from MobileAgent.prompt import get_action_prompt, get_reflect_prompt, get_memory_prompt, get_process_prompt
from MobileAgent.chat import init_action_chat, init_reflect_chat, init_memory_chat, add_response, add_response_two_image

Expand Down Expand Up @@ -280,7 +280,13 @@ def get_perception_infos(adb_path, screenshot_file):
os.mkdir(screenshot)
error_flag = False


# Decide once, before the main loop, whether the ADB keyboard is usable.
# `keyboard` is True when the current input method id already contains
# "adbkeyboard", or when it appears in the full input-method list and we
# asked the device to switch to it (the result of that switch is not checked).
keyboard = "adbkeyboard" in get_current_input_method(adb_path)
if not keyboard and "adbkeyboard" in get_all_input_method(adb_path):
    set_input_method(adb_path)
    keyboard = True
iter = 0
while True:
iter += 1
Expand All @@ -289,15 +295,6 @@ def get_perception_infos(adb_path, screenshot_file):
perception_infos, width, height = get_perception_infos(adb_path, screenshot_file)
shutil.rmtree(temp_file)
os.mkdir(temp_file)

keyboard = False
keyboard_height_limit = 0.9 * height
for perception_info in perception_infos:
if perception_info['coordinates'][1] < keyboard_height_limit:
continue
if 'ADB Keyboard' in perception_info['text']:
keyboard = True
break

prompt_action = get_action_prompt(instruction, perception_infos, width, height, keyboard, summary_history, action_history, summary, action, add_info, error_flag, completed_requirements, memory)
chat_action = init_action_chat()
Expand Down