diff --git a/Mobile-Agent-v2/MobileAgent/controller.py b/Mobile-Agent-v2/MobileAgent/controller.py
index dac33ce..2a60d23 100644
--- a/Mobile-Agent-v2/MobileAgent/controller.py
+++ b/Mobile-Agent-v2/MobileAgent/controller.py
@@ -57,4 +57,24 @@ def back(adb_path):
 
 def home(adb_path):
     command = adb_path + f" shell am start -a android.intent.action.MAIN -c android.intent.category.HOME"
+    subprocess.run(command, capture_output=True, text=True, shell=True)
+
+def get_all_input_method(adb_path):
+    """Return the raw stdout of `adb shell ime list -a`."""
+    command = adb_path + " shell ime list -a"
+    result = subprocess.run(command, capture_output=True, text=True, shell=True)
+    return result.stdout
+
+def get_current_input_method(adb_path):
+    """Return the `default_input_method` secure setting, whitespace-stripped."""
+    command = adb_path + " shell settings get secure default_input_method"
+    result = subprocess.run(command, capture_output=True, text=True, shell=True)
+    return result.stdout.strip()
+
+def set_input_method(adb_path, package="com.android.adbkeyboard/.AdbIME"):
+    """Write `package` to the `default_input_method` secure setting.
+
+    The command's output and exit status are not inspected.
+    """
+    command = adb_path + " shell settings put secure default_input_method " + package
     subprocess.run(command, capture_output=True, text=True, shell=True)
\ No newline at end of file
diff --git a/Mobile-Agent-v2/run.py b/Mobile-Agent-v2/run.py
index cda145e..33a5e02 100644
--- a/Mobile-Agent-v2/run.py
+++ b/Mobile-Agent-v2/run.py
@@ -8,7 +8,7 @@
 from MobileAgent.api import inference_chat
 from MobileAgent.text_localization import ocr
 from MobileAgent.icon_localization import det
-from MobileAgent.controller import get_screenshot, tap, slide, type, back, home
+from MobileAgent.controller import get_screenshot, tap, slide, type, back, home, get_all_input_method, get_current_input_method, set_input_method
 from MobileAgent.prompt import get_action_prompt, get_reflect_prompt, get_memory_prompt, get_process_prompt
 from MobileAgent.chat import init_action_chat, init_reflect_chat, init_memory_chat, add_response, add_response_two_image
 
@@ -280,7 +280,13 @@ def get_perception_infos(adb_path, screenshot_file):
     os.mkdir(screenshot)
 
 error_flag = False
-
+# Select ADB Keyboard as the default IME once, before the main loop; `keyboard` is later passed to get_action_prompt.
+adb_ime = "adbkeyboard"
+keyboard = adb_ime in get_current_input_method(adb_path)
+if not keyboard and adb_ime in get_all_input_method(adb_path):
+    set_input_method(adb_path)
+    # Re-read the setting instead of assuming the switch took effect.
+    keyboard = adb_ime in get_current_input_method(adb_path)
 iter = 0
 while True:
     iter += 1
@@ -289,15 +295,6 @@ def get_perception_infos(adb_path, screenshot_file):
     perception_infos, width, height = get_perception_infos(adb_path, screenshot_file)
     shutil.rmtree(temp_file)
     os.mkdir(temp_file)
-
-    keyboard = False
-    keyboard_height_limit = 0.9 * height
-    for perception_info in perception_infos:
-        if perception_info['coordinates'][1] < keyboard_height_limit:
-            continue
-        if 'ADB Keyboard' in perception_info['text']:
-            keyboard = True
-            break
 
     prompt_action = get_action_prompt(instruction, perception_infos, width, height, keyboard, summary_history, action_history, summary, action, add_info, error_flag, completed_requirements, memory)
     chat_action = init_action_chat()