diff --git a/README.md b/README.md index 7c72fd16..d341696b 100644 --- a/README.md +++ b/README.md @@ -240,6 +240,7 @@ You may use them to debug, replay, or analyze the agent output. * For other communications, please contact [ufo-agent@microsoft.com](mailto:ufo-agent@microsoft.com). --- + -   ## 📚 Citation Our technical report paper can be found [here](https://arxiv.org/abs/2402.07939). Note that previous HostAgent and AppAgent in the paper are renamed to HostAgent and AppAgent in the code base to better reflect their functions. @@ -112,4 +96,4 @@ For more information on GUI agents, refer to our survey paper: [Large Language M gtag('js', new Date()); gtag('config', 'G-FX17ZGJYGC'); - \ No newline at end of file + diff --git a/ufo/agents/agent/app_agent.py b/ufo/agents/agent/app_agent.py index d1d73bdd..246c9326 100644 --- a/ufo/agents/agent/app_agent.py +++ b/ufo/agents/agent/app_agent.py @@ -58,6 +58,7 @@ def __init__( self.online_doc_retriever = None self.experience_retriever = None self.human_demonstration_retriever = None + self.Puppeteer = self.create_puppeteer_interface() self.set_state(ContinueAppAgentState()) diff --git a/ufo/agents/agent/basic.py b/ufo/agents/agent/basic.py index dba15f07..f85cffb7 100644 --- a/ufo/agents/agent/basic.py +++ b/ufo/agents/agent/basic.py @@ -139,7 +139,7 @@ def message_constructor(self) -> List[Dict[str, Union[str, List[Dict[str, str]]] @classmethod def get_response( - cls, message: List[dict], namescope: str, use_backup_engine: bool + cls, message: List[dict], namescope: str, use_backup_engine: bool, configs = configs ) -> str: """ Get the response for the prompt. @@ -149,7 +149,7 @@ def get_response( :return: The response. """ response_string, cost = llm_call.get_completion( - message, namescope, use_backup_engine=use_backup_engine + message, namescope, use_backup_engine=use_backup_engine, configs = configs ) return response_string, cost @@ -243,6 +243,7 @@ def process_resume(self) -> None: if self.processor: self.processor.resume() + def process_asker(self, ask_user: bool = True) -> None: """ Ask for the process. diff --git a/ufo/agents/processors/app_agent_processor.py b/ufo/agents/processors/app_agent_processor.py index af97b362..3658ed73 100644 --- a/ufo/agents/processors/app_agent_processor.py +++ b/ufo/agents/processors/app_agent_processor.py @@ -22,7 +22,8 @@ from ufo.agents.agent.app_agent import AppAgent configs = Config.get_instance().config_data -BACKEND = configs["CONTROL_BACKEND"] +if configs is not None: + BACKEND = configs["CONTROL_BACKEND"] class AppAgentProcessor(BaseProcessor): @@ -503,7 +504,8 @@ def demonstration_prompt_helper(self) -> Tuple[List[str], List[str]]: return examples, tips def get_filtered_annotation_dict( - self, annotation_dict: Dict[str, UIAWrapper] + self, annotation_dict: Dict[str, UIAWrapper], + configs = configs ) -> Dict[str, UIAWrapper]: """ Get the filtered annotation dictionary. diff --git a/ufo/agents/processors/basic.py b/ufo/agents/processors/basic.py index b9021eb5..e93c94a5 100644 --- a/ufo/agents/processors/basic.py +++ b/ufo/agents/processors/basic.py @@ -21,7 +21,8 @@ from ufo.module.context import Context, ContextNames configs = Config.get_instance().config_data -BACKEND = configs["CONTROL_BACKEND"] +if configs is not None: + BACKEND = configs["CONTROL_BACKEND"] class BaseProcessor(ABC): diff --git a/ufo/agents/processors/host_agent_processor.py b/ufo/agents/processors/host_agent_processor.py index 94a38500..a6819795 100644 --- a/ufo/agents/processors/host_agent_processor.py +++ b/ufo/agents/processors/host_agent_processor.py @@ -13,7 +13,8 @@ from ufo.module.context import Context, ContextNames configs = Config.get_instance().config_data -BACKEND = configs["CONTROL_BACKEND"] +if configs is not None: + BACKEND = configs["CONTROL_BACKEND"] if TYPE_CHECKING: from ufo.agents.agent.host_agent import HostAgent diff --git a/ufo/automator/ui_control/controller.py b/ufo/automator/ui_control/controller.py index 87df3fba..749f51ce 100644 --- a/ufo/automator/ui_control/controller.py +++ b/ufo/automator/ui_control/controller.py @@ -18,7 +18,7 @@ configs = Config.get_instance().config_data -if configs.get("AFTER_CLICK_WAIT", None) is not None: +if configs is not None and configs.get("AFTER_CLICK_WAIT", None) is not None: pywinauto.timings.Timings.after_clickinput_wait = configs["AFTER_CLICK_WAIT"] pywinauto.timings.Timings.after_click_wait = configs["AFTER_CLICK_WAIT"] diff --git a/ufo/automator/ui_control/openfile.py b/ufo/automator/ui_control/openfile.py index 2058a369..49a51dd6 100644 --- a/ufo/automator/ui_control/openfile.py +++ b/ufo/automator/ui_control/openfile.py @@ -9,7 +9,10 @@ configs = Config.get_instance().config_data -BACKEND = configs["CONTROL_BACKEND"] +if configs is not None: + BACKEND = configs["CONTROL_BACKEND"] +else: + BACKEND = "uia" class FileController: @@ -17,9 +20,9 @@ class FileController: Control block for open file / specific APP and proceed the operation. """ - def __init__(self): + def __init__(self, backend=BACKEND): - self.backend = BACKEND + self.backend = backend self.file_path = "" self.APP = "" self.apptype = "" diff --git a/ufo/automator/ui_control/screenshot.py b/ufo/automator/ui_control/screenshot.py index b7c884b1..9243a78d 100644 --- a/ufo/automator/ui_control/screenshot.py +++ b/ufo/automator/ui_control/screenshot.py @@ -18,7 +18,11 @@ configs = Config.get_instance().config_data -DEFAULT_PNG_COMPRESS_LEVEL = int(configs.get("DEFAULT_PNG_COMPRESS_LEVEL", 0)) +if configs is not None: + DEFAULT_PNG_COMPRESS_LEVEL = int(configs.get("DEFAULT_PNG_COMPRESS_LEVEL", 0)) +else: + DEFAULT_PNG_COMPRESS_LEVEL = 6 + class Photographer(ABC): diff --git a/ufo/config/config.py b/ufo/config/config.py index c9979cb0..08a1e29b 100644 --- a/ufo/config/config.py +++ b/ufo/config/config.py @@ -14,7 +14,10 @@ class Config: def __init__(self): # Load config here - self.config_data = self.load_config() + if os.getenv("RUN_CONFIGS", "true").lower() != "false": + self.config_data = self.load_config() + else: + self.config_data = None @staticmethod def get_instance(): @@ -26,7 +29,7 @@ def get_instance(): Config._instance = Config() return Config._instance - def load_config(self, config_path="ufo/config/") -> dict: + def load_config(self, config_path = "ufo/config/") -> dict: """ Load the configuration from a YAML file and environment variables. @@ -45,14 +48,13 @@ def load_config(self, config_path="ufo/config/") -> dict: # Update configs with YAML data if yaml_data: configs.update(yaml_data) - with open(path + "config_dev.yaml", "r") as file: - yaml_dev_data = yaml.safe_load(file) - with open(path + "config_prices.yaml", "r") as file: - yaml_prices_data = yaml.safe_load(file) - # Update configs with YAML data - if yaml_data: + if os.path.exists(path + "config_dev.yaml"): + with open(path + "config_dev.yaml", "r") as file: + yaml_dev_data = yaml.safe_load(file) configs.update(yaml_dev_data) - if yaml_prices_data: + if os.path.exists(path + "config_prices.yaml"): + with open(path + "config_prices.yaml", "r") as file: + yaml_prices_data = yaml.safe_load(file) configs.update(yaml_prices_data) except FileNotFoundError: print_with_color( diff --git a/ufo/llm/llm_call.py b/ufo/llm/llm_call.py index 78a302be..b47ac9e5 100644 --- a/ufo/llm/llm_call.py +++ b/ufo/llm/llm_call.py @@ -12,7 +12,7 @@ def get_completion( - messages, agent: str = "APP", use_backup_engine: bool = True + messages, agent: str = "APP", use_backup_engine: bool = True, configs = configs ) -> Tuple[str, float]: """ Get completion for the given messages. @@ -23,13 +23,14 @@ def get_completion( """ responses, cost = get_completions( - messages, agent=agent, use_backup_engine=use_backup_engine, n=1 + messages, agent=agent, use_backup_engine=use_backup_engine, n=1, configs = configs ) return responses[0], cost def get_completions( - messages, agent: str = "APP", use_backup_engine: bool = True, n: int = 1 + messages, agent: str = "APP", use_backup_engine: bool = True, n: int = 1, + configs = configs ) -> Tuple[list, float]: """ Get completions for the given messages. @@ -44,6 +45,10 @@ def get_completions( agent_type = "HOST_AGENT" elif agent.lower() in ["app", "appagent"]: agent_type = "APP_AGENT" + elif agent.lower() == "prefill": + agent_type = "PREFILL_AGENT" + elif agent.lower() == "filter": + agent_type = "FILTER_AGENT" elif agent.lower() == "backup": agent_type = "BACKUP_AGENT" else: diff --git a/ufo/llm/openai.py b/ufo/llm/openai.py index 9765cc33..75f985a6 100644 --- a/ufo/llm/openai.py +++ b/ufo/llm/openai.py @@ -29,7 +29,7 @@ def __init__(self, config: Dict[str, Any], agent_type: str) -> None: self.config = config self.api_type = self.config_llm["API_TYPE"].lower() self.max_retry = self.config["MAX_RETRY"] - self.prices = self.config["PRICES"] + self.prices = self.config.get("PRICES", {}) assert self.api_type in ["openai", "aoai", "azure_ad"], "Invalid API type" self.client: OpenAI = OpenAIService.get_openai_client( diff --git a/ufo/module/basic.py b/ufo/module/basic.py index 81edb918..db1f3add 100644 --- a/ufo/module/basic.py +++ b/ufo/module/basic.py @@ -702,7 +702,7 @@ def capture_last_snapshot(self) -> None: app_agent.Puppeteer.save_to_xml(xml_save_path) @staticmethod - def initialize_logger(log_path: str, log_filename: str) -> logging.Logger: + def initialize_logger(log_path: str, log_filename: str, mode='a', configs = configs) -> logging.Logger: """ Initialize logging. log_path: The path of the log file. @@ -717,7 +717,7 @@ def initialize_logger(log_path: str, log_filename: str) -> logging.Logger: logger.handlers = [] log_file_path = os.path.join(log_path, log_filename) - file_handler = logging.FileHandler(log_file_path, encoding="utf-8") + file_handler = logging.FileHandler(log_file_path, mode = mode, encoding="utf-8") formatter = logging.Formatter("%(message)s") file_handler.setFormatter(formatter) logger.addHandler(file_handler)