Skip to content

Commit

Permalink
Merge pull request #157 from microsoft/pre-release
Browse files Browse the repository at this point in the history
refactor experience summarizer
  • Loading branch information
vyokky authored Dec 18, 2024
2 parents 6c9fde9 + 75bf4b3 commit 2fcf5ab
Show file tree
Hide file tree
Showing 11 changed files with 252 additions and 278 deletions.
4 changes: 4 additions & 0 deletions documents/docs/configurations/developer_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The following parameters are included in the system configuration of the UFO age
|-------------------------|---------------------------------------------------------------------------------------------------------|----------|---------------|
| `CONTROL_BACKEND` | The backend for control action, currently supporting `uia` and `win32`. | String | "uia" |
| `MAX_STEP` | The maximum step limit for completing the user request in a session. | Integer | 100 |
| `MAX_ROUND` | The maximum round limit for completing the user request in a session. | Integer | 10 |
| `SLEEP_TIME` | The sleep time in seconds between each step to wait for the window to be ready. | Integer | 5 |
| `RECTANGLE_TIME` | The time in seconds for the rectangle display around the selected control. | Integer | 1 |
| `SAFE_GUARD` | Whether to use the safe guard to ask for user confirmation before performing sensitive operations. | Boolean | True |
Expand All @@ -25,6 +26,9 @@ The following parameters are included in the system configuration of the UFO age
| `LOG_XML` | Whether to log the XML file at every step. | Boolean | False |
| `SCREENSHOT_TO_MEMORY` | Whether to allow the screenshot to [`Blackboard`](../agents/design/blackboard.md) for the agent's decision making. | Boolean | True |
| `SAVE_UI_TREE` | Whether to save the UI tree in the log. | Boolean | False |
| `SAVE_EXPERIENCE` | Whether to save the experience: "always" to always save, "always_not" to never save, or "ask" to ask the user whether to save. The default is "always_not". | String | "always_not" |
| `TASK_STATUS` | Whether to record the status of the tasks in batch execution mode. | Boolean | True |


## Main Prompt Configuration

Expand Down
6 changes: 0 additions & 6 deletions ufo/agents/agent/evaluation_agent.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import sys

sys.path.append("..")
sys.path.append("../..")
sys.path.append("./")

from typing import Any, Dict, Optional, Tuple

from ufo.agents.agent.basic import BasicAgent
Expand Down
7 changes: 5 additions & 2 deletions ufo/config/config_dev.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
CONTROL_BACKEND: "uia" # The backend for control action, currently we support uia and win32
MAX_STEP: 100 # The max step limit for completing the user request
MAX_ROUND: 10 # The max round limit for completing the user request
SLEEP_TIME: 1 # The sleep time between each step to wait for the window to be ready
RECTANGLE_TIME: 1

Expand Down Expand Up @@ -96,7 +97,7 @@ EVA_ROUND: FALSE
EVA_ALL_SCREENSHOTS: True # Whether to include all the screenshots in the evaluation

# Image saving performance
DEFAULT_PNG_COMPRESS_LEVEL: 9 # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress
DEFAULT_PNG_COMPRESS_LEVEL: 1 # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress


# Save UI tree
Expand All @@ -105,5 +106,7 @@ SAVE_UI_TREE: False # Whether to save the UI tree

# Record the status of the tasks
TASK_STATUS: True # Whether to record the status of the tasks in batch execution mode.
# TASK_STATUS_FILE # The path for the task status file.

# Experience saving
SAVE_EXPERIENCE: "ask" # Whether to save the experience: "always" to always save, "always_not" to never save, or "ask" to ask the user whether to save. The default is "always_not".

76 changes: 76 additions & 0 deletions ufo/experience/experience_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from typing import Any, Dict, List
from collections import defaultdict

from ufo.trajectory import parser
from ufo.automator.ui_control.screenshot import PhotographerFacade


class ExperienceLogLoader:
    """
    Load the logs of a previous run and partition the app-agent steps by subtask.
    """

    # Keys read from / written to each step-log dictionary.
    _subtask_key = "Subtask"
    _application_key = "Application"
    _image_url_key = "ScreenshotURLs"

    def __init__(self, log_path: str):
        """
        Initialize the ExperienceLogLoader.
        :param log_path: The path of the log file.
        """
        self._log_path = log_path
        trajectory = parser.Trajectory(log_path)
        self._subtask_partition = self.group_by_subtask(trajectory.app_agent_log)

    @classmethod
    def group_by_subtask(
        cls, step_log: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Group the logs by the value of the "Subtask" field.

        Each log dictionary is updated in place with a "ScreenshotURLs" entry
        that maps every screenshot key to its encoded image.
        :param step_log: The step log.
        :return: One dictionary per distinct subtask, in order of first
            appearance, with the keys "subtask_index", "subtask", "logs" and
            "application" (taken from the first log of the group).
        :raises KeyError: If the first log of a group has no "Application" field.
        """

        grouped = defaultdict(list)
        for log in step_log:
            # Attach the encoded screenshots of this step to the log itself.
            screenshots = log.get(parser.Trajectory._step_screenshot_key, {})
            log[cls._image_url_key] = {
                key: PhotographerFacade.encode_image(screenshots.get(key))
                for key in parser.Trajectory._screenshot_keys
            }

            # Group by the value of the "Subtask" field; logs without one
            # share the None group.
            grouped[log.get(cls._subtask_key)].append(log)

        # Build the desired output structure
        return [
            {
                "subtask_index": index,
                "subtask": subtask,
                "logs": logs,
                "application": logs[0][cls._application_key],
            }
            for index, (subtask, logs) in enumerate(grouped.items())
        ]

    @property
    def subtask_partition(self) -> List[Dict[str, Any]]:
        """
        :return: The subtask partition.
        """
        return self._subtask_partition

    @property
    def log_path(self) -> str:
        """
        :return: The log path.
        """
        return self._log_path
200 changes: 0 additions & 200 deletions ufo/experience/parser.py

This file was deleted.

32 changes: 27 additions & 5 deletions ufo/experience/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# Licensed under the MIT License.

import os
import sys
from typing import Tuple

import yaml
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS

from ufo.experience.parser import ExperienceLogLoader
from ufo.experience.experience_parser import ExperienceLogLoader
from ufo.llm.llm_call import get_completion
from ufo.prompter.experience_prompter import ExperiencePrompter
from ufo.utils import get_hugginface_embedding, json_parser
Expand Down Expand Up @@ -107,8 +108,8 @@ def get_summary_list(self, logs: list) -> Tuple[list, float]:
for log_partition in logs:
prompt = self.build_prompt(log_partition)
summary, cost = self.get_summary(prompt)
summary["request"] = ExperienceLogLoader.get_user_request(log_partition)
summary["app_list"] = ExperienceLogLoader.get_app_list(log_partition)
summary["request"] = log_partition.get("subtask")
summary["app_list"] = [log_partition.get("application")]
summaries.append(summary)
total_cost += cost

Expand All @@ -121,8 +122,7 @@ def read_logs(log_path: str) -> list:
:param log_path: The path of the log file.
"""
replay_loader = ExperienceLogLoader(log_path)
logs = replay_loader.create_logs()
return logs
return replay_loader.subtask_partition

@staticmethod
def create_or_update_yaml(summaries: list, yaml_path: str):
Expand Down Expand Up @@ -184,3 +184,25 @@ def create_or_update_vector_db(summaries: list, db_path: str):
db.save_local(db_path)

print(f"Updated vector DB successfully: {db_path}")


if __name__ == "__main__":
    # Manual smoke run: summarize the logs under a hard-coded path and print
    # the resulting summaries together with the accumulated cost.

    # Imported here so the configuration is only loaded when run as a script.
    from ufo.config.config import Config

    configs = Config.get_instance().config_data

    # Initialize the ExperienceSummarizer

    # The four arguments are passed positionally, straight from the config.
    summarizer = ExperienceSummarizer(
        configs["APP_AGENT"]["VISUAL_MODE"],
        configs["EXPERIENCE_PROMPT"],
        configs["APPAGENT_EXAMPLE_PROMPT"],
        configs["API_PROMPT"],
    )

    # Hard-coded log directory used for this manual run.
    log_path = "logs/test_exp"

    # read_logs returns the loader's subtask partition; get_summary_list
    # summarizes each partition and returns (summaries, total cost).
    experience = summarizer.read_logs(log_path)
    summaries, cost = summarizer.get_summary_list(experience)
    print(summaries, cost)
Loading

0 comments on commit 2fcf5ab

Please sign in to comment.