Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions documents/docs/configurations/developer_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The following parameters are included in the system configuration of the UFO age
|-------------------------|---------------------------------------------------------------------------------------------------------|----------|---------------|
| `CONTROL_BACKEND` | The backend for control action, currently supporting `uia` and `win32`. | String | "uia" |
| `MAX_STEP` | The maximum step limit for completing the user request in a session. | Integer | 100 |
| `MAX_ROUND` | The maximum round limit for completing the user request in a session. | Integer | 10 |
| `SLEEP_TIME` | The sleep time in seconds between each step to wait for the window to be ready. | Integer | 5 |
| `RECTANGLE_TIME` | The time in seconds for the rectangle display around the selected control. | Integer | 1 |
| `SAFE_GUARD` | Whether to use the safe guard to ask for user confirmation before performing sensitive operations. | Boolean | True |
Expand All @@ -25,6 +26,9 @@ The following parameters are included in the system configuration of the UFO age
| `LOG_XML` | Whether to log the XML file at every step. | Boolean | False |
| `SCREENSHOT_TO_MEMORY` | Whether to allow the screenshot to [`Blackboard`](../agents/design/blackboard.md) for the agent's decision making. | Boolean | True |
| `SAVE_UI_TREE` | Whether to save the UI tree in the log. | Boolean | False |
| `SAVE_EXPERIENCE` | Whether to save the experience: "always" to always save, "always_not" to never save, or "ask" to prompt the user each time. The default is "always_not". | String | "always_not" |
| `TASK_STATUS` | Whether to record the status of the tasks in batch execution mode. | Boolean | True |


## Main Prompt Configuration

Expand Down
6 changes: 0 additions & 6 deletions ufo/agents/agent/evaluation_agent.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import sys

sys.path.append("..")
sys.path.append("../..")
sys.path.append("./")

from typing import Any, Dict, Optional, Tuple

from ufo.agents.agent.basic import BasicAgent
Expand Down
7 changes: 5 additions & 2 deletions ufo/config/config_dev.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
CONTROL_BACKEND: "uia" # The backend for control action, currently we support uia and win32
MAX_STEP: 100 # The max step limit for completing the user request
MAX_ROUND: 10 # The max round limit for completing the user request
SLEEP_TIME: 1 # The sleep time between each step to wait for the window to be ready
RECTANGLE_TIME: 1

Expand Down Expand Up @@ -96,7 +97,7 @@ EVA_ROUND: FALSE
EVA_ALL_SCREENSHOTS: True # Whether to include all the screenshots in the evaluation

# Image saving performance
DEFAULT_PNG_COMPRESS_LEVEL: 9 # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress
DEFAULT_PNG_COMPRESS_LEVEL: 1 # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress


# Save UI tree
Expand All @@ -105,5 +106,7 @@ SAVE_UI_TREE: False # Whether to save the UI tree

# Record the status of the tasks
TASK_STATUS: True # Whether to record the status of the tasks in batch execution mode.
# TASK_STATUS_FILE # The path for the task status file.

# Experience saving
SAVE_EXPERIENCE: "ask" # Whether to save the experience: "always" to always save, "always_not" to never save, or "ask" to prompt the user each time. The documented default is "always_not".

76 changes: 76 additions & 0 deletions ufo/experience/experience_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from typing import Any, Dict, List
from collections import defaultdict

from ufo.trajectory import parser
from ufo.automator.ui_control.screenshot import PhotographerFacade


class ExperienceLogLoader:
    """
    Load the logs from a previous run and partition the app agent steps by subtask.
    """

    # Keys read from (or written to) each step log entry.
    _subtask_key = "Subtask"
    _application_key = "Application"
    _image_url_key = "ScreenshotURLs"

    def __init__(self, log_path: str):
        """
        Initialize the loader and build the subtask partition.
        :param log_path: The path of the log, as accepted by `parser.Trajectory`.
        """
        self._log_path = log_path
        trajectory = parser.Trajectory(log_path)
        self._subtask_partition = self.group_by_subtask(trajectory.app_agent_log)

    @classmethod
    def group_by_subtask(
        cls, step_log: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Group the step logs by the value of the "Subtask" field.

        Every grouped entry is a shallow copy of the input entry with an extra
        "ScreenshotURLs" field, which maps each key in
        `parser.Trajectory._screenshot_keys` to the result of
        `PhotographerFacade.encode_image`. The input entries are not modified.

        :param step_log: The step log.
        :return: One dict per distinct subtask, in first-seen order, with the keys
            "subtask_index", "subtask", "logs" and "application". The application
            is taken from the first log of the group.
        """

        grouped = defaultdict(list)
        for log in step_log:
            screenshots = log.get(parser.Trajectory._step_screenshot_key, {})
            image_urls = {
                key: PhotographerFacade.encode_image(screenshots.get(key))
                for key in parser.Trajectory._screenshot_keys
            }
            # Copy instead of writing into the caller's dict, so that the
            # encoded images do not leak into the source log entries.
            entry = {**log, cls._image_url_key: image_urls}
            # Entries without a "Subtask" field are grouped under None.
            grouped[log.get(cls._subtask_key)].append(entry)

        # Build the desired output structure.
        return [
            {
                "subtask_index": index,
                "subtask": subtask,
                "logs": logs,
                "application": logs[0][cls._application_key],
            }
            for index, (subtask, logs) in enumerate(grouped.items())
        ]

    @property
    def subtask_partition(self) -> List[Dict[str, Any]]:
        """
        :return: The subtask partition built by `group_by_subtask`.
        """
        return self._subtask_partition

    @property
    def log_path(self) -> str:
        """
        :return: The log path given at construction.
        """
        return self._log_path
200 changes: 0 additions & 200 deletions ufo/experience/parser.py

This file was deleted.

32 changes: 27 additions & 5 deletions ufo/experience/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# Licensed under the MIT License.

import os
import sys
from typing import Tuple

import yaml
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS

from ufo.experience.parser import ExperienceLogLoader
from ufo.experience.experience_parser import ExperienceLogLoader
from ufo.llm.llm_call import get_completion
from ufo.prompter.experience_prompter import ExperiencePrompter
from ufo.utils import get_hugginface_embedding, json_parser
Expand Down Expand Up @@ -107,8 +108,8 @@ def get_summary_list(self, logs: list) -> Tuple[list, float]:
for log_partition in logs:
prompt = self.build_prompt(log_partition)
summary, cost = self.get_summary(prompt)
summary["request"] = ExperienceLogLoader.get_user_request(log_partition)
summary["app_list"] = ExperienceLogLoader.get_app_list(log_partition)
summary["request"] = log_partition.get("subtask")
summary["app_list"] = [log_partition.get("application")]
summaries.append(summary)
total_cost += cost

Expand All @@ -121,8 +122,7 @@ def read_logs(log_path: str) -> list:
:param log_path: The path of the log file.
"""
replay_loader = ExperienceLogLoader(log_path)
logs = replay_loader.create_logs()
return logs
return replay_loader.subtask_partition

@staticmethod
def create_or_update_yaml(summaries: list, yaml_path: str):
Expand Down Expand Up @@ -184,3 +184,25 @@ def create_or_update_vector_db(summaries: list, db_path: str):
db.save_local(db_path)

print(f"Updated vector DB successfully: {db_path}")


if __name__ == "__main__":

    from ufo.config.config import Config

    # Manual run: summarize the experience found in a fixed log path.
    settings = Config.get_instance().config_data

    # Build the ExperienceSummarizer from the loaded configuration.
    visual_mode = settings["APP_AGENT"]["VISUAL_MODE"]
    experience_prompt = settings["EXPERIENCE_PROMPT"]
    example_prompt = settings["APPAGENT_EXAMPLE_PROMPT"]
    api_prompt = settings["API_PROMPT"]
    demo_summarizer = ExperienceSummarizer(
        visual_mode, experience_prompt, example_prompt, api_prompt
    )

    # Read the subtask partitions, summarize each one and print the result.
    partitions = demo_summarizer.read_logs("logs/test_exp")
    result_summaries, total = demo_summarizer.get_summary_list(partitions)
    print(result_summaries, total)
Loading