microsoft · vyokky · Dec 18, 2024 · Dec 17, 2024 · Dec 17, 2024 · Dec 18, 2024
diff --git a/documents/docs/configurations/developer_configuration.md b/documents/docs/configurations/developer_configuration.md
@@ -10,6 +10,7 @@ The following parameters are included in the system configuration of the UFO age
 |-------------------------|---------------------------------------------------------------------------------------------------------|----------|---------------|
 | `CONTROL_BACKEND`       | The backend for control action, currently supporting `uia` and `win32`.                                 | String   | "uia"         |
 | `MAX_STEP`              | The maximum step limit for completing the user request in a session.                                    | Integer  | 100           |
+| `MAX_ROUND`             | The maximum round limit for completing the user request in a session.                                   | Integer  | 10            |
 | `SLEEP_TIME`            | The sleep time in seconds between each step to wait for the window to be ready.                         | Integer  | 5             |
 | `RECTANGLE_TIME`        | The time in seconds for the rectangle display around the selected control.                              | Integer  | 1             |
 | `SAFE_GUARD`            | Whether to use the safe guard to ask for user confirmation before performing sensitive operations.      | Boolean  | True          |
@@ -25,6 +26,9 @@ The following parameters are included in the system configuration of the UFO age
 | `LOG_XML`               | Whether to log the XML file at every step.                                                              | Boolean  | False         |
 | `SCREENSHOT_TO_MEMORY`  | Whether to allow the screenshot to [`Blackboard`](../agents/design/blackboard.md) for the agent's decision making.                              | Boolean  | True          |
 | `SAVE_UI_TREE`          | Whether to save the UI tree in the log.                                                                 | Boolean  | False         |
+| `SAVE_EXPERIENCE`       | Whether to save the experience: "always" to always save, "always_not" to never save, or "ask" to ask the user each time. The default is "always_not". | String   | "always_not"  |
+| `TASK_STATUS`           | Whether to record the status of the tasks in batch execution mode.                                     | Boolean  | True         |
+
 
 ## Main Prompt Configuration
 

diff --git a/ufo/agents/agent/evaluation_agent.py b/ufo/agents/agent/evaluation_agent.py
@@ -1,12 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-import sys
-
-sys.path.append("..")
-sys.path.append("../..")
-sys.path.append("./")
-
 from typing import Any, Dict, Optional, Tuple
 
 from ufo.agents.agent.basic import BasicAgent

diff --git a/ufo/config/config_dev.yaml b/ufo/config/config_dev.yaml
@@ -1,5 +1,6 @@
 CONTROL_BACKEND: "uia"  # The backend for control action, currently we support uia and win32
 MAX_STEP: 100  # The max step limit for completing the user request
+MAX_ROUND: 10  # The max round limit for completing the user request
 SLEEP_TIME: 1  # The sleep time between each step to wait for the window to be ready
 RECTANGLE_TIME: 1
 
@@ -96,7 +97,7 @@ EVA_ROUND: FALSE
 EVA_ALL_SCREENSHOTS: True  # Whether to include all the screenshots in the evaluation
 
 # Image saving performance
-DEFAULT_PNG_COMPRESS_LEVEL: 9  # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress
+DEFAULT_PNG_COMPRESS_LEVEL: 1  # The compression level for the PNG image, 0-9: 0 is no compression, 1 is the fastest, 9 is the best compression
 
 
 # Save UI tree
@@ -105,5 +106,7 @@ SAVE_UI_TREE: False  # Whether to save the UI tree
 
 # Record the status of the tasks
 TASK_STATUS: True  # Whether to record the status of the tasks in batch execution mode.
-# TASK_STATUS_FILE # The path for the task status file.
+
+# Experience saving
+SAVE_EXPERIENCE: "ask"  # Whether to save the experience: "always" to always save, "always_not" to never save, "ask" to ask the user each time. The documented default is "always_not"; this file sets "ask".
 
diff --git a/ufo/experience/experience_parser.py b/ufo/experience/experience_parser.py
@@ -0,0 +1,76 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from typing import Any, Dict, List
+from collections import defaultdict
+
+from ufo.trajectory import parser
+from ufo.automator.ui_control.screenshot import PhotographerFacade
+
+
+class ExperienceLogLoader:
+    """
+    Load the logs of a previous run from a log path and partition them by subtask.
+    """
+
+    _subtask_key = "Subtask"
+    _application_key = "Application"
+    _image_url_key = "ScreenshotURLs"
+
+    def __init__(self, log_path: str):
+        """
+        Initialize the ExperienceLogLoader.
+        :param log_path: The path of the log file.
+        """
+        self._log_path = log_path
+        trajectory = parser.Trajectory(log_path)
+        self._subtask_partition = self.group_by_subtask(trajectory.app_agent_log)
+
+    @classmethod
+    def group_by_subtask(
+        cls, step_log: List[Dict[str, Any]]
+    ) -> List[List[Dict[str, Any]]]:
+        """
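+        Group the logs by the value of the "Subtask" field, after attaching the encoded screenshots to each log under "ScreenshotURLs".
+        :param step_log: The step log. Each entry is modified in place.
+        :return: One dictionary per subtask, with the keys "subtask_index", "subtask", "logs" and "application".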
+        """
+
+        grouped = defaultdict(list)
+        for log in step_log:
+            # Attach the encoded screenshots, then group by the value of the "Subtask" field
+            image_urls = {}
+            for key in parser.Trajectory._screenshot_keys:
+                image_urls[key] = PhotographerFacade.encode_image(
+                    log.get(parser.Trajectory._step_screenshot_key, {}).get(key)
+                )
+            log[cls._image_url_key] = image_urls
+            subtask = log.get(cls._subtask_key)
+            grouped[subtask].append(log)
+
+        # Build the desired output structure
+        result = [
+            {
+                "subtask_index": index,
+                "subtask": subtask,
+                "logs": logs,
+                "application": logs[0][cls._application_key],
+            }
+            for index, (subtask, logs) in enumerate(grouped.items())
+        ]
+
+        return result
+
+    @property
+    def subtask_partition(self) -> List[Dict[str, Any]]:
+        """
+        :return: The subtask partition.
+        """
+        return self._subtask_partition
+
+    @property
+    def log_path(self) -> str:
+        """
+        :return: The log path.
+        """
+        return self._log_path
diff --git a/ufo/experience/parser.py b/ufo/experience/parser.py
diff --git a/ufo/experience/summarizer.py b/ufo/experience/summarizer.py
@@ -2,13 +2,14 @@
 # Licensed under the MIT License.
 
 import os
+import sys
 from typing import Tuple
 
 import yaml
 from langchain.docstore.document import Document
 from langchain_community.vectorstores import FAISS
 
-from ufo.experience.parser import ExperienceLogLoader
+from ufo.experience.experience_parser import ExperienceLogLoader
 from ufo.llm.llm_call import get_completion
 from ufo.prompter.experience_prompter import ExperiencePrompter
 from ufo.utils import get_hugginface_embedding, json_parser
@@ -107,8 +108,8 @@ def get_summary_list(self, logs: list) -> Tuple[list, float]:
         for log_partition in logs:
             prompt = self.build_prompt(log_partition)
             summary, cost = self.get_summary(prompt)
-            summary["request"] = ExperienceLogLoader.get_user_request(log_partition)
-            summary["app_list"] = ExperienceLogLoader.get_app_list(log_partition)
+            summary["request"] = log_partition.get("subtask")
+            summary["app_list"] = [log_partition.get("application")]
             summaries.append(summary)
             total_cost += cost
 
@@ -121,8 +122,7 @@ def read_logs(log_path: str) -> list:
         :param log_path: The path of the log file.
         """
         replay_loader = ExperienceLogLoader(log_path)
-        logs = replay_loader.create_logs()
-        return logs
+        return replay_loader.subtask_partition
 
     @staticmethod
     def create_or_update_yaml(summaries: list, yaml_path: str):
@@ -184,3 +184,25 @@ def create_or_update_vector_db(summaries: list, db_path: str):
         db.save_local(db_path)
 
         print(f"Updated vector DB successfully: {db_path}")
+
+
+if __name__ == "__main__":
+
+    from ufo.config.config import Config
+
+    configs = Config.get_instance().config_data
+
+    # Initialize the ExperienceSummarizer
+
+    summarizer = ExperienceSummarizer(
+        configs["APP_AGENT"]["VISUAL_MODE"],
+        configs["EXPERIENCE_PROMPT"],
+        configs["APPAGENT_EXAMPLE_PROMPT"],
+        configs["API_PROMPT"],
+    )
+
+    log_path = "logs/test_exp"
+
+    experience = summarizer.read_logs(log_path)
+    summaries, cost = summarizer.get_summary_list(experience)
+    print(summaries, cost)