Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Ignore Jupyter Notebook checkpoints
.ipynb_checkpoints
/test/*
/deprecated/*
/test/*.ipynb
/logs/*
__pycache__/
Expand All @@ -19,6 +20,7 @@ ufo/config/config_llm.yaml
ufo/rag/app_docs/*
learner/records.json
vectordb/docs/*
vectordb/experience/*

# Don't ignore the example files
!vectordb/docs/example/
Expand Down
2 changes: 2 additions & 0 deletions learner/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'



def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str):
"""
Create an indexer for the given application.
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ pywin32==304
pywinauto==0.6.8
PyYAML==6.0.1
Requests==2.31.0
faiss-cpu==1.23.5
lxml==5.1.0
faiss-cpu==1.8.0
lxml==5.1.0
psutil==5.9.8
1 change: 1 addition & 0 deletions ufo/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def load_config(config_path="ufo/config/"):
:return: Merged configuration from environment variables and YAML file.
"""
# Copy environment variables to avoid modifying them directly
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
configs = dict(os.environ)

path = config_path
Expand Down
12 changes: 11 additions & 1 deletion ufo/config/config.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ ACTION_SELECTION_PROMPT: "ufo/prompts/base/{mode}/action_selection.yaml" # The
APP_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/app_example.yaml" # The prompt for the app selection
ACTION_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/action_example.yaml" # The prompt for the action selection


## For experience learning
EXPERIENCE_PROMPT: "ufo/prompts/experience/{mode}/experience_summary.yaml"
EXPERIENCE_SAVED_PATH: "vectordb/experience/"

API_PROMPT: "ufo/prompts/base/{mode}/api.yaml" # The prompt for the API
INPUT_TEXT_API: "type_keys" # The input text API
INPUT_TEXT_ENTER: True # whether to press enter after typing the text
Expand All @@ -46,4 +51,9 @@ RAG_OFFLINE_DOCS_RETRIEVED_TOPK: 1 # The topk for the offline retrieved documen
## RAG Configuration for the Bing search
RAG_ONLINE_SEARCH: False # Whether to use the online search for the RAG.
RAG_ONLINE_SEARCH_TOPK: 5 # The topk for the online search
RAG_ONLINE_RETRIEVED_TOPK: 1 # The topk for the online retrieved documents
RAG_ONLINE_RETRIEVED_TOPK: 1 # The topk for the online retrieved documents


## RAG Configuration for experience
RAG_EXPERIENCE: True  # Whether to use the experience RAG.
RAG_EXPERIENCE_RETRIEVED_TOPK: 5  # The topk for the experience retrieved documents
19 changes: 9 additions & 10 deletions ufo/config/config_llm.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@ APP_AGENT: {
# API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview" ,# "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}

Expand All @@ -25,20 +25,20 @@ ACTION_AGENT: {
API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

# API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}

Expand All @@ -47,26 +47,25 @@ BACKUP_AGENT: {
API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

# API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The the OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}




### For parameters
MAX_TOKENS: 2000 # The max token limit for the response completion
MAX_RETRY: 3 # The max retry limit for the response completion
Expand Down
2 changes: 2 additions & 0 deletions ufo/experience/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
200 changes: 200 additions & 0 deletions ufo/experience/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import os
import re
from ..utils import encode_image_from_path, print_with_color


class ExperienceLogLoader:
    """
    Load and structure the logs produced by a previous UFO run so they can be
    summarized into reusable experience records.

    A run directory (``log_path``) is expected to contain:
      - ``response.log``: one JSON object per line, each with at least the
        keys "Round", "Request", and "Application" (assumed schema — confirm
        against the session logger that writes these files);
      - screenshots named ``action_step<N>.png`` and
        ``action_step<N>_<version>.png``.
    """

    def __init__(self, log_path: str) -> None:
        """
        Initialize the LogLoader and eagerly parse the run directory.
        :param log_path: The path of the log folder for one run.
        """
        self.log_path = log_path
        # Parsed response.log entries, one dict per line, in file order.
        self.response = self.load_response_log()
        # Highest step number found in the screenshot filenames.
        # NOTE(review): may be None when no screenshots exist, in which case
        # get_request_partition() below would raise TypeError on range(None).
        self.max_stepnum = self.find_max_number_in_filenames(log_path)
        # Step indices grouped into one list per round (see get_request_partition).
        self.request_partition = self.get_request_partition()
        # NOTE(review): never written or read elsewhere in this class — dead state?
        self.screenshots = {}

        # Populated by create_logs(); empty until that method is called.
        self.logs = []


    def load_response_log(self) -> list:
        """
        Load the response log from <log_path>/response.log.
        Lines that are not valid JSON are skipped with a warning rather than
        aborting the whole load.
        :return: The list of parsed response dicts, in file order.
        """

        response = []
        response_log_path = os.path.join(self.log_path, "response.log")
        with open(response_log_path, 'r', encoding='utf-8') as file:
            # Read the lines and split them into a list
            response_log = file.readlines()
        for response_string in response_log:
            try:
                response.append(json.loads(response_string))
            except json.JSONDecodeError:
                # Best-effort: warn and continue so one corrupt line does not
                # discard the rest of the run.
                print_with_color(f"Error loading response log: {response_string}", "yellow")
        return response


    @staticmethod
    def find_max_number_in_filenames(log_path) -> int:
        """
        Find the maximum action-step number among the screenshot filenames
        (``action_step<N>.png``) in the log folder.
        :param log_path: The folder to scan.
        :return: The maximum step number, or None when no filename matches
            (despite the ``-> int`` annotation — callers must handle None).
        """

        # Get the list of files in the folder
        files = os.listdir(log_path)

        # Initialize an empty list to store extracted numbers
        numbers = []

        # Iterate through each file
        for file in files:
            # Extract the number from the filename
            number = ExperienceLogLoader.extract_action_step_count(file)
            if number is not None:
                # Append the extracted number to the list
                numbers.append(number)

        if numbers:
            # Return the maximum number if numbers list is not empty
            return max(numbers)
        else:
            # Return None if no numbers are found in filenames
            return None


    def load_screenshot(self, stepnum: int = 0, version: str = "") -> str:
        """
        Load one screenshot for a given step as a base64-encoded image URL.
        :param stepnum: The step number of the screenshot.
        :param version: The screenshot variant suffix (e.g. "selected_controls");
            an empty string selects the raw ``action_step<N>.png`` file.
        :return: The encoded image (via encode_image_from_path), or None when
            the file does not exist.
        """

        # create version tag
        if version:
            version_tag = "_" + version
        else:
            version_tag = ""

        # Get the filename of the screenshot
        filename = "action_step{stepnum}{version}.png".format(stepnum=stepnum, version=version_tag)
        screenshot_path = os.path.join(self.log_path, filename)

        # Check if the screenshot exists
        if os.path.exists(screenshot_path):
            image_url = encode_image_from_path(screenshot_path)
        else:
            image_url = None

        return image_url


    def create_logs(self) -> list:
        """
        Assemble one structured log dict per round, combining the parsed
        responses with their screenshots.
        Each dict has "request", "round", "step_num", "application", plus one
        "step_<i>" entry per step of the round (i is 0-based within the round).
        :return: The list of per-round log dicts (also stored in self.logs).
        """
        self.logs = []
        for partition in self.request_partition:
            # The round's request/round-id are taken from its first step entry.
            request = self.response[partition[0]]["Request"]
            nround = self.response[partition[0]]["Round"]
            partitioned_logs = {
                "request": request,
                "round": nround,
                "step_num": len(partition),
                **{
                    "step_%s" % local_step: {
                        "response": self.response[step],
                        # NOTE(review): enumerate is 0-based, so this flags the
                        # *second* entry of the partition — presumably step 0 is
                        # the app-selection step and step 1 the first real
                        # action; confirm against the agent's logging order.
                        "is_first_action": local_step == 1,
                        "screenshot": {
                            # "raw" maps to the untagged action_step<N>.png file.
                            version: self.load_screenshot(step, "" if version == "raw" else version)
                            for version in ["raw", "selected_controls"]
                        }
                    }
                    for local_step, step in enumerate(partition)
                },
                # Deduplicated set of applications touched during the round.
                "application": list({self.response[step]["Application"] for step in partition})
            }
            self.logs.append(partitioned_logs)
        return self.logs


    def get_request_partition(self) -> list:
        """
        Group consecutive step indices by their "Round" value.
        Assumes self.response is ordered and rounds are contiguous.
        :return: A list of lists of step indices, one inner list per round.
        """
        request_partition = []
        current_round = 0
        current_partition = []

        # NOTE(review): iterating range(max_stepnum) assumes screenshot step
        # numbers start at 1 so that max_stepnum equals the number of response
        # entries — TODO confirm; off-by-one if numbering starts at 0.
        for step in range(self.max_stepnum):
            nround = self.response[step]["Round"]

            if nround != current_round:
                # Round boundary: flush the finished partition and start a new one.
                if current_partition:
                    request_partition.append(current_partition)
                current_partition = [step]
                current_round = nround
            else:
                current_partition.append(step)

        # Flush the trailing partition (the loop only flushes on boundaries).
        if current_partition:
            request_partition.append(current_partition)

        return request_partition



    @staticmethod
    def get_user_request(log_partition: dict) -> str:
        """
        Get the user request from a per-round log dict built by create_logs.
        :param log_partition: The log partition.
        :return: The user request.
        """
        return log_partition.get("request")



    @staticmethod
    def get_app_list(log_partition: dict) -> list:
        """
        Get the application list from a per-round log dict built by create_logs.
        :param log_partition: The log partition.
        :return: The application list.
        """
        return log_partition.get("application")


    @staticmethod
    def extract_action_step_count(filename : str) -> int:
        """
        Extract the action step count from the filename.
        Only bare ``action_step<N>.png`` names match; versioned variants such
        as ``action_step<N>_selected_controls.png`` are still matched by
        re.search since the pattern is unanchored — the digits before the
        suffix are returned.
        :param filename: The filename.
        :return: The number extracted from the filename, or None if no match.
        """

        # Define a regular expression pattern to extract numbers
        pattern = r'action_step(\d+)\.png'
        # Use re.search to find the matching pattern in the filename
        match = re.search(pattern, filename)
        if match:
            # Return the extracted number as an integer
            return int(match.group(1))
        else:
            # Return None if no match is found
            return None

Loading