From 21d0a297b7e48531e33ba12d16840b59a4220751 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Wed, 1 Jan 2025 23:36:11 -0500 Subject: [PATCH 01/10] describe_actions.py with PIL --- ...describe_action.py => describe_actions.py} | 62 +++++++------------ 1 file changed, 24 insertions(+), 38 deletions(-) rename experiments/{describe_action.py => describe_actions.py} (66%) diff --git a/experiments/describe_action.py b/experiments/describe_actions.py similarity index 66% rename from experiments/describe_action.py rename to experiments/describe_actions.py index 81a79f67c..67a40eff9 100644 --- a/experiments/describe_action.py +++ b/experiments/describe_actions.py @@ -1,35 +1,29 @@ -"""Generate action descriptions.""" - from pprint import pformat - from loguru import logger -import cv2 +from PIL import Image, ImageDraw, ImageFont import numpy as np - from openadapt.db import crud def embed_description( - image: np.ndarray, + image: Image.Image, description: str, x: int = None, y: int = None, -) -> np.ndarray: +) -> Image.Image: """Embed a description into an image at the specified location. Args: - image (np.ndarray): The image to annotate. + image (Image.Image): The image to annotate. description (str): The text to embed. x (int, optional): The x-coordinate. Defaults to None (centered). y (int, optional): The y-coordinate. Defaults to None (centered). Returns: - np.ndarray: The annotated image. + Image.Image: The annotated image. """ - font = cv2.FONT_HERSHEY_SIMPLEX - font_scale = 1 - font_color = (255, 255, 255) # White - line_type = 1 + draw = ImageDraw.Draw(image) + font = ImageFont.load_default() # Replace with a TTF font if needed # Split description into multiple lines max_width = 60 # Maximum characters per line @@ -47,34 +41,27 @@ def embed_description( # Default to center if coordinates are not provided if x is None or y is None: - x = image.shape[1] // 2 - y = image.shape[0] // 2 + x = image.width // 2 + y = image.height // 2 - # Draw semi-transparent background and text + # Calculate text dimensions and draw semi-transparent background and text for i, line in enumerate(lines): - text_size, _ = cv2.getTextSize(line, font, font_scale, line_type) - text_x = max(0, min(x - text_size[0] // 2, image.shape[1] - text_size[0])) - text_y = y + i * 20 + bbox = draw.textbbox((0, 0), line, font=font) + text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1] + text_x = max(0, min(x - text_width // 2, image.width - text_width)) + text_y = y + i * text_height # Draw background - cv2.rectangle( - image, - (text_x - 15, text_y - 25), - (text_x + text_size[0] + 15, text_y + 15), - (0, 0, 0), - -1, + background_box = ( + text_x - 15, + text_y - 5, + text_x + text_width + 15, + text_y + text_height + 5, ) + draw.rectangle(background_box, fill=(0, 0, 0, 128)) # Draw text - cv2.putText( - image, - line, - (text_x, text_y), - font, - font_scale, - font_color, - line_type, - ) + draw.text((text_x, text_y), line, fill=(255, 255, 255), font=font) return image @@ -88,8 +75,8 @@ def main() -> None: for action in action_events: description, image = action.prompt_for_description(return_image=True) - # Convert image to numpy array for OpenCV compatibility - image = np.array(image) + # Convert image to PIL.Image for compatibility + image = Image.fromarray(np.array(image)) if action.mouse_x is not None and action.mouse_y is not None: # Use the mouse coordinates for mouse events @@ -105,8 +92,7 @@ def main() -> None: logger.info(f"{action=}") logger.info(f"{description=}") - cv2.imshow("Annotated Image", annotated_image) - cv2.waitKey(0) + annotated_image.show() # Opens the annotated image using the default viewer descriptions.append(description) logger.info(f"descriptions=\n{pformat(descriptions)}") From caaeeec9058aec4370cd157ab7a155b6b44eb9ce Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Wed, 1 Jan 2025 23:36:21 -0500 Subject: [PATCH 02/10] import sounddevice inside record_audio() --- openadapt/record.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openadapt/record.py b/openadapt/record.py index 4740da8c9..a76de017a 100644 --- a/openadapt/record.py +++ b/openadapt/record.py @@ -35,7 +35,6 @@ import numpy as np import psutil -import sounddevice import soundfile import websockets.sync.server import whisper @@ -1082,6 +1081,8 @@ def record_audio( audio_frames = [] # to store audio frames + import sounddevice + def audio_callback( indata: np.ndarray, frames: int, time: Any, status: sounddevice.CallbackFlags ) -> None: From 116168b9677ffea58936ce144a38d9b3ef24506f Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Wed, 1 Jan 2025 23:56:29 -0500 Subject: [PATCH 03/10] get_scaling_factor --- experiments/describe_actions.py | 24 +++++++++++++++++------- openadapt/utils.py | 12 ++++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py index 67a40eff9..b17412f9a 100644 --- a/experiments/describe_actions.py +++ b/experiments/describe_actions.py @@ -1,8 +1,13 @@ from pprint import pformat from loguru import logger -from PIL import Image, ImageDraw, ImageFont +from PIL import Image, ImageDraw import numpy as np + from openadapt.db import crud +from openadapt.plotting import get_font +from openadapt.utils import get_scaling_factor + +scaling_factor = get_scaling_factor() def embed_description( @@ -23,7 +28,8 @@ def embed_description( Image.Image: The annotated image. """ draw = ImageDraw.Draw(image) - font = ImageFont.load_default() # Replace with a TTF font if needed + font_size = 30 # Set font size (2x the default size) + font = get_font("Arial.ttf", font_size) # Split description into multiple lines max_width = 60 # Maximum characters per line @@ -39,10 +45,14 @@ def embed_description( if current_line: lines.append(" ".join(current_line)) - # Default to center if coordinates are not provided + # Default to top left if coordinates are not provided if x is None or y is None: - x = image.width // 2 - y = image.height // 2 + x = 0 + y = 0 + + # Adjust coordinates for scaling factor + x = int(x * scaling_factor) + y = int(y * scaling_factor) # Calculate text dimensions and draw semi-transparent background and text for i, line in enumerate(lines): @@ -83,8 +93,8 @@ def main() -> None: annotated_image = embed_description( image, description, - x=int(action.mouse_x) * 2, - y=int(action.mouse_y) * 2, + x=int(action.mouse_x), + y=int(action.mouse_y), ) else: # Center the text for other events diff --git a/openadapt/utils.py b/openadapt/utils.py index 524441946..4e1a0f44a 100644 --- a/openadapt/utils.py +++ b/openadapt/utils.py @@ -1087,6 +1087,18 @@ def get_html_prompt(html: str, convert_to_markdown: bool = False) -> str: return str(soup) +def get_scaling_factor() -> int: + """Determine the scaling factor using AppKit on macOS.""" + if sys.platform == "darwin": + from AppKit import NSScreen + + main_screen = NSScreen.mainScreen() + backing_scale = main_screen.backingScaleFactor() + logger.info(f"Backing Scale Factor: {backing_scale}") + return int(backing_scale) + return 1 # Default for Windows/Linux + + class WrapStdout: """Class to be used a target for multiprocessing.Process.""" From adc3c36a21bed3c0aec0cfc832e0b201f2bee072 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Wed, 1 Jan 2025 23:58:30 -0500 Subject: [PATCH 04/10] show text in top left --- experiments/describe_actions.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py index b17412f9a..9e4b699b1 100644 --- a/experiments/describe_actions.py +++ b/experiments/describe_actions.py @@ -13,16 +13,16 @@ def embed_description( image: Image.Image, description: str, - x: int = None, - y: int = None, + x: int = 0, + y: int = 0, ) -> Image.Image: """Embed a description into an image at the specified location. Args: image (Image.Image): The image to annotate. description (str): The text to embed. - x (int, optional): The x-coordinate. Defaults to None (centered). - y (int, optional): The y-coordinate. Defaults to None (centered). + x (int, optional): The x-coordinate. Defaults to 0. + y (int, optional): The y-coordinate. Defaults to 0. Returns: Image.Image: The annotated image. @@ -45,11 +45,6 @@ def embed_description( if current_line: lines.append(" ".join(current_line)) - # Default to top left if coordinates are not provided - if x is None or y is None: - x = 0 - y = 0 - # Adjust coordinates for scaling factor x = int(x * scaling_factor) y = int(y * scaling_factor) @@ -93,8 +88,6 @@ def main() -> None: annotated_image = embed_description( image, description, - x=int(action.mouse_x), - y=int(action.mouse_y), ) else: # Center the text for other events From baede4f8609b3fd7225b2ce05e4992f00b09cc65 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Thu, 2 Jan 2025 00:03:47 -0500 Subject: [PATCH 05/10] max_width = image.width --- experiments/describe_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py index 9e4b699b1..2303a99a2 100644 --- a/experiments/describe_actions.py +++ b/experiments/describe_actions.py @@ -32,7 +32,7 @@ def embed_description( font = get_font("Arial.ttf", font_size) # Split description into multiple lines - max_width = 60 # Maximum characters per line + max_width = image.width words = description.split() lines = [] current_line = [] From ef0d7bc31d1bb6ed9896e4f4e19506a25fd4b33e Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Thu, 2 Jan 2025 00:08:19 -0500 Subject: [PATCH 06/10] dim_outside_window --- openadapt/models.py | 1 + openadapt/plotting.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/openadapt/models.py b/openadapt/models.py index b2e9a4224..03b60329e 100644 --- a/openadapt/models.py +++ b/openadapt/models.py @@ -573,6 +573,7 @@ def prompt_for_description(self, return_image: bool = False) -> str: darken_outside=0.7, display_text=False, marker_fill_transparency=0, + dim_outside_window=False, ) if self.text: diff --git a/openadapt/plotting.py b/openadapt/plotting.py index b0bc0b932..1cffb8261 100644 --- a/openadapt/plotting.py +++ b/openadapt/plotting.py @@ -228,6 +228,7 @@ def display_event( diff: bool = False, darken_outside: float | None = None, display_text: bool = True, + dim_outside_window: bool = True, ) -> Image.Image: """Display an action event on the image. @@ -247,6 +248,7 @@ def display_event( the ellipse for mouse events. Range 0-1, where 1 is completely black. Defaults to None (no darkening). display_text (bool): Whether to display action text. Defaults to True. + dim_outside_window (bool): Whether to dim outside the WindowEvent area. Returns: PIL.Image.Image: The image with the action event displayed on it. @@ -267,14 +269,15 @@ def display_event( width_ratio, height_ratio = utils.get_scale_ratios(action_event) # dim area outside window event - if not window_event: - logger.error(f"{window_event=}") - else: - x0 = window_event.left * width_ratio - y0 = window_event.top * height_ratio - x1 = x0 + window_event.width * width_ratio - y1 = y0 + window_event.height * height_ratio - image = draw_rectangle(x0, y0, x1, y1, image, outline_width=5) + if dim_outside_window: + if not window_event: + logger.error(f"{window_event=}") + else: + x0 = window_event.left * width_ratio + y0 = window_event.top * height_ratio + x1 = x0 + window_event.width * width_ratio + y1 = y0 + window_event.height * height_ratio + image = draw_rectangle(x0, y0, x1, y1, image, outline_width=5) # display diff bbox if diff: From ee42addfe5637e071b3903d6471d340b3c980e88 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Thu, 2 Jan 2025 00:17:17 -0500 Subject: [PATCH 07/10] add module docstringg --- experiments/describe_actions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py index 2303a99a2..3561d6d8e 100644 --- a/experiments/describe_actions.py +++ b/experiments/describe_actions.py @@ -1,3 +1,5 @@ +"""Generate natural language descriptions from actions.""" + from pprint import pformat from loguru import logger from PIL import Image, ImageDraw From 16f06d6a795983ae79ba0cc6b12f5df05127d1f9 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Thu, 2 Jan 2025 00:28:25 -0500 Subject: [PATCH 08/10] add browser to visualize.py --- openadapt/visualize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openadapt/visualize.py b/openadapt/visualize.py index ff47712ae..3d315676a 100644 --- a/openadapt/visualize.py +++ b/openadapt/visualize.py @@ -159,6 +159,7 @@ def main( recording_id: int = None, diff_video: bool = False, cleanup: bool = True, + browser: str = None, ) -> bool: """Visualize a recording. @@ -167,6 +168,7 @@ def main( recording_id (int, optional): The ID of the recording to visualize. diff_video (bool): Whether to diff Screenshots against video frames. cleanup (bool): Whether to remove the HTML file after it is displayed. + browser (str, optional): Command to open the browser executable. Returns: bool: True if visualization was successful, None otherwise. @@ -445,7 +447,8 @@ def main( result = show( # noqa: F841 layout( rows, - ) + ), + browser=browser, ) def _cleanup() -> None: From 76698435980c2492ed9e1aca3677b3f6da1771bb Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Thu, 2 Jan 2025 00:30:28 -0500 Subject: [PATCH 09/10] subrocess browser --- openadapt/visualize.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/openadapt/visualize.py b/openadapt/visualize.py index 3d315676a..92cf6d063 100644 --- a/openadapt/visualize.py +++ b/openadapt/visualize.py @@ -444,12 +444,15 @@ def main( os.makedirs(RECORDING_DIR_PATH, exist_ok=True) output_file(fname_out, title=title) - result = show( # noqa: F841 - layout( - rows, - ), - browser=browser, - ) + # Open the file using the specified browser command or fallback + if browser: + import subprocess + + logger.info(f"Opening browser with command: {browser}") + subprocess.run([browser, f"file://{fname_out}"], check=True) + else: + logger.info("Falling back to default browser behavior") + result = show(layout(rows)) # noqa: F841 def _cleanup() -> None: os.remove(fname_out) From 8be07c19b803dda1e744cb6fd3f0368ece37ca87 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Thu, 2 Jan 2025 00:38:32 -0500 Subject: [PATCH 10/10] show regardless of browser --- openadapt/visualize.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openadapt/visualize.py b/openadapt/visualize.py index 92cf6d063..8722d0883 100644 --- a/openadapt/visualize.py +++ b/openadapt/visualize.py @@ -444,15 +444,13 @@ def main( os.makedirs(RECORDING_DIR_PATH, exist_ok=True) output_file(fname_out, title=title) - # Open the file using the specified browser command or fallback + result = show(layout(rows)) # noqa: F841 + if browser: import subprocess logger.info(f"Opening browser with command: {browser}") subprocess.run([browser, f"file://{fname_out}"], check=True) - else: - logger.info("Falling back to default browser behavior") - result = show(layout(rows)) # noqa: F841 def _cleanup() -> None: os.remove(fname_out)