From 21d0a297b7e48531e33ba12d16840b59a4220751 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Wed, 1 Jan 2025 23:36:11 -0500
Subject: [PATCH 01/10] describe_actions.py with PIL

---
 ...describe_action.py => describe_actions.py} | 62 +++++++------------
 1 file changed, 24 insertions(+), 38 deletions(-)
 rename experiments/{describe_action.py => describe_actions.py} (66%)

diff --git a/experiments/describe_action.py b/experiments/describe_actions.py
similarity index 66%
rename from experiments/describe_action.py
rename to experiments/describe_actions.py
index 81a79f67c..67a40eff9 100644
--- a/experiments/describe_action.py
+++ b/experiments/describe_actions.py
@@ -1,35 +1,29 @@
-"""Generate action descriptions."""
-
 from pprint import pformat
-
 from loguru import logger
-import cv2
+from PIL import Image, ImageDraw, ImageFont
 import numpy as np
-
 from openadapt.db import crud
 
 
 def embed_description(
-    image: np.ndarray,
+    image: Image.Image,
     description: str,
     x: int = None,
     y: int = None,
-) -> np.ndarray:
+) -> Image.Image:
     """Embed a description into an image at the specified location.
 
     Args:
-        image (np.ndarray): The image to annotate.
+        image (Image.Image): The image to annotate.
         description (str): The text to embed.
         x (int, optional): The x-coordinate. Defaults to None (centered).
         y (int, optional): The y-coordinate. Defaults to None (centered).
 
     Returns:
-        np.ndarray: The annotated image.
+        Image.Image: The annotated image.
     """
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    font_scale = 1
-    font_color = (255, 255, 255)  # White
-    line_type = 1
+    draw = ImageDraw.Draw(image)
+    font = ImageFont.load_default()  # Replace with a TTF font if needed
 
     # Split description into multiple lines
     max_width = 60  # Maximum characters per line
@@ -47,34 +41,27 @@ def embed_description(
 
     # Default to center if coordinates are not provided
     if x is None or y is None:
-        x = image.shape[1] // 2
-        y = image.shape[0] // 2
+        x = image.width // 2
+        y = image.height // 2
 
-    # Draw semi-transparent background and text
+    # Calculate text dimensions and draw semi-transparent background and text
     for i, line in enumerate(lines):
-        text_size, _ = cv2.getTextSize(line, font, font_scale, line_type)
-        text_x = max(0, min(x - text_size[0] // 2, image.shape[1] - text_size[0]))
-        text_y = y + i * 20
+        bbox = draw.textbbox((0, 0), line, font=font)
+        text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1]
+        text_x = max(0, min(x - text_width // 2, image.width - text_width))
+        text_y = y + i * text_height
 
         # Draw background
-        cv2.rectangle(
-            image,
-            (text_x - 15, text_y - 25),
-            (text_x + text_size[0] + 15, text_y + 15),
-            (0, 0, 0),
-            -1,
+        background_box = (
+            text_x - 15,
+            text_y - 5,
+            text_x + text_width + 15,
+            text_y + text_height + 5,
         )
+        draw.rectangle(background_box, fill=(0, 0, 0, 128))
 
         # Draw text
-        cv2.putText(
-            image,
-            line,
-            (text_x, text_y),
-            font,
-            font_scale,
-            font_color,
-            line_type,
-        )
+        draw.text((text_x, text_y), line, fill=(255, 255, 255), font=font)
 
     return image
 
@@ -88,8 +75,8 @@ def main() -> None:
         for action in action_events:
             description, image = action.prompt_for_description(return_image=True)
 
-            # Convert image to numpy array for OpenCV compatibility
-            image = np.array(image)
+            # Convert image to PIL.Image for compatibility
+            image = Image.fromarray(np.array(image))
 
             if action.mouse_x is not None and action.mouse_y is not None:
                 # Use the mouse coordinates for mouse events
@@ -105,8 +92,7 @@ def main() -> None:
 
             logger.info(f"{action=}")
             logger.info(f"{description=}")
-            cv2.imshow("Annotated Image", annotated_image)
-            cv2.waitKey(0)
+            annotated_image.show()  # Opens the annotated image using the default viewer
             descriptions.append(description)
 
         logger.info(f"descriptions=\n{pformat(descriptions)}")

From caaeeec9058aec4370cd157ab7a155b6b44eb9ce Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Wed, 1 Jan 2025 23:36:21 -0500
Subject: [PATCH 02/10] import sounddevice inside record_audio()

---
 openadapt/record.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openadapt/record.py b/openadapt/record.py
index 4740da8c9..a76de017a 100644
--- a/openadapt/record.py
+++ b/openadapt/record.py
@@ -35,7 +35,6 @@
 
 import numpy as np
 import psutil
-import sounddevice
 import soundfile
 import websockets.sync.server
 import whisper
@@ -1082,6 +1081,8 @@ def record_audio(
 
     audio_frames = []  # to store audio frames
 
+    import sounddevice
+
     def audio_callback(
         indata: np.ndarray, frames: int, time: Any, status: sounddevice.CallbackFlags
     ) -> None:

From 116168b9677ffea58936ce144a38d9b3ef24506f Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Wed, 1 Jan 2025 23:56:29 -0500
Subject: [PATCH 03/10] get_scaling_factor

---
 experiments/describe_actions.py | 24 +++++++++++++++++-------
 openadapt/utils.py              | 12 ++++++++++++
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py
index 67a40eff9..b17412f9a 100644
--- a/experiments/describe_actions.py
+++ b/experiments/describe_actions.py
@@ -1,8 +1,13 @@
 from pprint import pformat
 from loguru import logger
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw
 import numpy as np
+
 from openadapt.db import crud
+from openadapt.plotting import get_font
+from openadapt.utils import get_scaling_factor
+
+scaling_factor = get_scaling_factor()
 
 
 def embed_description(
@@ -23,7 +28,8 @@ def embed_description(
         Image.Image: The annotated image.
     """
     draw = ImageDraw.Draw(image)
-    font = ImageFont.load_default()  # Replace with a TTF font if needed
+    font_size = 30  # Set font size (2x the default size)
+    font = get_font("Arial.ttf", font_size)
 
     # Split description into multiple lines
     max_width = 60  # Maximum characters per line
@@ -39,10 +45,14 @@ def embed_description(
     if current_line:
         lines.append(" ".join(current_line))
 
-    # Default to center if coordinates are not provided
+    # Default to top left if coordinates are not provided
     if x is None or y is None:
-        x = image.width // 2
-        y = image.height // 2
+        x = 0
+        y = 0
+
+    # Adjust coordinates for scaling factor
+    x = int(x * scaling_factor)
+    y = int(y * scaling_factor)
 
     # Calculate text dimensions and draw semi-transparent background and text
     for i, line in enumerate(lines):
@@ -83,8 +93,8 @@ def main() -> None:
                 annotated_image = embed_description(
                     image,
                     description,
-                    x=int(action.mouse_x) * 2,
-                    y=int(action.mouse_y) * 2,
+                    x=int(action.mouse_x),
+                    y=int(action.mouse_y),
                 )
             else:
                 # Center the text for other events
diff --git a/openadapt/utils.py b/openadapt/utils.py
index 524441946..4e1a0f44a 100644
--- a/openadapt/utils.py
+++ b/openadapt/utils.py
@@ -1087,6 +1087,18 @@ def get_html_prompt(html: str, convert_to_markdown: bool = False) -> str:
     return str(soup)
 
 
+def get_scaling_factor() -> int:
+    """Determine the scaling factor using AppKit on macOS."""
+    if sys.platform == "darwin":
+        from AppKit import NSScreen
+
+        main_screen = NSScreen.mainScreen()
+        backing_scale = main_screen.backingScaleFactor()
+        logger.info(f"Backing Scale Factor: {backing_scale}")
+        return int(backing_scale)
+    return 1  # Default for Windows/Linux
+
+
 class WrapStdout:
     """Class to be used a target for multiprocessing.Process."""
 

From adc3c36a21bed3c0aec0cfc832e0b201f2bee072 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Wed, 1 Jan 2025 23:58:30 -0500
Subject: [PATCH 04/10] show text in top left

---
 experiments/describe_actions.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py
index b17412f9a..9e4b699b1 100644
--- a/experiments/describe_actions.py
+++ b/experiments/describe_actions.py
@@ -13,16 +13,16 @@
 def embed_description(
     image: Image.Image,
     description: str,
-    x: int = None,
-    y: int = None,
+    x: int = 0,
+    y: int = 0,
 ) -> Image.Image:
     """Embed a description into an image at the specified location.
 
     Args:
         image (Image.Image): The image to annotate.
         description (str): The text to embed.
-        x (int, optional): The x-coordinate. Defaults to None (centered).
-        y (int, optional): The y-coordinate. Defaults to None (centered).
+        x (int, optional): The x-coordinate. Defaults to 0.
+        y (int, optional): The y-coordinate. Defaults to 0.
 
     Returns:
         Image.Image: The annotated image.
@@ -45,11 +45,6 @@ def embed_description(
     if current_line:
         lines.append(" ".join(current_line))
 
-    # Default to top left if coordinates are not provided
-    if x is None or y is None:
-        x = 0
-        y = 0
-
     # Adjust coordinates for scaling factor
     x = int(x * scaling_factor)
     y = int(y * scaling_factor)
@@ -93,8 +88,6 @@ def main() -> None:
                 annotated_image = embed_description(
                     image,
                     description,
-                    x=int(action.mouse_x),
-                    y=int(action.mouse_y),
                 )
             else:
                 # Center the text for other events

From baede4f8609b3fd7225b2ce05e4992f00b09cc65 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 2 Jan 2025 00:03:47 -0500
Subject: [PATCH 05/10] max_width = image.width

---
 experiments/describe_actions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py
index 9e4b699b1..2303a99a2 100644
--- a/experiments/describe_actions.py
+++ b/experiments/describe_actions.py
@@ -32,7 +32,7 @@ def embed_description(
     font = get_font("Arial.ttf", font_size)
 
     # Split description into multiple lines
-    max_width = 60  # Maximum characters per line
+    max_width = image.width
     words = description.split()
     lines = []
     current_line = []

From ef0d7bc31d1bb6ed9896e4f4e19506a25fd4b33e Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 2 Jan 2025 00:08:19 -0500
Subject: [PATCH 06/10] dim_outside_window

---
 openadapt/models.py   |  1 +
 openadapt/plotting.py | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/openadapt/models.py b/openadapt/models.py
index b2e9a4224..03b60329e 100644
--- a/openadapt/models.py
+++ b/openadapt/models.py
@@ -573,6 +573,7 @@ def prompt_for_description(self, return_image: bool = False) -> str:
             darken_outside=0.7,
             display_text=False,
             marker_fill_transparency=0,
+            dim_outside_window=False,
         )
 
         if self.text:
diff --git a/openadapt/plotting.py b/openadapt/plotting.py
index b0bc0b932..1cffb8261 100644
--- a/openadapt/plotting.py
+++ b/openadapt/plotting.py
@@ -228,6 +228,7 @@ def display_event(
     diff: bool = False,
     darken_outside: float | None = None,
     display_text: bool = True,
+    dim_outside_window: bool = True,
 ) -> Image.Image:
     """Display an action event on the image.
 
@@ -247,6 +248,7 @@ def display_event(
           the ellipse for mouse events. Range 0-1, where 1 is completely black.
           Defaults to None (no darkening).
         display_text (bool): Whether to display action text. Defaults to True.
+        dim_outside_window (bool): Whether to dim outside the WindowEvent area.
 
     Returns:
         PIL.Image.Image: The image with the action event displayed on it.
@@ -267,14 +269,15 @@ def display_event(
     width_ratio, height_ratio = utils.get_scale_ratios(action_event)
 
     # dim area outside window event
-    if not window_event:
-        logger.error(f"{window_event=}")
-    else:
-        x0 = window_event.left * width_ratio
-        y0 = window_event.top * height_ratio
-        x1 = x0 + window_event.width * width_ratio
-        y1 = y0 + window_event.height * height_ratio
-        image = draw_rectangle(x0, y0, x1, y1, image, outline_width=5)
+    if dim_outside_window:
+        if not window_event:
+            logger.error(f"{window_event=}")
+        else:
+            x0 = window_event.left * width_ratio
+            y0 = window_event.top * height_ratio
+            x1 = x0 + window_event.width * width_ratio
+            y1 = y0 + window_event.height * height_ratio
+            image = draw_rectangle(x0, y0, x1, y1, image, outline_width=5)
 
     # display diff bbox
     if diff:

From ee42addfe5637e071b3903d6471d340b3c980e88 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 2 Jan 2025 00:17:17 -0500
Subject: [PATCH 07/10] add module docstring

---
 experiments/describe_actions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/experiments/describe_actions.py b/experiments/describe_actions.py
index 2303a99a2..3561d6d8e 100644
--- a/experiments/describe_actions.py
+++ b/experiments/describe_actions.py
@@ -1,3 +1,5 @@
+"""Generate natural language descriptions from actions."""
+
 from pprint import pformat
 from loguru import logger
 from PIL import Image, ImageDraw

From 16f06d6a795983ae79ba0cc6b12f5df05127d1f9 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 2 Jan 2025 00:28:25 -0500
Subject: [PATCH 08/10] add browser to visualize.py

---
 openadapt/visualize.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/openadapt/visualize.py b/openadapt/visualize.py
index ff47712ae..3d315676a 100644
--- a/openadapt/visualize.py
+++ b/openadapt/visualize.py
@@ -159,6 +159,7 @@ def main(
     recording_id: int = None,
     diff_video: bool = False,
     cleanup: bool = True,
+    browser: str = None,
 ) -> bool:
     """Visualize a recording.
 
@@ -167,6 +168,7 @@ def main(
         recording_id (int, optional): The ID of the recording to visualize.
         diff_video (bool): Whether to diff Screenshots against video frames.
         cleanup (bool): Whether to remove the HTML file after it is displayed.
+        browser (str, optional): Command to open the browser executable.
 
     Returns:
         bool: True if visualization was successful, None otherwise.
@@ -445,7 +447,8 @@ def main(
     result = show(  # noqa: F841
         layout(
             rows,
-        )
+        ),
+        browser=browser,
     )
 
     def _cleanup() -> None:

From 76698435980c2492ed9e1aca3677b3f6da1771bb Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 2 Jan 2025 00:30:28 -0500
Subject: [PATCH 09/10] subprocess browser

---
 openadapt/visualize.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/openadapt/visualize.py b/openadapt/visualize.py
index 3d315676a..92cf6d063 100644
--- a/openadapt/visualize.py
+++ b/openadapt/visualize.py
@@ -444,12 +444,15 @@ def main(
     os.makedirs(RECORDING_DIR_PATH, exist_ok=True)
     output_file(fname_out, title=title)
 
-    result = show(  # noqa: F841
-        layout(
-            rows,
-        ),
-        browser=browser,
-    )
+    # Open the file using the specified browser command or fallback
+    if browser:
+        import subprocess
+
+        logger.info(f"Opening browser with command: {browser}")
+        subprocess.run([browser, f"file://{fname_out}"], check=True)
+    else:
+        logger.info("Falling back to default browser behavior")
+        result = show(layout(rows))  # noqa: F841
 
     def _cleanup() -> None:
         os.remove(fname_out)

From 8be07c19b803dda1e744cb6fd3f0368ece37ca87 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 2 Jan 2025 00:38:32 -0500
Subject: [PATCH 10/10] show regardless of browser

---
 openadapt/visualize.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/openadapt/visualize.py b/openadapt/visualize.py
index 92cf6d063..8722d0883 100644
--- a/openadapt/visualize.py
+++ b/openadapt/visualize.py
@@ -444,15 +444,13 @@ def main(
     os.makedirs(RECORDING_DIR_PATH, exist_ok=True)
     output_file(fname_out, title=title)
 
-    # Open the file using the specified browser command or fallback
+    result = show(layout(rows))  # noqa: F841
+
     if browser:
         import subprocess
 
         logger.info(f"Opening browser with command: {browser}")
         subprocess.run([browser, f"file://{fname_out}"], check=True)
-    else:
-        logger.info("Falling back to default browser behavior")
-        result = show(layout(rows))  # noqa: F841
 
     def _cleanup() -> None:
         os.remove(fname_out)