From 6acc88693207af13376758f45750f09915cda0e6 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 20 Nov 2025 11:25:17 -0500 Subject: [PATCH 1/7] added maker example --- tests/test_maker.py | 313 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 tests/test_maker.py diff --git a/tests/test_maker.py b/tests/test_maker.py new file mode 100644 index 00000000..641a6505 --- /dev/null +++ b/tests/test_maker.py @@ -0,0 +1,313 @@ +import itertools +import logging +import random +import re +from collections import Counter +from typing import Optional + +from openai import OpenAI +from pydantic.dataclasses import dataclass + +from effectful.handlers import futures +from effectful.handlers.futures import Executor, ThreadPoolFuturesInterpretation +from effectful.handlers.llm import Template +from effectful.handlers.llm.providers import LLMLoggingHandler, OpenAIAPIProvider +from effectful.ops.semantics import handler + + +@dataclass +class Step: + start: int + end: int + + +@dataclass(frozen=True) +class GameState: + """State of a game of towers of Hanoi where the initial state is a + set of towers. We use higher numbers to represent smaller + disks. So [1,2,3] is a valid tower. The towers are all stacked at + the left at the start (self.towers[0]), and the goal is to move + them to the rightmost tower (self.towers[-1]).""" + + size: int + towers: tuple[tuple[int, ...], ...] 
+ + @classmethod + def new(cls, size: int) -> "GameState": + towers = [[] for _ in range(size)] + towers[0] = list(reversed(range(size))) + towers = tuple(tuple(tower) for tower in towers) + return cls(size, towers) + + def visualise_text(self): + max_disk = self.size + width = max_disk * 2 + 3 + for i, tower in enumerate(self.towers): + print(f"\nTower {i}:") + for disk in reversed(tower): + disk_width = (disk + 1) * 2 - 1 + padding = (max_disk - disk_width) // 2 + print(" " * padding + "=" * disk_width + " " * padding) + print("=" * width) + print() + + def visualise_image(self): + "Uses python graphics libraries to visualise the state of the hanoi game." + try: + from PIL import Image, ImageDraw + except ImportError: + return None + # Pillow-based visualization + tower_width = 150 + disk_height = 30 + base_height = 20 + pole_width = 10 + img_width = tower_width * len(self.towers) + img_height = disk_height * (self.size + 1) + base_height + 50 + + img = Image.new("RGB", (img_width, img_height), "white") + draw = ImageDraw.Draw(img) + + for tower_idx, tower in enumerate(self.towers): + # Draw pole + pole_x = tower_idx * tower_width + tower_width // 2 + pole_top = 40 + pole_bottom = img_height - base_height - 10 + draw.rectangle( + [ + pole_x - pole_width // 2, + pole_top, + pole_x + pole_width // 2, + pole_bottom, + ], + fill="brown", + ) + + # Draw base + base_y = img_height - base_height - 10 + draw.rectangle( + [ + tower_idx * tower_width + 20, + base_y, + (tower_idx + 1) * tower_width - 20, + base_y + base_height, + ], + fill="gray", + ) + + # Draw disks + for disk_idx, disk in enumerate(tower): + disk_width_px = 30 + disk * 15 + disk_y = pole_bottom - (disk_idx + 1) * disk_height + disk_x1 = pole_x - disk_width_px // 2 + disk_x2 = pole_x + disk_width_px // 2 + + # Color gradient based on disk size + color_intensity = int(255 * (disk / self.size)) + color = (color_intensity, 100, 255 - color_intensity) + draw.rectangle( + [disk_x1, disk_y, disk_x2, disk_y + 
disk_height - 5], + fill=color, + outline="black", + width=2, + ) + return img + + def visualise(self): + img = self.visualise_image() + if img: + img.show() + else: + self.visualise_text() + + def apply(self, step: Step) -> Optional["GameState"]: + """ + Given a tower `start` and a target tower `end` moves the topmost disk to the end tower. + """ + start, end = step.start, step.end + + if not (0 <= start < len(self.towers) and 0 <= end < len(self.towers)): + return None + + # start tower is non empty + if len(self.towers[start]) == 0: + return None + + # end tower is a valid target + if len(self.towers[end]) > 0 and self.towers[start][-1] > self.towers[end][-1]: + return None + + # create state with the move applied + new_towers = [list(tower) for tower in self.towers] + disk = new_towers[start].pop() + new_towers[end].append(disk) + + # + new_state = GameState( + size=self.size, towers=tuple(tuple(tower) for tower in new_towers) + ) + return new_state + + def steps_to_complete(self) -> int: + """Compute the number of steps to complete the towers of hanoi from a given configuration if using the optimal algorithm.""" + # Count disks on each tower + total_moves = 0 + + # For each tower that's not the destination, we need to move all its disks + for tower_idx, tower in enumerate(self.towers): + if tower_idx == self.size - 1: + continue + + # Number of disks on this tower + n_disks = len(tower) + + if n_disks > 0: + # Moving n disks from one peg to another requires 2^n - 1 moves + total_moves += (2**n_disks) - 1 + + return total_moves + + def is_done(self) -> bool: + return all(len(tower) == 0 for tower in self.towers[:-1]) and all( + self.towers[-1][i] > self.towers[-1][i + 1] + for i in range(len(self.towers[-1]) - 1) + ) + + def valid_steps(self) -> list[Step]: + steps = [] + for i, tower_i in enumerate(self.towers): + for j, tower_j in enumerate(self.towers): + if i == j: + continue + if len(tower_i) == 0: + continue + # if tower_i's disk is smaller than tower_j's 
topmost, then it is valid to move from tower i to j + if len(tower_j) == 0 or tower_i[-1] < tower_j[-1]: + steps.append(Step(i, j)) + return steps + + +class MicroAgent: + """Micro agent (based on MAKERS paper) responsible for predicting a single next step.""" + + game_state: GameState + + def __init__(self, state: GameState): + self.game_state = state + + @Template.define + def predict_next_step(self) -> str: + """ + Given the state of the game of towers of Hanoi as follows: + + {self.game_state} + + Predict the next step to complete the game (moving all disks to the rightmost tower). + + Give a reasoning for your prediction, and return the step following the format: + + start,end + + where start and end are zero-based indices for the towers to move. Be concise and avoid wordy answers. + """ + pass + + def parse_response(self, response: str) -> Step | None: + "Parse the predicted step from an LLM response." + pattern = r"\s*(\d+)\s*,\s*(\d+)\s*" + m = re.search(pattern, response) + if not m: + raise ValueError( + f"No valid start,end tag found in: {response!r}" + ) + return Step(int(m.group(1)), int(m.group(2))) + + def has_no_red_flags(self, response: str) -> Step | None: + """Returns the underlying step if the provided step has no red flags.""" + if len(response) > 450.0: # based on a sample + return None + + step = self.parse_response(response) + if not step: + return None + if not ( + 0 <= step.start < len(self.game_state.towers) + and 0 <= step.end < len(self.game_state.towers) + ): + return None + if step not in self.game_state.valid_steps(): + return None + return step + + def get_vote(self): # algorithm 3 + while True: + resp = self.predict_next_step() + if step := self.has_no_red_flags(resp): + return step + + +class FirstToAheadMoveSelector: + k: int + game_state: GameState + agents: list[MicroAgent] + votes: Counter[Step] + + def __init__(self, state: GameState, no_agents=6, k=3): + self.k = k + self.game_state = state + self.agents = 
[MicroAgent(self.game_state) for _ in range(no_agents)] + self.votes = Counter() + + def do_voting(self) -> Step: # algorithm 2 + # run n in parallel repeatedly until k come out in top + while True: + # submit a batch of votes + for vote in futures.as_completed( + Executor.submit(agent.get_vote) for agent in self.agents + ): + self.votes[vote] += 1 + max_other_votes = max( + self.votes[o_vote] for o_vote in self.votes if o_vote != vote + ) + if self.votes[vote] >= max_other_votes + self.k: + return vote + + +def calculate_average_sample_size(): + """Function I used to calculate the number 450. in the above code.""" + sizes = [] + samples = [] + + with handler(OpenAIAPIProvider(OpenAI())): + for _ in range(10): + s = GameState.new(random.randint(3, 6)) + for i in range(100): + step = random.choice(s.valid_steps()) + s = s.apply(step) or s + resp = MicroAgent(s).predict_next_step() + samples.append(resp) + sizes.append(len(resp)) + return sum(sizes) / len(sizes) + + +def solve_hanoi(state: GameState): + log = [] + + for i in itertools.count(): + print(f"step {i} - {state}") + step = FirstToAheadMoveSelector(state).do_voting() + # track the step at each point + log.append((state, step)) + + state = state.apply(step) + state.visualise() + + +logging.basicConfig() + +with ( + handler(ThreadPoolFuturesInterpretation(max_workers=3)), + handler(LLMLoggingHandler()), + handler(OpenAIAPIProvider(OpenAI())), +): + solve_hanoi(state=GameState.new(3)) From ab6695e7567acfa97cbf78c7453c65af755c4199 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 20 Nov 2025 11:36:50 -0500 Subject: [PATCH 2/7] implmented MAKER --- tests/test_maker.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/test_maker.py b/tests/test_maker.py index 641a6505..2c591b2f 100644 --- a/tests/test_maker.py +++ b/tests/test_maker.py @@ -2,6 +2,7 @@ import logging import random import re +import sys from collections import Counter from typing import Optional @@ 
-15,7 +16,7 @@ from effectful.ops.semantics import handler -@dataclass +@dataclass(frozen=True) class Step: start: int end: int @@ -267,7 +268,8 @@ def do_voting(self) -> Step: # algorithm 2 ): self.votes[vote] += 1 max_other_votes = max( - self.votes[o_vote] for o_vote in self.votes if o_vote != vote + (self.votes[o_vote] for o_vote in self.votes if o_vote != vote), + default=0, ) if self.votes[vote] >= max_other_votes + self.k: return vote @@ -303,11 +305,15 @@ def solve_hanoi(state: GameState): state.visualise() -logging.basicConfig() +logging.basicConfig( + level=logging.INFO, + stream=sys.stdout, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) with ( - handler(ThreadPoolFuturesInterpretation(max_workers=3)), - handler(LLMLoggingHandler()), + handler(ThreadPoolFuturesInterpretation()), handler(OpenAIAPIProvider(OpenAI())), + handler(LLMLoggingHandler()), ): solve_hanoi(state=GameState.new(3)) From 78c394133271511833f6e3cc8621b63dbd35f1ab Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 20 Nov 2025 11:43:45 -0500 Subject: [PATCH 3/7] updated parse response to return None on unparseable predictions --- tests/test_maker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_maker.py b/tests/test_maker.py index 2c591b2f..89ed2cda 100644 --- a/tests/test_maker.py +++ b/tests/test_maker.py @@ -218,9 +218,7 @@ def parse_response(self, response: str) -> Step | None: pattern = r"\s*(\d+)\s*,\s*(\d+)\s*" m = re.search(pattern, response) if not m: - raise ValueError( - f"No valid start,end tag found in: {response!r}" - ) + return None return Step(int(m.group(1)), int(m.group(2))) def has_no_red_flags(self, response: str) -> Step | None: From ff14d96ce7665b33fdb0b66f32749ea15bba7d10 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 20 Nov 2025 16:36:33 -0500 Subject: [PATCH 4/7] fixed minor bug in futures --- tests/test_maker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/tests/test_maker.py b/tests/test_maker.py index 89ed2cda..0acaf471 100644 --- a/tests/test_maker.py +++ b/tests/test_maker.py @@ -262,8 +262,9 @@ def do_voting(self) -> Step: # algorithm 2 while True: # submit a batch of votes for vote in futures.as_completed( - Executor.submit(agent.get_vote) for agent in self.agents + [Executor.submit(agent.get_vote) for agent in self.agents] ): + vote = vote.result() self.votes[vote] += 1 max_other_votes = max( (self.votes[o_vote] for o_vote in self.votes if o_vote != vote), From 5df2dbe009e8bd95e85ee7f28bc7d9533d571d38 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 20 Nov 2025 17:25:17 -0500 Subject: [PATCH 5/7] added break on complete to solve_hanoi --- tests/test_maker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_maker.py b/tests/test_maker.py index 0acaf471..22ae0e97 100644 --- a/tests/test_maker.py +++ b/tests/test_maker.py @@ -302,6 +302,8 @@ def solve_hanoi(state: GameState): state = state.apply(step) state.visualise() + if state.is_done(): + break logging.basicConfig( From 9dbdbd00d56253131b1936acf419f4e9ad2a636c Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Tue, 9 Dec 2025 20:27:12 -0500 Subject: [PATCH 6/7] updated with latest versions --- tests/test_maker.py | 214 ++++++++++++++------------------------------ 1 file changed, 69 insertions(+), 145 deletions(-) diff --git a/tests/test_maker.py b/tests/test_maker.py index 22ae0e97..e6ffb7f9 100644 --- a/tests/test_maker.py +++ b/tests/test_maker.py @@ -1,25 +1,24 @@ import itertools import logging -import random -import re import sys -from collections import Counter +from abc import ABC, abstractmethod from typing import Optional -from openai import OpenAI +import pydantic +from litellm import ConfigDict +from PIL import Image, ImageDraw from pydantic.dataclasses import dataclass -from effectful.handlers import futures -from effectful.handlers.futures import Executor, ThreadPoolFuturesInterpretation from effectful.handlers.llm 
import Template -from effectful.handlers.llm.providers import LLMLoggingHandler, OpenAIAPIProvider +from effectful.handlers.llm.providers import ( + LiteLLMProvider, + RetryLLMHandler, +) +from effectful.handlers.llm.sampling import KAheadSampler from effectful.ops.semantics import handler +from effectful.ops.types import NotHandled - -@dataclass(frozen=True) -class Step: - start: int - end: int +type Step = tuple[int, int] @dataclass(frozen=True) @@ -40,25 +39,8 @@ def new(cls, size: int) -> "GameState": towers = tuple(tuple(tower) for tower in towers) return cls(size, towers) - def visualise_text(self): - max_disk = self.size - width = max_disk * 2 + 3 - for i, tower in enumerate(self.towers): - print(f"\nTower {i}:") - for disk in reversed(tower): - disk_width = (disk + 1) * 2 - 1 - padding = (max_disk - disk_width) // 2 - print(" " * padding + "=" * disk_width + " " * padding) - print("=" * width) - print() - - def visualise_image(self): + def visualise_image(self) -> Image: "Uses python graphics libraries to visualise the state of the hanoi game." - try: - from PIL import Image, ImageDraw - except ImportError: - return None - # Pillow-based visualization tower_width = 150 disk_height = 30 base_height = 20 @@ -116,16 +98,13 @@ def visualise_image(self): def visualise(self): img = self.visualise_image() - if img: - img.show() - else: - self.visualise_text() + img.show() def apply(self, step: Step) -> Optional["GameState"]: """ Given a tower `start` and a target tower `end` moves the topmost disk to the end tower. 
""" - start, end = step.start, step.end + start, end = step if not (0 <= start < len(self.towers) and 0 <= end < len(self.towers)): return None @@ -149,25 +128,6 @@ def apply(self, step: Step) -> Optional["GameState"]: ) return new_state - def steps_to_complete(self) -> int: - """Compute the number of steps to complete the towers of hanoi from a given configuration if using the optimal algorithm.""" - # Count disks on each tower - total_moves = 0 - - # For each tower that's not the destination, we need to move all its disks - for tower_idx, tower in enumerate(self.towers): - if tower_idx == self.size - 1: - continue - - # Number of disks on this tower - n_disks = len(tower) - - if n_disks > 0: - # Moving n disks from one peg to another requires 2^n - 1 moves - total_moves += (2**n_disks) - 1 - - return total_moves - def is_done(self) -> bool: return all(len(tower) == 0 for tower in self.towers[:-1]) and all( self.towers[-1][i] > self.towers[-1][i + 1] @@ -184,24 +144,62 @@ def valid_steps(self) -> list[Step]: continue # if tower_i's disk is smaller than tower_j's topmost, then it is valid to move from tower i to j if len(tower_j) == 0 or tower_i[-1] < tower_j[-1]: - steps.append(Step(i, j)) + steps.append((i, j)) return steps -class MicroAgent: - """Micro agent (based on MAKERS paper) responsible for predicting a single next step.""" +class Step(ABC): + @property + @abstractmethod + def start(self) -> int: + raise NotImplementedError + + @property + @abstractmethod + def end(self) -> int: + raise NotImplementedError + + +def build_validated_model(game_state: GameState) -> type[Step]: + valid_steps = game_state.valid_steps() + + @pydantic.dataclasses.dataclass(frozen=True) + class StepModel: + start: int + end: int + explanation: str = "" + model_config = ConfigDict(extra="forbid") + + @pydantic.field_validator("start", "end", mode="before") + def validate_indices(cls, v, info): + if isinstance(v, int): + if not (0 <= v < len(game_state.towers)): + raise 
ValueError(f"{info.field_name} {v} out of range") + else: + raise TypeError("start/end must both be int") + return v - game_state: GameState + @pydantic.model_validator(mode="after") + def validate_step(self): + if (self.start, self.end) not in valid_steps: + raise ValueError("step is not in {self.valid_steps}") + return self - def __init__(self, state: GameState): - self.game_state = state + def __hash__(self): + return hash((self.start, self.end)) + + return StepModel + + +def predict_next_step(game_state: GameState) -> Step: + ValidStep = build_validated_model(game_state) @Template.define - def predict_next_step(self) -> str: + def predict_next_step_inner(game_state) -> ValidStep: """ Given the state of the game of towers of Hanoi as follows: - {self.game_state} + {game_state} Predict the next step to complete the game (moving all disks to the rightmost tower). @@ -211,84 +209,10 @@ def predict_next_step(self) -> str: where start and end are zero-based indices for the towers to move. Be concise and avoid wordy answers. """ - pass - - def parse_response(self, response: str) -> Step | None: - "Parse the predicted step from an LLM response." 
- pattern = r"\s*(\d+)\s*,\s*(\d+)\s*" - m = re.search(pattern, response) - if not m: - return None - return Step(int(m.group(1)), int(m.group(2))) - - def has_no_red_flags(self, response: str) -> Step | None: - """Returns the underlying step if the provided step has no red flags.""" - if len(response) > 450.0: # based on a sample - return None - - step = self.parse_response(response) - if not step: - return None - if not ( - 0 <= step.start < len(self.game_state.towers) - and 0 <= step.end < len(self.game_state.towers) - ): - return None - if step not in self.game_state.valid_steps(): - return None - return step - - def get_vote(self): # algorithm 3 - while True: - resp = self.predict_next_step() - if step := self.has_no_red_flags(resp): - return step - - -class FirstToAheadMoveSelector: - k: int - game_state: GameState - agents: list[MicroAgent] - votes: Counter[Step] - - def __init__(self, state: GameState, no_agents=6, k=3): - self.k = k - self.game_state = state - self.agents = [MicroAgent(self.game_state) for _ in range(no_agents)] - self.votes = Counter() - - def do_voting(self) -> Step: # algorithm 2 - # run n in parallel repeatedly until k come out in top - while True: - # submit a batch of votes - for vote in futures.as_completed( - [Executor.submit(agent.get_vote) for agent in self.agents] - ): - vote = vote.result() - self.votes[vote] += 1 - max_other_votes = max( - (self.votes[o_vote] for o_vote in self.votes if o_vote != vote), - default=0, - ) - if self.votes[vote] >= max_other_votes + self.k: - return vote - - -def calculate_average_sample_size(): - """Function I used to calculate the number 450. 
in the above code.""" - sizes = [] - samples = [] + raise NotHandled - with handler(OpenAIAPIProvider(OpenAI())): - for _ in range(10): - s = GameState.new(random.randint(3, 6)) - for i in range(100): - step = random.choice(s.valid_steps()) - s = s.apply(step) or s - resp = MicroAgent(s).predict_next_step() - samples.append(resp) - sizes.append(len(resp)) - return sum(sizes) / len(sizes) + s = predict_next_step_inner(game_state) + return (s.start, s.end) def solve_hanoi(state: GameState): @@ -296,11 +220,13 @@ def solve_hanoi(state: GameState): for i in itertools.count(): print(f"step {i} - {state}") - step = FirstToAheadMoveSelector(state).do_voting() + with handler(KAheadSampler()), handler(RetryLLMHandler()): + step = predict_next_step(state) # track the step at each point - log.append((state, step)) + if new_state := state.apply(step): + log.append((state, step)) - state = state.apply(step) + state = new_state or state state.visualise() if state.is_done(): break @@ -313,8 +239,6 @@ def solve_hanoi(state: GameState): ) with ( - handler(ThreadPoolFuturesInterpretation()), - handler(OpenAIAPIProvider(OpenAI())), - handler(LLMLoggingHandler()), + handler(LiteLLMProvider(model_name="gpt-4o-mini")), ): solve_hanoi(state=GameState.new(3)) From 86f5482c60df0e80461cfcbcb2075813326f5d5d Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Wed, 10 Dec 2025 10:05:07 -0500 Subject: [PATCH 7/7] lint --- tests/test_maker.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_maker.py b/tests/test_maker.py index e6ffb7f9..58442c55 100644 --- a/tests/test_maker.py +++ b/tests/test_maker.py @@ -18,7 +18,19 @@ from effectful.ops.semantics import handler from effectful.ops.types import NotHandled -type Step = tuple[int, int] +type Move = tuple[int, int] + + +class Step(ABC): + @property + @abstractmethod + def start(self) -> int: + raise NotImplementedError + + @property + @abstractmethod + def end(self) -> 
int: + raise NotImplementedError @dataclass(frozen=True) @@ -34,12 +46,12 @@ class GameState: @classmethod def new(cls, size: int) -> "GameState": - towers = [[] for _ in range(size)] + towers: list[list[int]] = [[] for _ in range(size)] towers[0] = list(reversed(range(size))) - towers = tuple(tuple(tower) for tower in towers) - return cls(size, towers) + state: tuple[tuple[int, ...], ...] = tuple(tuple(tower) for tower in towers) + return cls(size, state) - def visualise_image(self) -> Image: + def visualise_image(self) -> Image.Image: "Uses python graphics libraries to visualise the state of the hanoi game." tower_width = 150 disk_height = 30 @@ -100,7 +112,7 @@ def visualise(self): img = self.visualise_image() img.show() - def apply(self, step: Step) -> Optional["GameState"]: + def apply(self, step: Move) -> Optional["GameState"]: """ Given a tower `start` and a target tower `end` moves the topmost disk to the end tower. """ @@ -134,7 +146,7 @@ def is_done(self) -> bool: for i in range(len(self.towers[-1]) - 1) ) - def valid_steps(self) -> list[Step]: + def valid_steps(self) -> list[Move]: steps = [] for i, tower_i in enumerate(self.towers): for j, tower_j in enumerate(self.towers): @@ -148,18 +160,6 @@ def valid_steps(self) -> list[Step]: return steps -class Step(ABC): - @property - @abstractmethod - def start(self) -> int: - raise NotImplementedError - - @property - @abstractmethod - def end(self) -> int: - raise NotImplementedError - - def build_validated_model(game_state: GameState) -> type[Step]: valid_steps = game_state.valid_steps() @@ -188,14 +188,14 @@ def validate_step(self): def __hash__(self): return hash((self.start, self.end)) - return StepModel + return StepModel # type: ignore -def predict_next_step(game_state: GameState) -> Step: +def predict_next_step(game_state: GameState) -> Move: ValidStep = build_validated_model(game_state) @Template.define - def predict_next_step_inner(game_state) -> ValidStep: + def predict_next_step_inner(game_state) -> 
ValidStep: # type: ignore """ Given the state of the game of towers of Hanoi as follows: