Skip to content

Commit ea07b44

Browse files
committed
fix: better file path detection
1 parent 48eb874 commit ea07b44

File tree

1 file changed

+42
-36
lines changed

1 file changed

+42
-36
lines changed

cli/files.py

Lines changed: 42 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import re
2-
32
from pathlib import Path
43
from typing import List, Tuple
54

@@ -12,49 +11,61 @@ class FileHandler:
1211
def __init__(self, interface: ChatInterface):
1312
self.interface = interface
1413

14+
ext_pattern = r"\.[a-zA-Z0-9]{2,6}\b"
15+
path_chars = r"[\w\-. \/\\\\]"
16+
1517
self.patterns = [
16-
re.compile(r"file://([^\s]+)", re.IGNORECASE), # file:///path/to/file.ext
17-
re.compile(r'"([^"]+\.[a-zA-Z0-9]+)"'), # "path/to/file.ext"
18-
re.compile(r"'([^']+\.[a-zA-Z0-9]+)'"), # 'path/to/file.ext'
19-
re.compile(
20-
r"\b([~/.][\w\-./\\]*\.[a-zA-Z0-9]+)\b"
21-
), # ./file.txt, ~/docs/file.pdf
22-
re.compile(
23-
r"\b([A-Za-z]:[/\\][\w\-./\\]*\.[a-zA-Z0-9]+)\b"
24-
), # C:/path/file.txt (Windows)
25-
re.compile(
26-
r"\b(/[\w\-./]*\.[a-zA-Z0-9]+)\b"
27-
), # /absolute/path/file.txt (Unix)
18+
re.compile(r"file://(" + path_chars + r"*" + ext_pattern + r")", re.IGNORECASE),
19+
re.compile(r"\b([A-Za-z]:[\\/]" + path_chars + r"*" + ext_pattern + r")", re.IGNORECASE),
20+
re.compile(r"(?<![\w/\\:])(/" + path_chars + r"*" + ext_pattern + r")"),
21+
re.compile(r"\b((?:~|(?:\.\.?))[\\/]" + path_chars + r"*" + ext_pattern + r")"),
22+
re.compile(r"""
23+
(['"])
24+
(
25+
(?:(?!\1).)*?
26+
[/\\]
27+
(?:(?!\1).)*?
28+
""" + ext_pattern + r"""
29+
)
30+
\1
31+
""", re.VERBOSE),
32+
re.compile(r"\b([\w\-.]+\.(?:pdf|docx|xlsx|txt|csv|json|xml|log|py|js|html|css|zip|tar|gz|jpg|jpeg|png|gif|mp4|mov))\b", re.IGNORECASE)
2833
]
2934

3035
def extract_files(self, prompt: str) -> Tuple[str, List[str]]:
3136
"""Extract valid file paths from prompt, returning cleaned prompt and file list."""
32-
found_files = []
37+
found_files_set = set()
3338
cleaned_prompt = prompt
3439

3540
for pattern in self.patterns:
3641
matches = list(pattern.finditer(cleaned_prompt))
3742

3843
for match in reversed(matches):
39-
path_str = match.group(1) if match.groups() else match.group(0)
40-
if path_str.startswith("file://"):
41-
path_str = path_str[7:]
44+
if len(match.groups()) > 1:
45+
path_str = match.group(2)
46+
elif match.groups():
47+
path_str = match.group(1)
48+
else:
49+
continue
4250

4351
if self._is_like_file_path(path_str):
44-
expanded_path = Path(path_str).expanduser().resolve()
45-
46-
if expanded_path.exists() and expanded_path.is_file():
47-
if str(expanded_path) not in found_files:
48-
found_files.append(str(expanded_path))
49-
cleaned_prompt = (
50-
cleaned_prompt[: match.start()]
51-
+ cleaned_prompt[match.end() :]
52-
)
53-
else:
52+
cleaned_prompt = (
53+
cleaned_prompt[: match.start()]
54+
+ cleaned_prompt[match.end() :]
55+
)
56+
57+
try:
58+
expanded_path = Path(path_str).expanduser().resolve()
59+
except (RuntimeError, ValueError):
60+
self.interface.show_warning(f"Could not resolve path: {path_str}")
61+
continue
62+
63+
found_files_set.add(str(expanded_path))
64+
if not expanded_path.exists() or not expanded_path.is_file():
5465
self.interface.show_warning(f"File not found: {path_str}")
5566

5667
cleaned_prompt = re.sub(r"\s+", " ", cleaned_prompt).strip()
57-
return cleaned_prompt, found_files
68+
return cleaned_prompt, sorted(list(found_files_set))
5869

5970
def _is_like_file_path(self, path_str: str) -> bool:
6071
"""Check if string looks like a valid file path."""
@@ -63,17 +74,12 @@ def _is_like_file_path(self, path_str: str) -> bool:
6374

6475
if any(char in path_str for char in ["<", ">", "|", "*", "?", "\n", "\r"]):
6576
return False
66-
if "://" in path_str and not path_str.startswith("file://"):
77+
78+
if "://" in path_str and not path_str.lower().startswith("file://"):
6779
return False
68-
if " " in path_str and not (
69-
path_str.startswith('"') or path_str.startswith("'")
70-
):
71-
words = path_str.split()
72-
if len(words) > 3:
73-
return False
7480

7581
try:
76-
Path(path_str.replace("file://", ""))
82+
Path(path_str.replace("file://", "", 1))
7783
return True
7884
except (ValueError, OSError):
7985
return False

0 commit comments

Comments
 (0)