11import re
2-
32from pathlib import Path
43from typing import List , Tuple
54
@@ -12,49 +11,61 @@ class FileHandler:
def __init__(self, interface: ChatInterface):
    """Store the chat interface and compile the path-detection patterns.

    ``self.patterns`` is an ordered list of compiled regexes, tried from
    most to least specific. Every pattern exposes the path text in group 1,
    except the quoted-path pattern, which captures the quote character in
    group 1 and the path in group 2.

    Args:
        interface: Object used to report warnings to the user (stored as
            ``self.interface``).
    """
    self.interface = interface

    # A file extension: a dot followed by 2-6 alphanumerics.
    ext_pattern = r"\.[a-zA-Z0-9]{2,6}\b"
    # Characters allowed inside an unquoted path (spaces included).
    path_chars = r"[\w\-. /\\]"
    # A path may not start in the middle of another token. Without this,
    # the "/" of "./a.py" or "~/a.py" is picked up as an absolute path.
    not_in_path = r"(?<![\w/\\:.~])"
    # The extension ends the path only if no further path segment or
    # second extension (".tar" + ".gz") follows.
    path_end = r"(?![/\\]|\.\w)"
    # Lazy body: stop at the first plausible extension so that two paths in
    # one sentence are not merged into a single bogus path.
    path_tail = path_chars + r"*?" + ext_pattern + path_end

    self.patterns = [
        # file:///path/to/file.ext
        re.compile(r"file://(" + path_tail + r")", re.IGNORECASE),
        # C:/path/file.txt or C:\path\file.txt (Windows)
        re.compile(r"\b([A-Za-z]:[\\/]" + path_tail + r")", re.IGNORECASE),
        # /absolute/path/file.txt (Unix)
        re.compile(not_in_path + r"(/" + path_tail + r")"),
        # ~/docs/file.pdf, ./file.txt, ../file.txt
        # (a look-behind is used instead of \b, which can never match
        # between whitespace and "~" or ".")
        re.compile(not_in_path + r"((?:~|\.\.?)[\\/]" + path_tail + r")"),
        # "dir/file.ext" or 'dir/file.ext' (must contain a path separator)
        re.compile(
            r"""
            (['"])              # opening quote, remembered as group 1
            (
                (?:(?!\1).)*?   # anything up to a separator, not the quote
                [/\\]
                (?:(?!\1).)*?
            """
            + ext_pattern
            + r"""
            )
            \1                  # matching closing quote
            """,
            re.VERBOSE,
        ),
        # Bare file names with a well-known extension: report.pdf
        re.compile(
            r"\b([\w\-.]+\.(?:pdf|docx|xlsx|txt|csv|json|xml|log|py|js|html"
            r"|css|zip|tar|gz|jpg|jpeg|png|gif|mp4|mov))\b",
            re.IGNORECASE,
        ),
    ]
2934
def extract_files(self, prompt: str) -> Tuple[str, List[str]]:
    """Extract valid file paths from prompt, returning cleaned prompt and file list.

    Each pattern in ``self.patterns`` is run, in order, against the original
    prompt. A candidate path is accepted only when it resolves to an
    existing regular file; only then is its text removed from the prompt.
    Candidates that do not exist, or cannot be resolved, are reported via
    ``self.interface.show_warning`` and left in the prompt untouched.

    Args:
        prompt: Raw user prompt that may mention file paths.

    Returns:
        A ``(cleaned_prompt, files)`` tuple: the prompt with accepted path
        mentions removed and whitespace collapsed, and the sorted list of
        unique, fully resolved file paths.
    """
    # Spans already examined by a higher-priority pattern. Later patterns
    # skip anything overlapping them, so a fragment of an already handled
    # path (e.g. the bare file name at its end) is not matched again.
    claimed: List[Tuple[int, int]] = []
    # Spans to cut out of the prompt (mentions of files that exist).
    removals: List[Tuple[int, int]] = []
    found_files = set()

    for pattern in self.patterns:
        for match in pattern.finditer(prompt):
            start, end = match.span()
            if any(start < c_end and c_start < end for c_start, c_end in claimed):
                continue

            groups = match.groups()
            if not groups:
                continue
            # Two-group patterns capture (quote, path); others capture (path,).
            path_str = groups[1] if len(groups) > 1 else groups[0]
            if not path_str or not self._is_like_file_path(path_str):
                continue

            claimed.append((start, end))

            try:
                expanded_path = Path(path_str).expanduser().resolve()
                is_file = expanded_path.is_file()
            except (OSError, RuntimeError, ValueError):
                self.interface.show_warning(f"Could not resolve path: {path_str}")
                continue

            if not is_file:
                self.interface.show_warning(f"File not found: {path_str}")
                continue

            found_files.add(str(expanded_path))
            removals.append((start, end))

    # Cut from the end backwards so earlier offsets stay valid.
    cleaned_prompt = prompt
    for start, end in sorted(removals, reverse=True):
        cleaned_prompt = cleaned_prompt[:start] + cleaned_prompt[end:]

    cleaned_prompt = re.sub(r"\s+", " ", cleaned_prompt).strip()
    return cleaned_prompt, sorted(found_files)
5869
5970 def _is_like_file_path (self , path_str : str ) -> bool :
6071 """Check if string looks like a valid file path."""
@@ -63,17 +74,12 @@ def _is_like_file_path(self, path_str: str) -> bool:
6374
6475 if any (char in path_str for char in ["<" , ">" , "|" , "*" , "?" , "\n " , "\r " ]):
6576 return False
66- if "://" in path_str and not path_str .startswith ("file://" ):
77+
78+ if "://" in path_str and not path_str .lower ().startswith ("file://" ):
6779 return False
68- if " " in path_str and not (
69- path_str .startswith ('"' ) or path_str .startswith ("'" )
70- ):
71- words = path_str .split ()
72- if len (words ) > 3 :
73- return False
7480
7581 try :
76- Path (path_str .replace ("file://" , "" ))
82+ Path (path_str .replace ("file://" , "" , 1 ))
7783 return True
7884 except (ValueError , OSError ):
7985 return False
0 commit comments