// ck_engine/lib.rs

use anyhow::Result;
use ck_core::{CkError, IncludePattern, SearchMode, SearchOptions, SearchResult, Span};
use globset::{Glob, GlobSet, GlobSetBuilder};
use rayon::prelude::*;
use regex::{Regex, RegexBuilder};
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf as StdPathBuf;
use std::path::{Path, PathBuf};
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{STORED, Schema, TEXT, Value};
use tantivy::{Index, ReloadPolicy, TantivyDocument, doc};
use walkdir::WalkDir;

mod semantic_v3;
pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};

pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
pub type IndexingProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
pub type DetailedIndexingProgressCallback = Box<dyn Fn(ck_index::EmbeddingProgress) + Send + Sync>;

/// Resolve the actual file path to read content from
/// For PDFs: returns cache path and validates it exists
/// For regular files: returns original path
fn resolve_content_path(file_path: &Path, repo_root: &Path) -> Result<PathBuf> {
    if ck_core::pdf::is_pdf_file(file_path) {
        // PDFs: Read from cached extracted text
        let cache_path = ck_core::pdf::get_content_cache_path(repo_root, file_path);
        if !cache_path.exists() {
            return Err(anyhow::anyhow!(
                "PDF not preprocessed. Run 'ck --index' first."
            ));
        }
        Ok(cache_path)
    } else {
        // Regular files: Read from original source
        Ok(file_path.to_path_buf())
    }
}

/// Read content from file for search result extraction
/// Regular files: read directly from source
/// PDFs: read from preprocessed cache
fn read_file_content(file_path: &Path, repo_root: &Path) -> Result<String> {
    let content_path = resolve_content_path(file_path, repo_root)?;
    Ok(fs::read_to_string(content_path)?)
}

/// Extract content from a file using a span (streaming version)
async fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
    // Find repo root to locate cache
    let repo_root = find_nearest_index_root(file_path)
        .unwrap_or_else(|| file_path.parent().unwrap_or(file_path).to_path_buf());

    // Use centralized path resolution
    let content_path = resolve_content_path(file_path, &repo_root)?;

    // Stream only the needed lines
    extract_lines_from_file(&content_path, span.line_start, span.line_end)
}

/// Stream-read specific lines from a file without loading the entire content
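/// Line numbers are 1-based and inclusive; for example, (3, 5) returns lines 3 through 5 joined with '\n'.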
fn extract_lines_from_file(file_path: &Path, line_start: usize, line_end: usize) -> Result<String> {
    use std::io::{BufRead, BufReader};

    if line_start == 0 {
        return Ok(String::new());
    }

    let file = fs::File::open(file_path)?;
    let reader = BufReader::new(file);
    let mut result = Vec::new();

    // Convert to 0-based indexing
    let start_idx = line_start.saturating_sub(1);
    let end_idx = line_end.saturating_sub(1);

    for (current_line, line_result) in reader.lines().enumerate() {
        if current_line > end_idx {
            break; // Stop reading once we've passed the needed lines
        }

        let line = line_result?;

        if current_line >= start_idx {
            result.push(line);
        }
    }

    // Handle case where requested lines exceed file length
    if result.is_empty() && line_start > 0 {
        return Ok(String::new());
    }

    Ok(result.join("\n"))
}

/// Split content into lines while preserving the exact number of trailing newline bytes per line.
/// Handles Unix (\n), Windows (\r\n) and old Mac (\r) line endings.
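/// For example, "a\r\nb\nc" yields (["a", "b", "c"], [2, 1, 0]).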
fn split_lines_with_endings(content: &str) -> (Vec<String>, Vec<usize>) {
    let mut lines = Vec::new();
    let mut endings = Vec::new();

    let bytes = content.as_bytes();
    let mut start = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        match bytes[i] {
            b'\n' => {
                lines.push(content[start..i].to_string());
                endings.push(1);
                i += 1;
                start = i;
            }
            b'\r' => {
                lines.push(content[start..i].to_string());
                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
                    endings.push(2);
                    i += 2;
                } else {
                    endings.push(1);
                    i += 1;
                }
                start = i;
            }
            _ => {
                i += 1;
            }
        }
    }

    if start < bytes.len() {
        lines.push(content[start..].to_string());
        endings.push(0);
    }

    (lines, endings)
}

fn canonicalize_for_matching(path: &Path) -> PathBuf {
    if let Ok(canonical) = path.canonicalize() {
        return canonical;
    }

    if path.is_absolute() {
        path.to_path_buf()
    } else {
        std::env::current_dir()
            .map(|cwd| cwd.join(path))
            .unwrap_or_else(|_| path.to_path_buf())
    }
}

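/// Check whether `path` falls under any include pattern: directory patterns
/// match by prefix, file patterns must match exactly, and an empty pattern
/// list matches everything.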
fn path_matches_include(path: &Path, include_patterns: &[IncludePattern]) -> bool {
    if include_patterns.is_empty() {
        return true;
    }

    let candidate = canonicalize_for_matching(path);
    include_patterns.iter().any(|pattern| {
        if pattern.is_dir {
            candidate.starts_with(&pattern.path)
        } else {
            candidate == pattern.path
        }
    })
}

fn filter_files_by_include(
    files: Vec<PathBuf>,
    include_patterns: &[IncludePattern],
) -> Vec<PathBuf> {
    if include_patterns.is_empty() {
        return files;
    }

    files
        .into_iter()
        .filter(|path| path_matches_include(path, include_patterns))
        .collect()
}

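/// Walk up from `path` (or its parent, for files) to the nearest ancestor
/// directory that contains a `.ck` index.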
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let mut current = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };
    loop {
        if current.join(".ck").exists() {
            return Some(current.to_path_buf());
        }
        match current.parent() {
            Some(parent) => current = parent,
            None => return None,
        }
    }
}

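/// The embedding model resolved for an index: its canonical name, the
/// registry alias it is known under, and the embedding dimensions.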
#[derive(Clone, Debug)]
pub struct ResolvedModel {
    pub canonical_name: String,
    pub alias: String,
    pub dimensions: usize,
}

fn find_model_entry<'a>(
    registry: &'a ck_models::ModelRegistry,
    key: &str,
) -> Option<(String, &'a ck_models::ModelConfig)> {
    if let Some(config) = registry.get_model(key) {
        return Some((key.to_string(), config));
    }

    registry
        .models
        .iter()
        .find(|(_, config)| config.name == key)
        .map(|(alias, config)| (alias.clone(), config))
}

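/// Resolve which embedding model to use for the index rooted at `index_root`.
/// An existing manifest pins the model (a conflicting `--model` request is an
/// error); otherwise the CLI choice, or the registry default, is used.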
pub(crate) fn resolve_model_from_root(
    index_root: &Path,
    cli_model: Option<&str>,
) -> Result<ResolvedModel> {
    use ck_models::ModelRegistry;

    let registry = ModelRegistry::default();
    let index_dir = index_root.join(".ck");
    let manifest_path = index_dir.join("manifest.json");

    if manifest_path.exists() {
        let data = std::fs::read(&manifest_path)?;
        let manifest: ck_index::IndexManifest = serde_json::from_slice(&data)?;

        if let Some(existing_model) = manifest.embedding_model {
            let (alias, config_opt) = find_model_entry(&registry, &existing_model)
                .map(|(alias, config)| (alias, Some(config)))
                .unwrap_or_else(|| (existing_model.clone(), None));

            let dims = manifest
                .embedding_dimensions
                .or_else(|| config_opt.map(|c| c.dimensions))
                .unwrap_or(384);

            if let Some(requested) = cli_model {
                let (_, requested_config) =
                    find_model_entry(&registry, requested).ok_or_else(|| {
                        CkError::Embedding(format!(
                            "Unknown model '{}'. Available models: {}",
                            requested,
                            registry
                                .models
                                .keys()
                                .cloned()
                                .collect::<Vec<_>>()
                                .join(", ")
                        ))
                    })?;

                if requested_config.name != existing_model {
                    let suggested_alias = alias.clone();
                    return Err(CkError::Embedding(format!(
                        "Index was built with embedding model '{}' (alias '{}'), but '--model {}' was requested. To switch models run `ck --clean .` then `ck --index --model {}`. To keep using this index rerun your command with '--model {}'.",
                        existing_model,
                        suggested_alias,
                        requested,
                        requested,
                        suggested_alias
                    ))
                    .into());
                }
            }

            return Ok(ResolvedModel {
                canonical_name: existing_model,
                alias,
                dimensions: dims,
            });
        }
    }

    let (alias, config) = if let Some(requested) = cli_model {
        find_model_entry(&registry, requested).ok_or_else(|| {
            CkError::Embedding(format!(
                "Unknown model '{}'. Available models: {}",
                requested,
                registry
                    .models
                    .keys()
                    .cloned()
                    .collect::<Vec<_>>()
                    .join(", ")
            ))
        })?
    } else {
        let alias = registry.default_model.clone();
        let config = registry.get_default_model().ok_or_else(|| {
            CkError::Embedding("No default embedding model configured".to_string())
        })?;
        (alias, config)
    };

    Ok(ResolvedModel {
        canonical_name: config.name.clone(),
        alias,
        dimensions: config.dimensions,
    })
}

pub fn resolve_model_for_path(path: &Path, cli_model: Option<&str>) -> Result<ResolvedModel> {
    let index_root = find_nearest_index_root(path).unwrap_or_else(|| {
        if path.is_file() {
            path.parent().unwrap_or(path).to_path_buf()
        } else {
            path.to_path_buf()
        }
    });
    resolve_model_from_root(&index_root, cli_model)
}

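/// Convenience wrapper around [`search_enhanced`] that returns only the matches.
///
/// A minimal usage sketch (illustrative only, assuming the `Default`
/// implementation of `SearchOptions` exercised by the tests below):
///
/// ```ignore
/// let options = SearchOptions {
///     mode: SearchMode::Regex,
///     query: "TODO".to_string(),
///     path: std::path::PathBuf::from("."),
///     recursive: true,
///     ..Default::default()
/// };
/// let matches = search(&options).await?;
/// ```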
pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    let results = search_enhanced(options).await?;
    Ok(results.matches)
}

pub async fn search_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<Vec<SearchResult>> {
    let results = search_enhanced_with_progress(options, progress_callback).await?;
    Ok(results.matches)
}

/// Enhanced search that includes near-miss information for threshold queries
pub async fn search_enhanced(options: &SearchOptions) -> Result<ck_core::SearchResults> {
    search_enhanced_with_progress(options, None).await
}

/// Enhanced search with progress callback that includes near-miss information
pub async fn search_enhanced_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<ck_core::SearchResults> {
    search_enhanced_with_indexing_progress(options, progress_callback, None, None).await
}

/// Enhanced search with both search and indexing progress callbacks
pub async fn search_enhanced_with_indexing_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
    indexing_progress_callback: Option<IndexingProgressCallback>,
    detailed_indexing_progress_callback: Option<DetailedIndexingProgressCallback>,
) -> Result<ck_core::SearchResults> {
    // Validate that the search path exists
    if !options.path.exists() {
        return Err(ck_core::CkError::Search(format!(
            "Path does not exist: {}",
            options.path.display()
        ))
        .into());
    }

    // Auto-update index if needed (unless it's regex-only mode)
    if !matches!(options.mode, SearchMode::Regex) {
        let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
        ensure_index_updated_with_progress(
            &options.path,
            options.reindex,
            need_embeddings,
            indexing_progress_callback,
            detailed_indexing_progress_callback,
            options.respect_gitignore,
            &options.exclude_patterns,
            options.embedding_model.as_deref(),
        )
        .await?;
    }

    let search_results = match options.mode {
        SearchMode::Regex => {
            let matches = regex_search(options)?;
            ck_core::SearchResults {
                matches,
                closest_below_threshold: None,
            }
        }
        SearchMode::Lexical => {
            let matches = lexical_search(options).await?;
            ck_core::SearchResults {
                matches,
                closest_below_threshold: None,
            }
        }
        SearchMode::Semantic => {
            // Use v3 semantic search (reads pre-computed embeddings from sidecars using spans)
            semantic_search_v3_with_progress(options, progress_callback).await?
        }
        SearchMode::Hybrid => {
            let matches = hybrid_search_with_progress(options, progress_callback).await?;
            ck_core::SearchResults {
                matches,
                closest_below_threshold: None,
            }
        }
    };

    Ok(search_results)
}

fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    let pattern = if options.fixed_string {
        regex::escape(&options.query)
    } else if options.whole_word {
        format!(r"\b{}\b", regex::escape(&options.query))
    } else {
        options.query.clone()
    };

    let regex = RegexBuilder::new(&pattern)
        .case_insensitive(options.case_insensitive)
        .build()
        .map_err(CkError::Regex)?;

    // Directories default to a recursive walk (as with `grep -r`) to keep grep-compatible behavior
    let should_recurse = options.path.is_dir() || options.recursive;
    let files = if should_recurse {
        // Use ck_index's collect_files which respects gitignore
        let collected = ck_index::collect_files(
            &options.path,
            options.respect_gitignore,
            &options.exclude_patterns,
        )?;
        filter_files_by_include(collected, &options.include_patterns)
    } else {
        // For non-recursive, use the local collect_files
        let collected = collect_files(&options.path, should_recurse, &options.exclude_patterns)?;
        filter_files_by_include(collected, &options.include_patterns)
    };

    let results: Vec<Vec<SearchResult>> = files
        .par_iter()
        .filter_map(|file_path| match search_file(&regex, file_path, options) {
            Ok(matches) => {
                if matches.is_empty() {
                    None
                } else {
                    Some(matches)
                }
            }
            Err(e) => {
                tracing::debug!("Error searching {:?}: {}", file_path, e);
                None
            }
        })
        .collect();

    let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
    // Deterministic ordering: file path, then line number
    all_results.sort_by(|a, b| {
        let path_cmp = a.file.cmp(&b.file);
        if path_cmp != std::cmp::Ordering::Equal {
            return path_cmp;
        }
        a.span.line_start.cmp(&b.span.line_start)
    });

    if let Some(top_k) = options.top_k {
        all_results.truncate(top_k);
    }

    Ok(all_results)
}

fn search_file(
    regex: &Regex,
    file_path: &Path,
    options: &SearchOptions,
) -> Result<Vec<SearchResult>> {
    // Find repo root to locate cache
    let repo_root = find_nearest_index_root(file_path)
        .unwrap_or_else(|| file_path.parent().unwrap_or(file_path).to_path_buf());

    // For full_section mode, we need the entire content for parsing
    // For context previews, we need all lines for surrounding context
    // So we'll load content when needed, but optimize for the common case
    if options.full_section || options.context_lines > 0 {
        // Load full content when we need section parsing or context
        let content = read_file_content(file_path, &repo_root)?;
        let (lines, line_ending_lengths) = split_lines_with_endings(&content);

        // If full_section is enabled, try to parse the file and find code sections
        let code_sections = if options.full_section {
            extract_code_sections(file_path, &content)
        } else {
            None
        };

        search_file_in_memory(
            regex,
            file_path,
            options,
            &lines,
            &code_sections,
            &line_ending_lengths,
        )
    } else {
        // Streaming search (simple case)
        search_file_streaming(regex, file_path, &repo_root, options)
    }
}

/// In-memory search for cases requiring context or code sections
fn search_file_in_memory(
    regex: &Regex,
    file_path: &Path,
    options: &SearchOptions,
    lines: &[String],
    code_sections: &Option<Vec<(usize, usize, String)>>,
    line_ending_lengths: &[usize],
) -> Result<Vec<SearchResult>> {
    let mut results = Vec::new();
    let mut byte_offset = 0;

    for (line_idx, line) in lines.iter().enumerate() {
        let line_number = line_idx + 1;

        // Special handling for empty pattern - match the entire line once
        // An empty regex pattern will match at every position, so we need to handle it specially
        if regex.as_str().is_empty() {
            // Empty pattern matches the whole line once (grep compatibility)
            let preview = if options.full_section {
                // Try to find the containing code section
                if let Some(sections) = code_sections {
                    if let Some(section) = find_containing_section(sections, line_idx) {
                        section.clone()
                    } else {
                        // Fall back to context lines if no section found
                        get_context_preview(lines, line_idx, options)
                    }
                } else {
                    get_context_preview(lines, line_idx, options)
                }
            } else {
                get_context_preview(lines, line_idx, options)
            };

            results.push(SearchResult {
                file: file_path.to_path_buf(),
                span: Span {
                    byte_start: byte_offset,
                    byte_end: byte_offset + line.len(),
                    line_start: line_number,
                    line_end: line_number,
                },
                score: 1.0,
                preview,
                lang: ck_core::Language::from_path(file_path),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            });
        } else {
            // Find all matches in the line with their positions
            for mat in regex.find_iter(line) {
                let preview = if options.full_section {
                    // Try to find the containing code section
                    if let Some(sections) = code_sections {
                        if let Some(section) = find_containing_section(sections, line_idx) {
                            section.clone()
                        } else {
                            // Fall back to context lines if no section found
                            get_context_preview(lines, line_idx, options)
                        }
                    } else {
                        get_context_preview(lines, line_idx, options)
                    }
                } else {
                    get_context_preview(lines, line_idx, options)
                };

                results.push(SearchResult {
                    file: file_path.to_path_buf(),
                    span: Span {
                        byte_start: byte_offset + mat.start(),
                        byte_end: byte_offset + mat.end(),
                        line_start: line_number,
                        line_end: line_number,
                    },
                    score: 1.0,
                    preview,
                    lang: ck_core::Language::from_path(file_path),
                    symbol: None,
                    chunk_hash: None,
                    index_epoch: None,
                });
            }
        }

        // Update byte offset for next line (add line length + actual line ending length)
        byte_offset += line.len();
        byte_offset += line_ending_lengths.get(line_idx).copied().unwrap_or(0);
    }

    Ok(results)
}

/// Streaming search for simple cases without context or code sections
fn search_file_streaming(
    regex: &Regex,
    file_path: &Path,
    repo_root: &Path,
    _options: &SearchOptions,
) -> Result<Vec<SearchResult>> {
    use std::io::{BufRead, BufReader};

    let content_path = resolve_content_path(file_path, repo_root)?;
    let file = std::fs::File::open(&content_path)?;
    let mut reader = BufReader::new(file);

    let mut results = Vec::new();
    let mut line = String::new();
    let mut byte_offset = 0usize;
    let mut line_number = 1usize;

    loop {
        line.clear();
        let bytes_read = reader.read_line(&mut line)?;
        if bytes_read == 0 {
            break;
        }

        // Determine the length of the trailing line ending (if any) and
        // normalise the line buffer so it no longer contains newline bytes.
        let mut newline_len = 0usize;
        if line.ends_with("\r\n") {
            line.pop(); // remove \n
            line.pop(); // remove \r
            newline_len = 2;
        } else if line.ends_with(['\n', '\r']) {
            line.pop();
            newline_len = 1;
        }

        // Old Mac-style files may use bare carriage returns as separators.
        // When the trimmed line still contains '\r' characters, treat them as
        // record separators so the byte offsets remain accurate.
        let treat_cr_as_newline = line.contains('\r');

        if treat_cr_as_newline {
            let bytes = line.as_bytes();
            let mut segment_start = 0usize;
            while segment_start <= bytes.len() {
                match bytes[segment_start..].iter().position(|&b| b == b'\r') {
                    Some(rel_idx) => {
                        let idx = segment_start + rel_idx;
                        let segment_bytes = &bytes[segment_start..idx];
                        let segment_str = std::str::from_utf8(segment_bytes)?;
                        process_streaming_line(
                            regex,
                            file_path,
                            segment_str,
                            line_number,
                            byte_offset,
                            &mut results,
                        );
                        byte_offset += segment_bytes.len() + 1; // account for \r
                        line_number += 1;
                        segment_start = idx + 1;
                    }
                    None => {
                        let segment_bytes = &bytes[segment_start..];
                        let segment_str = std::str::from_utf8(segment_bytes)?;
                        process_streaming_line(
                            regex,
                            file_path,
                            segment_str,
                            line_number,
                            byte_offset,
                            &mut results,
                        );
                        byte_offset += segment_bytes.len();
                        line_number += 1;
                        break;
                    }
                }
            }
            byte_offset += newline_len;
        } else {
            let line_str = line.as_str();
            process_streaming_line(
                regex,
                file_path,
                line_str,
                line_number,
                byte_offset,
                &mut results,
            );
            byte_offset += line_str.len() + newline_len;
            line_number += 1;
        }
    }

    Ok(results)
}

fn process_streaming_line(
    regex: &Regex,
    file_path: &Path,
    line: &str,
    line_number: usize,
    byte_offset: usize,
    results: &mut Vec<SearchResult>,
) {
    if regex.as_str().is_empty() {
        results.push(SearchResult {
            file: file_path.to_path_buf(),
            span: Span {
                byte_start: byte_offset,
                byte_end: byte_offset + line.len(),
                line_start: line_number,
                line_end: line_number,
            },
            score: 1.0,
            preview: line.to_string(),
            lang: ck_core::Language::from_path(file_path),
            symbol: None,
            chunk_hash: None,
            index_epoch: None,
        });
    } else {
        for mat in regex.find_iter(line) {
            results.push(SearchResult {
                file: file_path.to_path_buf(),
                span: Span {
                    byte_start: byte_offset + mat.start(),
                    byte_end: byte_offset + mat.end(),
                    line_start: line_number,
                    line_end: line_number,
                },
                score: 1.0,
                preview: line.to_string(),
                lang: ck_core::Language::from_path(file_path),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            });
        }
    }
}

async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    // Handle both files and directories and reuse nearest existing .ck index up the tree
    let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
        if options.path.is_file() {
            options.path.parent().unwrap_or(&options.path).to_path_buf()
        } else {
            options.path.clone()
        }
    });

    let index_dir = index_root.join(".ck");
    if !index_dir.exists() {
        return Err(CkError::Index("No index found. Run 'ck --index' first.".to_string()).into());
    }

    let tantivy_index_path = index_dir.join("tantivy_index");

    if !tantivy_index_path.exists() {
        return build_tantivy_index(options).await;
    }

    let mut schema_builder = Schema::builder();
    let content_field = schema_builder.add_text_field("content", TEXT | STORED);
    let path_field = schema_builder.add_text_field("path", TEXT | STORED);
    let _schema = schema_builder.build();

    let index = Index::open_in_dir(&tantivy_index_path)
        .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;

    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommitWithDelay)
        .try_into()
        .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;

    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![content_field]);

    let query = query_parser
        .parse_query(&options.query)
        .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;

    let top_docs = if let Some(top_k) = options.top_k {
        searcher.search(&query, &TopDocs::with_limit(top_k))?
    } else {
        searcher.search(&query, &TopDocs::with_limit(100))?
    };

    // First, collect all results with raw scores
    let mut raw_results = Vec::new();
    for (_score, doc_address) in top_docs {
        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
        let path_text = retrieved_doc
            .get_first(path_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");
        let content_text = retrieved_doc
            .get_first(content_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");

        let file_path = PathBuf::from(path_text);
        if !path_matches_include(&file_path, &options.include_patterns) {
            continue;
        }
        let preview = if options.full_section {
            content_text.to_string()
        } else {
            content_text.lines().take(3).collect::<Vec<_>>().join("\n")
        };

        raw_results.push((
            _score,
            SearchResult {
                file: file_path,
                span: Span {
                    byte_start: 0,
                    byte_end: content_text.len(),
                    line_start: 1,
                    line_end: content_text.lines().count(),
                },
                score: _score,
                preview,
                lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            },
        ));
    }

    // Normalize scores to 0-1 range and apply threshold
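    // e.g. raw BM25 scores [8.0, 4.0, 2.0] normalize to [1.0, 0.5, 0.25] before the threshold is applied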
    let mut results = Vec::new();
    if !raw_results.is_empty() {
        let max_score = raw_results
            .iter()
            .map(|(score, _)| *score)
            .fold(0.0f32, f32::max);
        if max_score > 0.0 {
            for (raw_score, mut result) in raw_results {
                let normalized_score = raw_score / max_score;

                // Apply threshold filtering with normalized score
                if let Some(threshold) = options.threshold
                    && normalized_score < threshold
                {
                    continue;
                }

                result.score = normalized_score;
                results.push(result);
            }
        }
    }

    Ok(results)
}

async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    // Handle both files and directories by finding the appropriate directory for indexing
    let index_root = if options.path.is_file() {
        options.path.parent().unwrap_or(&options.path)
    } else {
        &options.path
    };

    let index_dir = index_root.join(".ck");
    let tantivy_index_path = index_dir.join("tantivy_index");

    fs::create_dir_all(&tantivy_index_path)?;

    let mut schema_builder = Schema::builder();
    let content_field = schema_builder.add_text_field("content", TEXT | STORED);
    let path_field = schema_builder.add_text_field("path", TEXT | STORED);
    let schema = schema_builder.build();

    let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
        .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;

    let mut index_writer = index
        .writer(50_000_000)
        .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;

    let files = filter_files_by_include(
        collect_files(index_root, true, &options.exclude_patterns)?,
        &options.include_patterns,
    );

    for file_path in &files {
        if let Ok(content) = fs::read_to_string(file_path) {
            let doc = doc!(
                content_field => content,
                path_field => file_path.display().to_string()
            );
            index_writer.add_document(doc)?;
        }
    }

    index_writer
        .commit()
        .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;

    // After building, search again with the same options
    let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
    let mut schema_builder = Schema::builder();
    let content_field = schema_builder.add_text_field("content", TEXT | STORED);
    let path_field = schema_builder.add_text_field("path", TEXT | STORED);
    let _schema = schema_builder.build();

    let index = Index::open_in_dir(&tantivy_index_path)
        .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;

    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommitWithDelay)
        .try_into()
        .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;

    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![content_field]);

    let query = query_parser
        .parse_query(&options.query)
        .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;

    let top_docs = if let Some(top_k) = options.top_k {
        searcher.search(&query, &TopDocs::with_limit(top_k))?
    } else {
        searcher.search(&query, &TopDocs::with_limit(100))?
    };

    // First, collect all results with raw scores
    let mut raw_results = Vec::new();
    for (_score, doc_address) in top_docs {
        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
        let path_text = retrieved_doc
            .get_first(path_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");
        let content_text = retrieved_doc
            .get_first(content_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");

        let file_path = PathBuf::from(path_text);
        let preview = if options.full_section {
            content_text.to_string()
        } else {
            content_text.lines().take(3).collect::<Vec<_>>().join("\n")
        };

        raw_results.push((
            _score,
            SearchResult {
                file: file_path,
                span: Span {
                    byte_start: 0,
                    byte_end: content_text.len(),
                    line_start: 1,
                    line_end: content_text.lines().count(),
                },
                score: _score,
                preview,
                lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            },
        ));
    }

    // Normalize scores to 0-1 range and apply threshold
    let mut results = Vec::new();
    if !raw_results.is_empty() {
        let max_score = raw_results
            .iter()
            .map(|(score, _)| *score)
            .fold(0.0f32, f32::max);
        if max_score > 0.0 {
            for (raw_score, mut result) in raw_results {
                let normalized_score = raw_score / max_score;

                // Apply threshold filtering with normalized score
                if let Some(threshold) = options.threshold
                    && normalized_score < threshold
                {
                    continue;
                }

                result.score = normalized_score;
                results.push(result);
            }
        }
    }

    Ok(results)
}

#[allow(dead_code)]
async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    hybrid_search_with_progress(options, None).await
}

async fn hybrid_search_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<Vec<SearchResult>> {
    if let Some(ref callback) = progress_callback {
        callback("Running regex search...");
    }
    let regex_results = regex_search(options)?;

    if let Some(ref callback) = progress_callback {
        callback("Running semantic search...");
    }
    let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;

    let mut combined = HashMap::new();

    for (rank, result) in regex_results.iter().enumerate() {
        let key = format!("{}:{}", result.file.display(), result.span.line_start);
        combined
            .entry(key)
            .or_insert(Vec::new())
            .push((rank + 1, result.clone()));
    }

    for (rank, result) in semantic_results.matches.iter().enumerate() {
        let key = format!("{}:{}", result.file.display(), result.span.line_start);
        combined
            .entry(key)
            .or_insert(Vec::new())
            .push((rank + 1, result.clone()));
    }

    // Calculate RRF scores according to original paper: RRFscore(d) = Σ(r∈R) 1/(k + r(d))
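    // Here k = 60 (the constant used in the RRF paper), so a result ranked first
    // by both the regex and semantic lists scores 1/61 + 1/61 ≈ 0.033.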
    let mut rrf_results: Vec<SearchResult> = combined
        .into_values()
        .map(|ranks| {
            let mut result = ranks[0].1.clone();
            let rrf_score = ranks
                .iter()
                .map(|(rank, _)| 1.0 / (60.0 + *rank as f32))
                .sum();
            result.score = rrf_score;
            result
        })
        .filter(|result| {
            // Apply threshold filtering to raw RRF scores
            if let Some(threshold) = options.threshold {
                result.score >= threshold
            } else {
                true
            }
        })
        .collect();

    rrf_results.retain(|result| path_matches_include(&result.file, &options.include_patterns));

    // Sort by RRF score (highest first)
    rrf_results.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    if let Some(top_k) = options.top_k {
        rrf_results.truncate(top_k);
    }

    Ok(rrf_results)
}

fn build_globset(patterns: &[String]) -> GlobSet {
    let mut builder = GlobSetBuilder::new();
    for pat in patterns {
        // Treat patterns as filename or directory globs
        if let Ok(glob) = Glob::new(pat) {
            builder.add(glob);
        }
    }
    builder.build().unwrap_or_else(|_| GlobSet::empty())
}

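/// Check a path against the exclusion globs. Both the full path and each
/// individual component are tested, so a pattern like "target" excludes any
/// path containing a `target` component, and "*.min.js" excludes matching
/// file names anywhere in the tree.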
fn should_exclude_path(path: &Path, globset: &GlobSet) -> bool {
    // Match against each path component and the full path
    if globset.is_match(path) {
        return true;
    }
    for component in path.components() {
        if let std::path::Component::Normal(name) = component
            && globset.is_match(name)
        {
            return true;
        }
    }
    false
}

fn collect_files(
    path: &Path,
    recursive: bool,
    exclude_patterns: &[String],
) -> Result<Vec<PathBuf>> {
    let mut files = Vec::new();
    let globset = build_globset(exclude_patterns);

    if path.is_file() {
        // Always add single files, even if they're excluded (user explicitly requested)
        files.push(path.to_path_buf());
    } else if recursive {
        for entry in WalkDir::new(path).into_iter().filter_entry(|e| {
            // Skip excluded directories entirely for efficiency
            let name = e.file_name();
            !globset.is_match(e.path()) && !globset.is_match(name)
        }) {
            match entry {
                Ok(entry) => {
                    if entry.file_type().is_file() && !should_exclude_path(entry.path(), &globset) {
                        files.push(entry.path().to_path_buf());
                    }
                }
                Err(e) => {
                    // Log directory traversal errors but continue processing
                    tracing::debug!("Skipping path due to error: {}", e);
                    continue;
                }
            }
        }
    } else {
        match fs::read_dir(path) {
            Ok(read_dir) => {
                for entry in read_dir {
                    match entry {
                        Ok(entry) => {
                            let path = entry.path();
                            if path.is_file() && !should_exclude_path(&path, &globset) {
                                files.push(path);
                            }
                        }
                        Err(e) => {
                            tracing::debug!("Skipping directory entry due to error: {}", e);
                            continue;
                        }
                    }
                }
            }
            Err(e) => {
                tracing::debug!("Cannot read directory {:?}: {}", path, e);
                return Err(e.into());
            }
        }
    }

    Ok(files)
}

#[allow(clippy::too_many_arguments)]
async fn ensure_index_updated_with_progress(
    path: &Path,
    force_reindex: bool,
    need_embeddings: bool,
    progress_callback: Option<ck_index::ProgressCallback>,
    detailed_progress_callback: Option<ck_index::DetailedProgressCallback>,
    respect_gitignore: bool,
    exclude_patterns: &[String],
    model_override: Option<&str>,
) -> Result<()> {
    // Find index root for .ck directory location
    let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
        if path.is_file() {
            path.parent().unwrap_or(path).to_path_buf()
        } else {
            path.to_path_buf()
        }
    });
    let index_root = &index_root_buf;

    // Pass the original path to indexing function so it can index just that file/directory
    // The indexing function will use collect_files() which now handles individual files correctly
    if force_reindex {
        let stats = ck_index::smart_update_index_with_detailed_progress(
            index_root,
            true,
            progress_callback,
            detailed_progress_callback,
            need_embeddings,
            respect_gitignore,
            exclude_patterns, // Use search-specific exclude patterns
            model_override,
        )
        .await?;
        if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
            tracing::info!(
                "Index updated: {} files indexed, {} orphaned files removed",
                stats.files_indexed,
                stats.orphaned_files_removed
            );
        }
        return Ok(());
    }

    // For incremental updates with individual files, we need special handling
    // to ensure only the specific file is indexed, not the entire directory
    if path.is_file() {
        // Index just this one file
        use ck_index::index_file;
        index_file(path, need_embeddings).await?;
    } else {
        // For directories, use the standard smart update
        let stats = ck_index::smart_update_index_with_detailed_progress(
            index_root,
            false,
            progress_callback,
            detailed_progress_callback,
            need_embeddings,
            respect_gitignore,
            exclude_patterns,
            model_override,
        )
        .await?;
        if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
            tracing::info!(
                "Index updated: {} files indexed, {} orphaned files removed",
                stats.files_indexed,
                stats.orphaned_files_removed
            );
        }
    }

    Ok(())
}

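/// Build the preview text for a match: just the matching line, or the matching
/// line joined with its surrounding before/after context lines when context is
/// requested.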
fn get_context_preview(lines: &[String], line_idx: usize, options: &SearchOptions) -> String {
    let before = options.before_context_lines.max(options.context_lines);
    let after = options.after_context_lines.max(options.context_lines);

    if before > 0 || after > 0 {
        let start_idx = line_idx.saturating_sub(before);
        let end_idx = (line_idx + after + 1).min(lines.len());
        lines[start_idx..end_idx].join("\n")
    } else {
        lines[line_idx].to_string()
    }
}

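/// Parse the file with tree-sitter (via ck_chunk) and return (start, end, text)
/// tuples for function, class, and method chunks; line indices are 0-based.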
1260fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
1261    let lang = ck_core::Language::from_path(file_path)?;
1262
1263    // Parse the file with tree-sitter and extract function/class sections
1264    if let Ok(chunks) = ck_chunk::chunk_text(content, Some(lang)) {
1265        let sections: Vec<(usize, usize, String)> = chunks
1266            .into_iter()
1267            .filter(|chunk| {
1268                matches!(
1269                    chunk.chunk_type,
1270                    ck_chunk::ChunkType::Function
1271                        | ck_chunk::ChunkType::Class
1272                        | ck_chunk::ChunkType::Method
1273                )
1274            })
1275            .map(|chunk| {
1276                (
1277                    chunk.span.line_start - 1, // Convert to 0-based index
1278                    chunk.span.line_end - 1,
1279                    chunk.text,
1280                )
1281            })
1282            .collect();
1283
1284        if sections.is_empty() {
1285            None
1286        } else {
1287            Some(sections)
1288        }
1289    } else {
1290        None
1291    }
1292}
1293
1294fn find_containing_section(
1295    sections: &[(usize, usize, String)],
1296    line_idx: usize,
1297) -> Option<&String> {
1298    for (start, end, text) in sections {
1299        if line_idx >= *start && line_idx <= *end {
1300            return Some(text);
1301        }
1302    }
1303    None
1304}
1305
1306#[cfg(test)]
1307mod tests {
1308    use super::*;
1309    use std::fs;
1310    use tempfile::TempDir;
1311
1312    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
1313        let files = vec![
1314            ("test1.txt", "hello world rust programming"),
1315            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
1316            ("test3.py", "print('Hello Python')"),
1317            ("test4.txt", "machine learning artificial intelligence"),
1318        ];
1319
1320        let mut paths = Vec::new();
1321        for (name, content) in files {
1322            let path = dir.join(name);
1323            fs::write(&path, content).unwrap();
1324            paths.push(path);
1325        }
1326        paths
1327    }
1328
1329    #[test]
1330    fn test_extract_lines_from_file() {
1331        let temp_dir = TempDir::new().unwrap();
1332        let test_file = temp_dir.path().join("test_lines.txt");
1333
1334        // Create a multi-line test file
1335        let content =
1336            "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10";
1337        fs::write(&test_file, content).unwrap();
1338
1339        // Test extracting lines 3-5 (1-based indexing)
1340        let result = extract_lines_from_file(&test_file, 3, 5).unwrap();
1341        assert_eq!(result, "Line 3\nLine 4\nLine 5");
1342
1343        // Test extracting a single line
1344        let result = extract_lines_from_file(&test_file, 7, 7).unwrap();
1345        assert_eq!(result, "Line 7");
1346
1347        // Test extracting from line 8 to end
1348        let result = extract_lines_from_file(&test_file, 8, 100).unwrap();
1349        assert_eq!(result, "Line 8\nLine 9\nLine 10");
1350
1351        // Test line_start == 0 (should return empty)
1352        let result = extract_lines_from_file(&test_file, 0, 5).unwrap();
1353        assert_eq!(result, "");
1354
1355        // Test line_start > file length (should return empty)
1356        let result = extract_lines_from_file(&test_file, 20, 25).unwrap();
1357        assert_eq!(result, "");
1358    }
1359
1360    #[tokio::test]
1361    async fn test_extract_content_from_span() {
1362        let temp_dir = TempDir::new().unwrap();
1363        let test_file = temp_dir.path().join("code.rs");
1364
1365        // Create a multi-line code file
1366        let content = "fn first() {\n    println!(\"First\");\n}\n\nfn second() {\n    println!(\"Second\");\n}\n\nfn third() {\n    println!(\"Third\");\n}";
1367        fs::write(&test_file, content).unwrap();
1368
1369        // Test extracting the second function (lines 5-7)
1370        let span = ck_core::Span {
1371            byte_start: 0, // Not used in line extraction
1372            byte_end: 0,   // Not used in line extraction
1373            line_start: 5,
1374            line_end: 7,
1375        };
1376
1377        let result = extract_content_from_span(&test_file, &span).await.unwrap();
1378        assert_eq!(result, "fn second() {\n    println!(\"Second\");\n}");
1379
1380        // Test extracting a single line
1381        let span = ck_core::Span {
1382            byte_start: 0,
1383            byte_end: 0,
1384            line_start: 2,
1385            line_end: 2,
1386        };
1387
1388        let result = extract_content_from_span(&test_file, &span).await.unwrap();
1389        assert_eq!(result, "    println!(\"First\");");
1390    }
1391
1392    #[test]
1393    fn test_collect_files() {
1394        let temp_dir = TempDir::new().unwrap();
1395        let test_files = create_test_files(temp_dir.path());
1396
1397        // Test non-recursive
1398        let files = collect_files(temp_dir.path(), false, &[]).unwrap();
1399        assert_eq!(files.len(), 4);
1400
1401        // Test recursive
1402        let files = collect_files(temp_dir.path(), true, &[]).unwrap();
1403        assert_eq!(files.len(), 4);
1404
1405        // Test single file
1406        let files = collect_files(&test_files[0], false, &[]).unwrap();
1407        assert_eq!(files.len(), 1);
1408        assert_eq!(files[0], test_files[0]);
1409    }
1410
1411    #[test]
1412    fn test_regex_search() {
1413        let temp_dir = TempDir::new().unwrap();
1414        create_test_files(temp_dir.path());
1415
1416        let options = SearchOptions {
1417            mode: SearchMode::Regex,
1418            query: "rust".to_string(),
1419            path: temp_dir.path().to_path_buf(),
1420            recursive: true,
1421            ..Default::default()
1422        };
1423
1424        let results = regex_search(&options).unwrap();
1425        assert!(!results.is_empty());
1426
1427        // Should find matches in files containing "rust"
1428        let rust_matches: Vec<_> = results
1429            .iter()
1430            .filter(|r| r.preview.to_lowercase().contains("rust"))
1431            .collect();
1432        assert!(!rust_matches.is_empty());
1433    }
1434
1435    #[test]
1436    fn test_regex_search_case_insensitive() {
1437        let temp_dir = TempDir::new().unwrap();
1438        create_test_files(temp_dir.path());
1439
1440        let options = SearchOptions {
1441            mode: SearchMode::Regex,
1442            query: "HELLO".to_string(),
1443            path: temp_dir.path().to_path_buf(),
1444            recursive: true,
1445            case_insensitive: true,
1446            ..Default::default()
1447        };
1448
1449        let results = regex_search(&options).unwrap();
1450        assert!(!results.is_empty());
1451    }
1452
1453    #[test]
1454    fn test_regex_search_fixed_string() {
1455        let temp_dir = TempDir::new().unwrap();
1456        create_test_files(temp_dir.path());
1457
1458        let options = SearchOptions {
1459            mode: SearchMode::Regex,
1460            query: "fn main()".to_string(),
1461            path: temp_dir.path().to_path_buf(),
1462            recursive: true,
1463            fixed_string: true,
1464            ..Default::default()
1465        };
1466
1467        let results = regex_search(&options).unwrap();
1468        assert!(!results.is_empty());
1469    }
1470
1471    #[test]
1472    fn test_regex_search_whole_word() {
1473        let temp_dir = TempDir::new().unwrap();
1474        fs::write(
1475            temp_dir.path().join("word_test.txt"),
1476            "rust rusty rustacean",
1477        )
1478        .unwrap();
1479
1480        let options = SearchOptions {
1481            mode: SearchMode::Regex,
1482            query: "rust".to_string(),
1483            path: temp_dir.path().to_path_buf(),
1484            recursive: true,
1485            whole_word: true,
1486            ..Default::default()
1487        };
1488
1489        let results = regex_search(&options).unwrap();
1490        assert!(!results.is_empty());
1491        // Should only match "rust" as a whole word, not "rusty" or "rustacean"
1492    }
1493
1494    #[test]
1495    fn test_regex_search_top_k() {
1496        let temp_dir = TempDir::new().unwrap();
1497
1498        // Create multiple files with matches
1499        for i in 0..10 {
1500            fs::write(
1501                temp_dir.path().join(format!("file{}.txt", i)),
1502                "test content",
1503            )
1504            .unwrap();
1505        }
1506
1507        let options = SearchOptions {
1508            mode: SearchMode::Regex,
1509            query: "test".to_string(),
1510            path: temp_dir.path().to_path_buf(),
1511            recursive: true,
1512            top_k: Some(5),
1513            ..Default::default()
1514        };
1515
1516        let results = regex_search(&options).unwrap();
1517        assert!(results.len() <= 5);
1518    }
1519
    #[test]
    fn test_regex_search_span_offsets() {
        // Test that span offsets are correctly calculated for multiple matches on a line
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("spans.txt");
        fs::write(&test_file, "test test test\nline two test\ntest end").unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();

        // Should find 5 matches total
        assert_eq!(results.len(), 5);

        // Check first line has 3 matches with correct byte offsets
        let line1_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 1).collect();
        assert_eq!(line1_matches.len(), 3);
        assert_eq!(line1_matches[0].span.byte_start, 0);
        assert_eq!(line1_matches[1].span.byte_start, 5);
        assert_eq!(line1_matches[2].span.byte_start, 10);

        // Check second line match
        let line2_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 2).collect();
        assert_eq!(line2_matches.len(), 1);
        assert_eq!(line2_matches[0].span.byte_start, 24); // "test test test\n" = 15 bytes, "line two " = 9 bytes

        // Each match should have different byte offsets
        let mut byte_starts: Vec<_> = results.iter().map(|r| r.span.byte_start).collect();
        byte_starts.sort();
        byte_starts.dedup();
        assert_eq!(byte_starts.len(), 5); // All byte_starts should be unique
    }
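
    // Hedged companion to the span-offset test above (not part of the original
    // suite): assuming span.byte_start is a byte offset into the raw file
    // contents, as the hard-coded offsets above imply, every reported offset
    // should land exactly on an occurrence of the query text.
    #[test]
    fn test_regex_search_span_offsets_point_at_match_text() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("spans_check.txt");
        let content = "test test test\nline two test\ntest end";
        fs::write(&test_file, content).unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 5);

        // Cross-check each offset against the content instead of hard-coding positions.
        for result in &results {
            let start = result.span.byte_start;
            assert_eq!(&content[start..start + "test".len()], "test");
        }
    }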

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(
            &file_path,
            "line 1: hello\nline 2: world\nline 3: rust programming",
        )
        .unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 3);
        assert!(results[0].preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        println!("Preview: '{}'", results[0].preview);

        // The target line is line 3, with 1 context line before and after
        // So we should get lines 2, 3, 4
        assert!(results[0].preview.contains("line 2"));
        assert!(results[0].preview.contains("target line"));
        assert!(results[0].preview.contains("line 4"));
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "hello".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }

    #[tokio::test]
    async fn test_regex_search_mixed_line_endings() {
        // Regression test for byte offset issues with different line endings
        let temp_dir = TempDir::new().unwrap();

        // Create test file with mixed line endings (Windows \r\n and Unix \n)
        let test_file = temp_dir.path().join("mixed_endings.txt");
        let content = "line1\r\nline2\nline3\r\npattern here\nline5\r\n";
        std::fs::write(&test_file, content).unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "pattern".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert_eq!(results.len(), 1);

        let result = &results[0];
        // Verify byte offsets are correct - should point to start of "pattern"
        let original_content = std::fs::read_to_string(&test_file).unwrap();
        let pattern_start = original_content.find("pattern").unwrap();

        assert_eq!(result.span.byte_start, pattern_start);
        assert_eq!(result.span.line_start, 4); // Fourth line
    }

    #[tokio::test]
    async fn test_regex_search_windows_line_endings() {
        // Regression test specifically for Windows \r\n line endings
        let temp_dir = TempDir::new().unwrap();

        let test_file = temp_dir.path().join("windows_endings.txt");
        let content = "first line\r\nsecond line\r\nmatch this\r\nfourth line\r\n";
        std::fs::write(&test_file, content).unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "match".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert_eq!(results.len(), 1);

        let result = &results[0];

        // Verify the match is on line 3
        assert_eq!(result.span.line_start, 3);

        // Verify byte offset accounts for \r\n endings:
        // "first line\r\n" = 12 bytes, "second line\r\n" = 13 bytes, total = 25 bytes before "match"
        let expected_byte_start = 25; // Position of "match" in the content
        assert_eq!(result.span.byte_start, expected_byte_start);
    }
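
    // Hedged extra regression sketch (not part of the original suite): a match on
    // the final line of a file that has no trailing newline. The expected byte
    // offset is derived from the content itself rather than hard-coded.
    #[tokio::test]
    async fn test_regex_search_no_trailing_newline() {
        let temp_dir = TempDir::new().unwrap();

        let test_file = temp_dir.path().join("no_trailing_newline.txt");
        let content = "alpha\nbeta\ngamma match";
        std::fs::write(&test_file, content).unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "match".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert_eq!(results.len(), 1);

        let result = &results[0];
        assert_eq!(result.span.line_start, 3); // Last line, even without a trailing newline
        assert_eq!(result.span.byte_start, content.find("match").unwrap());
    }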

    #[test]
    fn test_split_lines_with_endings_helper() {
        // Unix line endings
        let unix_content = "line1\nline2\nline3\n";
        let (unix_lines, unix_endings) = split_lines_with_endings(unix_content);
        assert_eq!(unix_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(unix_endings, vec![1, 1, 1]);

        // Windows line endings
        let windows_content = "line1\r\nline2\r\nline3\r\n";
        let (windows_lines, windows_endings) = split_lines_with_endings(windows_content);
        assert_eq!(windows_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(windows_endings, vec![2, 2, 2]);

        // Old Mac line endings
        let mac_content = "line1\rline2\rline3\r";
        let (mac_lines, mac_endings) = split_lines_with_endings(mac_content);
        assert_eq!(mac_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(mac_endings, vec![1, 1, 1]);

        // Mixed endings
        let mixed_content = "line1\nline2\r\nline3\r";
        let (mixed_lines, mixed_endings) = split_lines_with_endings(mixed_content);
        assert_eq!(mixed_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(mixed_endings, vec![1, 2, 1]);

        // No line endings
        let no_endings = "single line";
        let (no_lines, no_endings_vec) = split_lines_with_endings(no_endings);
        assert_eq!(no_lines, vec!["single line"]);
        assert_eq!(no_endings_vec, vec![0]);
    }
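
    // Hedged property sketch (not part of the original suite), assuming the second
    // value returned by split_lines_with_endings holds each line's ending length in
    // bytes as a usize: line lengths plus ending lengths should add back up to the
    // length of the input.
    #[test]
    fn test_split_lines_with_endings_lengths_sum_to_input() {
        for content in ["line1\nline2\r\nline3\r", "line1\r\nline2\n", "single line"] {
            let (lines, endings) = split_lines_with_endings(content);
            let total: usize =
                lines.iter().map(|l| l.len()).sum::<usize>() + endings.iter().sum::<usize>();
            assert_eq!(total, content.len(), "length mismatch for {:?}", content);
        }
    }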
}