use anyhow::Result;
use ck_core::{CkError, IncludePattern, SearchMode, SearchOptions, SearchResult, Span};
use globset::{Glob, GlobSet, GlobSetBuilder};
use rayon::prelude::*;
use regex::{Regex, RegexBuilder};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{STORED, Schema, TEXT, Value};
use tantivy::{Index, ReloadPolicy, TantivyDocument, doc};
use walkdir::WalkDir;

mod semantic_v3;
pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};

pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
pub type IndexingProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
pub type DetailedIndexingProgressCallback = Box<dyn Fn(ck_index::EmbeddingProgress) + Send + Sync>;

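/// Resolve the path whose text content should actually be read for `file_path`.
/// PDFs are redirected to their preprocessed text cache under the index root;
/// all other files resolve to themselves.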
fn resolve_content_path(file_path: &Path, repo_root: &Path) -> Result<PathBuf> {
    if ck_core::pdf::is_pdf_file(file_path) {
        let cache_path = ck_core::pdf::get_content_cache_path(repo_root, file_path);
        if !cache_path.exists() {
            return Err(anyhow::anyhow!(
                "PDF not preprocessed. Run 'ck --index' first."
            ));
        }
        Ok(cache_path)
    } else {
        Ok(file_path.to_path_buf())
    }
}

fn read_file_content(file_path: &Path, repo_root: &Path) -> Result<String> {
    let content_path = resolve_content_path(file_path, repo_root)?;
    Ok(fs::read_to_string(content_path)?)
}

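/// Extract the text covered by `span` from `file_path`, resolving PDFs to
/// their cached text first. Only the span's line range is used; its byte
/// offsets are ignored.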
async fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
    let repo_root = find_nearest_index_root(file_path)
        .unwrap_or_else(|| file_path.parent().unwrap_or(file_path).to_path_buf());

    let content_path = resolve_content_path(file_path, &repo_root)?;

    extract_lines_from_file(&content_path, span.line_start, span.line_end)
}

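/// Read lines `line_start..=line_end` (1-based, inclusive) from a file and
/// join them with `\n`. Returns an empty string when `line_start` is 0 or
/// past the end of the file.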
fn extract_lines_from_file(file_path: &Path, line_start: usize, line_end: usize) -> Result<String> {
    use std::io::{BufRead, BufReader};

    if line_start == 0 {
        return Ok(String::new());
    }

    let file = fs::File::open(file_path)?;
    let reader = BufReader::new(file);
    let mut result = Vec::new();

    let start_idx = line_start.saturating_sub(1);
    let end_idx = line_end.saturating_sub(1);

    for (current_line, line_result) in reader.lines().enumerate() {
        if current_line > end_idx {
            break;
        }

        let line = line_result?;

        if current_line >= start_idx {
            result.push(line);
        }
    }

    if result.is_empty() && line_start > 0 {
        return Ok(String::new());
    }

    Ok(result.join("\n"))
}

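/// Split `content` into lines, also returning the byte length of each line's
/// terminator: 2 for `\r\n`, 1 for a lone `\n` or `\r`, and 0 for a final line
/// with no terminator. Callers use these lengths to keep byte offsets exact.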
fn split_lines_with_endings(content: &str) -> (Vec<String>, Vec<usize>) {
    let mut lines = Vec::new();
    let mut endings = Vec::new();

    let bytes = content.as_bytes();
    let mut start = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        match bytes[i] {
            b'\n' => {
                lines.push(content[start..i].to_string());
                endings.push(1);
                i += 1;
                start = i;
            }
            b'\r' => {
                lines.push(content[start..i].to_string());
                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
                    endings.push(2);
                    i += 2;
                } else {
                    endings.push(1);
                    i += 1;
                }
                start = i;
            }
            _ => {
                i += 1;
            }
        }
    }

    if start < bytes.len() {
        lines.push(content[start..].to_string());
        endings.push(0);
    }

    (lines, endings)
}

fn canonicalize_for_matching(path: &Path) -> PathBuf {
    if let Ok(canonical) = path.canonicalize() {
        return canonical;
    }

    if path.is_absolute() {
        path.to_path_buf()
    } else {
        std::env::current_dir()
            .map(|cwd| cwd.join(path))
            .unwrap_or_else(|_| path.to_path_buf())
    }
}

fn path_matches_include(path: &Path, include_patterns: &[IncludePattern]) -> bool {
    if include_patterns.is_empty() {
        return true;
    }

    let candidate = canonicalize_for_matching(path);
    include_patterns.iter().any(|pattern| {
        if pattern.is_dir {
            candidate.starts_with(&pattern.path)
        } else {
            candidate == pattern.path
        }
    })
}

fn filter_files_by_include(
    files: Vec<PathBuf>,
    include_patterns: &[IncludePattern],
) -> Vec<PathBuf> {
    if include_patterns.is_empty() {
        return files;
    }

    files
        .into_iter()
        .filter(|path| path_matches_include(path, include_patterns))
        .collect()
}

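/// Walk up from `path` (or its parent, for files) towards the filesystem root
/// and return the first directory that contains a `.ck` index, if any.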
fn find_nearest_index_root(path: &Path) -> Option<PathBuf> {
    let mut current = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };
    loop {
        if current.join(".ck").exists() {
            return Some(current.to_path_buf());
        }
        match current.parent() {
            Some(parent) => current = parent,
            None => return None,
        }
    }
}

#[derive(Clone, Debug)]
pub struct ResolvedModel {
    pub canonical_name: String,
    pub alias: String,
    pub dimensions: usize,
}

fn find_model_entry<'a>(
    registry: &'a ck_models::ModelRegistry,
    key: &str,
) -> Option<(String, &'a ck_models::ModelConfig)> {
    if let Some(config) = registry.get_model(key) {
        return Some((key.to_string(), config));
    }

    registry
        .models
        .iter()
        .find(|(_, config)| config.name == key)
        .map(|(alias, config)| (alias.clone(), config))
}

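/// Determine which embedding model to use for an index rooted at `index_root`.
/// An existing manifest wins: requesting a different model via the CLI is an
/// error that explains how to rebuild the index. Without a manifest, the CLI
/// choice (or the registry default) is used.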
pub(crate) fn resolve_model_from_root(
    index_root: &Path,
    cli_model: Option<&str>,
) -> Result<ResolvedModel> {
    use ck_models::ModelRegistry;

    let registry = ModelRegistry::default();
    let index_dir = index_root.join(".ck");
    let manifest_path = index_dir.join("manifest.json");

    if manifest_path.exists() {
        let data = std::fs::read(&manifest_path)?;
        let manifest: ck_index::IndexManifest = serde_json::from_slice(&data)?;

        if let Some(existing_model) = manifest.embedding_model {
            let (alias, config_opt) = find_model_entry(&registry, &existing_model)
                .map(|(alias, config)| (alias, Some(config)))
                .unwrap_or_else(|| (existing_model.clone(), None));

            let dims = manifest
                .embedding_dimensions
                .or_else(|| config_opt.map(|c| c.dimensions))
                .unwrap_or(384);

            if let Some(requested) = cli_model {
                let (_, requested_config) =
                    find_model_entry(&registry, requested).ok_or_else(|| {
                        CkError::Embedding(format!(
                            "Unknown model '{}'. Available models: {}",
                            requested,
                            registry
                                .models
                                .keys()
                                .cloned()
                                .collect::<Vec<_>>()
                                .join(", ")
                        ))
                    })?;

                if requested_config.name != existing_model {
                    let suggested_alias = alias.clone();
                    return Err(CkError::Embedding(format!(
                        "Index was built with embedding model '{}' (alias '{}'), but '--model {}' was requested. To switch models run `ck --clean .` then `ck --index --model {}`. To keep using this index rerun your command with '--model {}'.",
                        existing_model,
                        suggested_alias,
                        requested,
                        requested,
                        suggested_alias
                    ))
                    .into());
                }
            }

            return Ok(ResolvedModel {
                canonical_name: existing_model,
                alias,
                dimensions: dims,
            });
        }
    }

    let (alias, config) = if let Some(requested) = cli_model {
        find_model_entry(&registry, requested).ok_or_else(|| {
            CkError::Embedding(format!(
                "Unknown model '{}'. Available models: {}",
                requested,
                registry
                    .models
                    .keys()
                    .cloned()
                    .collect::<Vec<_>>()
                    .join(", ")
            ))
        })?
    } else {
        let alias = registry.default_model.clone();
        let config = registry.get_default_model().ok_or_else(|| {
            CkError::Embedding("No default embedding model configured".to_string())
        })?;
        (alias, config)
    };

    Ok(ResolvedModel {
        canonical_name: config.name.clone(),
        alias,
        dimensions: config.dimensions,
    })
}

pub fn resolve_model_for_path(path: &Path, cli_model: Option<&str>) -> Result<ResolvedModel> {
    let index_root = find_nearest_index_root(path).unwrap_or_else(|| {
        if path.is_file() {
            path.parent().unwrap_or(path).to_path_buf()
        } else {
            path.to_path_buf()
        }
    });
    resolve_model_from_root(&index_root, cli_model)
}

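/// Run a search described by `options` and return just the matches, dropping
/// the `closest_below_threshold` metadata that `search_enhanced` also reports.
///
/// A minimal sketch of a recursive regex search (mirrors this crate's tests;
/// crate paths and error handling are omitted, so the block is not compiled):
///
/// ```ignore
/// let options = SearchOptions {
///     mode: SearchMode::Regex,
///     query: "hello".to_string(),
///     path: std::path::PathBuf::from("."),
///     recursive: true,
///     case_insensitive: true,
///     ..Default::default()
/// };
/// let results = search(&options).await?;
/// for result in &results {
///     println!("{}:{}", result.file.display(), result.span.line_start);
/// }
/// ```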
pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    let results = search_enhanced(options).await?;
    Ok(results.matches)
}

pub async fn search_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<Vec<SearchResult>> {
    let results = search_enhanced_with_progress(options, progress_callback).await?;
    Ok(results.matches)
}

pub async fn search_enhanced(options: &SearchOptions) -> Result<ck_core::SearchResults> {
    search_enhanced_with_progress(options, None).await
}

pub async fn search_enhanced_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<ck_core::SearchResults> {
    search_enhanced_with_indexing_progress(options, progress_callback, None, None).await
}

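/// Full search entry point: validates the path, brings the `.ck` index up to
/// date for non-regex modes (building embeddings only when the mode needs
/// them), then dispatches to the regex, lexical, semantic, or hybrid backend.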
pub async fn search_enhanced_with_indexing_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
    indexing_progress_callback: Option<IndexingProgressCallback>,
    detailed_indexing_progress_callback: Option<DetailedIndexingProgressCallback>,
) -> Result<ck_core::SearchResults> {
    if !options.path.exists() {
        return Err(ck_core::CkError::Search(format!(
            "Path does not exist: {}",
            options.path.display()
        ))
        .into());
    }

    if !matches!(options.mode, SearchMode::Regex) {
        let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
        ensure_index_updated_with_progress(
            &options.path,
            options.reindex,
            need_embeddings,
            indexing_progress_callback,
            detailed_indexing_progress_callback,
            options.respect_gitignore,
            &options.exclude_patterns,
            options.embedding_model.as_deref(),
        )
        .await?;
    }

    let search_results = match options.mode {
        SearchMode::Regex => {
            let matches = regex_search(options)?;
            ck_core::SearchResults {
                matches,
                closest_below_threshold: None,
            }
        }
        SearchMode::Lexical => {
            let matches = lexical_search(options).await?;
            ck_core::SearchResults {
                matches,
                closest_below_threshold: None,
            }
        }
        SearchMode::Semantic => {
            semantic_search_v3_with_progress(options, progress_callback).await?
        }
        SearchMode::Hybrid => {
            let matches = hybrid_search_with_progress(options, progress_callback).await?;
            ck_core::SearchResults {
                matches,
                closest_below_threshold: None,
            }
        }
    };

    Ok(search_results)
}

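/// Grep-style search. Builds a regex from the query (escaping it when
/// `fixed_string` is set, wrapping it in `\b` when `whole_word` is set),
/// collects candidate files, scans them in parallel with rayon, and returns
/// matches sorted by file and line before applying any `top_k` limit.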
fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    let pattern = if options.fixed_string {
        regex::escape(&options.query)
    } else if options.whole_word {
        format!(r"\b{}\b", regex::escape(&options.query))
    } else {
        options.query.clone()
    };

    let regex = RegexBuilder::new(&pattern)
        .case_insensitive(options.case_insensitive)
        .build()
        .map_err(CkError::Regex)?;

    let should_recurse = options.path.is_dir() || options.recursive;
    let files = if should_recurse {
        let collected = ck_index::collect_files(
            &options.path,
            options.respect_gitignore,
            &options.exclude_patterns,
        )?;
        filter_files_by_include(collected, &options.include_patterns)
    } else {
        let collected = collect_files(&options.path, should_recurse, &options.exclude_patterns)?;
        filter_files_by_include(collected, &options.include_patterns)
    };

    let results: Vec<Vec<SearchResult>> = files
        .par_iter()
        .filter_map(|file_path| match search_file(&regex, file_path, options) {
            Ok(matches) => {
                if matches.is_empty() {
                    None
                } else {
                    Some(matches)
                }
            }
            Err(e) => {
                tracing::debug!("Error searching {:?}: {}", file_path, e);
                None
            }
        })
        .collect();

    let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
    all_results.sort_by(|a, b| {
        let path_cmp = a.file.cmp(&b.file);
        if path_cmp != std::cmp::Ordering::Equal {
            return path_cmp;
        }
        a.span.line_start.cmp(&b.span.line_start)
    });

    if let Some(top_k) = options.top_k {
        all_results.truncate(top_k);
    }

    Ok(all_results)
}

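/// Search a single file. When context lines or whole-section previews are
/// requested the file is loaded into memory so previews can span multiple
/// lines; otherwise it is scanned line by line in streaming fashion.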
fn search_file(
    regex: &Regex,
    file_path: &Path,
    options: &SearchOptions,
) -> Result<Vec<SearchResult>> {
    let repo_root = find_nearest_index_root(file_path)
        .unwrap_or_else(|| file_path.parent().unwrap_or(file_path).to_path_buf());

    if options.full_section || options.context_lines > 0 {
        let content = read_file_content(file_path, &repo_root)?;
        let (lines, line_ending_lengths) = split_lines_with_endings(&content);

        let code_sections = if options.full_section {
            extract_code_sections(file_path, &content)
        } else {
            None
        };

        search_file_in_memory(
            regex,
            file_path,
            options,
            &lines,
            &code_sections,
            &line_ending_lengths,
        )
    } else {
        search_file_streaming(regex, file_path, &repo_root, options)
    }
}

fn search_file_in_memory(
    regex: &Regex,
    file_path: &Path,
    options: &SearchOptions,
    lines: &[String],
    code_sections: &Option<Vec<(usize, usize, String)>>,
    line_ending_lengths: &[usize],
) -> Result<Vec<SearchResult>> {
    let mut results = Vec::new();
    let mut byte_offset = 0;

    for (line_idx, line) in lines.iter().enumerate() {
        let line_number = line_idx + 1;

        if regex.as_str().is_empty() {
            let preview = if options.full_section {
                if let Some(sections) = code_sections {
                    if let Some(section) = find_containing_section(sections, line_idx) {
                        section.clone()
                    } else {
                        get_context_preview(lines, line_idx, options)
                    }
                } else {
                    get_context_preview(lines, line_idx, options)
                }
            } else {
                get_context_preview(lines, line_idx, options)
            };

            results.push(SearchResult {
                file: file_path.to_path_buf(),
                span: Span {
                    byte_start: byte_offset,
                    byte_end: byte_offset + line.len(),
                    line_start: line_number,
                    line_end: line_number,
                },
                score: 1.0,
                preview,
                lang: ck_core::Language::from_path(file_path),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            });
        } else {
            for mat in regex.find_iter(line) {
                let preview = if options.full_section {
                    if let Some(sections) = code_sections {
                        if let Some(section) = find_containing_section(sections, line_idx) {
                            section.clone()
                        } else {
                            get_context_preview(lines, line_idx, options)
                        }
                    } else {
                        get_context_preview(lines, line_idx, options)
                    }
                } else {
                    get_context_preview(lines, line_idx, options)
                };

                results.push(SearchResult {
                    file: file_path.to_path_buf(),
                    span: Span {
                        byte_start: byte_offset + mat.start(),
                        byte_end: byte_offset + mat.end(),
                        line_start: line_number,
                        line_end: line_number,
                    },
                    score: 1.0,
                    preview,
                    lang: ck_core::Language::from_path(file_path),
                    symbol: None,
                    chunk_hash: None,
                    index_epoch: None,
                });
            }
        }

        byte_offset += line.len();
        byte_offset += line_ending_lengths.get(line_idx).copied().unwrap_or(0);
    }

    Ok(results)
}

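/// Line-by-line scan that never holds the whole file in memory. Trailing
/// `\r\n`/`\n`/`\r` terminators are stripped per line, and any embedded `\r`
/// characters are treated as additional line breaks so byte offsets and line
/// numbers stay consistent with `split_lines_with_endings`.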
fn search_file_streaming(
    regex: &Regex,
    file_path: &Path,
    repo_root: &Path,
    _options: &SearchOptions,
) -> Result<Vec<SearchResult>> {
    use std::io::{BufRead, BufReader};

    let content_path = resolve_content_path(file_path, repo_root)?;
    let file = std::fs::File::open(&content_path)?;
    let mut reader = BufReader::new(file);

    let mut results = Vec::new();
    let mut line = String::new();
    let mut byte_offset = 0usize;
    let mut line_number = 1usize;

    loop {
        line.clear();
        let bytes_read = reader.read_line(&mut line)?;
        if bytes_read == 0 {
            break;
        }

        let mut newline_len = 0usize;
        if line.ends_with("\r\n") {
            line.pop();
            line.pop();
            newline_len = 2;
        } else if line.ends_with(['\n', '\r']) {
            line.pop();
            newline_len = 1;
        }

        let treat_cr_as_newline = line.contains('\r');

        if treat_cr_as_newline {
            let bytes = line.as_bytes();
            let mut segment_start = 0usize;
            while segment_start <= bytes.len() {
                match bytes[segment_start..].iter().position(|&b| b == b'\r') {
                    Some(rel_idx) => {
                        let idx = segment_start + rel_idx;
                        let segment_bytes = &bytes[segment_start..idx];
                        let segment_str = std::str::from_utf8(segment_bytes)?;
                        process_streaming_line(
                            regex,
                            file_path,
                            segment_str,
                            line_number,
                            byte_offset,
                            &mut results,
                        );
                        byte_offset += segment_bytes.len() + 1;
                        line_number += 1;
                        segment_start = idx + 1;
                    }
                    None => {
                        let segment_bytes = &bytes[segment_start..];
                        let segment_str = std::str::from_utf8(segment_bytes)?;
                        process_streaming_line(
                            regex,
                            file_path,
                            segment_str,
                            line_number,
                            byte_offset,
                            &mut results,
                        );
                        byte_offset += segment_bytes.len();
                        line_number += 1;
                        break;
                    }
                }
            }
            byte_offset += newline_len;
        } else {
            let line_str = line.as_str();
            process_streaming_line(
                regex,
                file_path,
                line_str,
                line_number,
                byte_offset,
                &mut results,
            );
            byte_offset += line_str.len() + newline_len;
            line_number += 1;
        }
    }

    Ok(results)
}

fn process_streaming_line(
    regex: &Regex,
    file_path: &Path,
    line: &str,
    line_number: usize,
    byte_offset: usize,
    results: &mut Vec<SearchResult>,
) {
    if regex.as_str().is_empty() {
        results.push(SearchResult {
            file: file_path.to_path_buf(),
            span: Span {
                byte_start: byte_offset,
                byte_end: byte_offset + line.len(),
                line_start: line_number,
                line_end: line_number,
            },
            score: 1.0,
            preview: line.to_string(),
            lang: ck_core::Language::from_path(file_path),
            symbol: None,
            chunk_hash: None,
            index_epoch: None,
        });
    } else {
        for mat in regex.find_iter(line) {
            results.push(SearchResult {
                file: file_path.to_path_buf(),
                span: Span {
                    byte_start: byte_offset + mat.start(),
                    byte_end: byte_offset + mat.end(),
                    line_start: line_number,
                    line_end: line_number,
                },
                score: 1.0,
                preview: line.to_string(),
                lang: ck_core::Language::from_path(file_path),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            });
        }
    }
}

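/// Lexical (full-text) search backed by the tantivy index stored under
/// `.ck/tantivy_index`. Builds that index on first use, runs the parsed
/// query, normalizes scores against the best hit, and drops results below
/// the optional threshold.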
async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
        if options.path.is_file() {
            options.path.parent().unwrap_or(&options.path).to_path_buf()
        } else {
            options.path.clone()
        }
    });

    let index_dir = index_root.join(".ck");
    if !index_dir.exists() {
        return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
    }

    let tantivy_index_path = index_dir.join("tantivy_index");

    if !tantivy_index_path.exists() {
        return build_tantivy_index(options).await;
    }

    let mut schema_builder = Schema::builder();
    let content_field = schema_builder.add_text_field("content", TEXT | STORED);
    let path_field = schema_builder.add_text_field("path", TEXT | STORED);
    let _schema = schema_builder.build();

    let index = Index::open_in_dir(&tantivy_index_path)
        .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;

    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommitWithDelay)
        .try_into()
        .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;

    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![content_field]);

    let query = query_parser
        .parse_query(&options.query)
        .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;

    let top_docs = if let Some(top_k) = options.top_k {
        searcher.search(&query, &TopDocs::with_limit(top_k))?
    } else {
        searcher.search(&query, &TopDocs::with_limit(100))?
    };

    let mut raw_results = Vec::new();
    for (_score, doc_address) in top_docs {
        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
        let path_text = retrieved_doc
            .get_first(path_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");
        let content_text = retrieved_doc
            .get_first(content_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");

        let file_path = PathBuf::from(path_text);
        if !path_matches_include(&file_path, &options.include_patterns) {
            continue;
        }
        let preview = if options.full_section {
            content_text.to_string()
        } else {
            content_text.lines().take(3).collect::<Vec<_>>().join("\n")
        };

        raw_results.push((
            _score,
            SearchResult {
                file: file_path,
                span: Span {
                    byte_start: 0,
                    byte_end: content_text.len(),
                    line_start: 1,
                    line_end: content_text.lines().count(),
                },
                score: _score,
                preview,
                lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            },
        ));
    }

    let mut results = Vec::new();
    if !raw_results.is_empty() {
        let max_score = raw_results
            .iter()
            .map(|(score, _)| *score)
            .fold(0.0f32, f32::max);
        if max_score > 0.0 {
            for (raw_score, mut result) in raw_results {
                let normalized_score = raw_score / max_score;

                if let Some(threshold) = options.threshold
                    && normalized_score < threshold
                {
                    continue;
                }

                result.score = normalized_score;
                results.push(result);
            }
        }
    }

    Ok(results)
}

async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    let index_root = if options.path.is_file() {
        options.path.parent().unwrap_or(&options.path)
    } else {
        &options.path
    };

    let index_dir = index_root.join(".ck");
    let tantivy_index_path = index_dir.join("tantivy_index");

    fs::create_dir_all(&tantivy_index_path)?;

    let mut schema_builder = Schema::builder();
    let content_field = schema_builder.add_text_field("content", TEXT | STORED);
    let path_field = schema_builder.add_text_field("path", TEXT | STORED);
    let schema = schema_builder.build();

    let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
        .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;

    let mut index_writer = index
        .writer(50_000_000)
        .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;

    let files = filter_files_by_include(
        collect_files(index_root, true, &options.exclude_patterns)?,
        &options.include_patterns,
    );

    for file_path in &files {
        if let Ok(content) = fs::read_to_string(file_path) {
            let doc = doc!(
                content_field => content,
                path_field => file_path.display().to_string()
            );
            index_writer.add_document(doc)?;
        }
    }

    index_writer
        .commit()
        .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;

    let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
    let mut schema_builder = Schema::builder();
    let content_field = schema_builder.add_text_field("content", TEXT | STORED);
    let path_field = schema_builder.add_text_field("path", TEXT | STORED);
    let _schema = schema_builder.build();

    let index = Index::open_in_dir(&tantivy_index_path)
        .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;

    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommitWithDelay)
        .try_into()
        .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;

    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![content_field]);

    let query = query_parser
        .parse_query(&options.query)
        .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;

    let top_docs = if let Some(top_k) = options.top_k {
        searcher.search(&query, &TopDocs::with_limit(top_k))?
    } else {
        searcher.search(&query, &TopDocs::with_limit(100))?
    };

    let mut raw_results = Vec::new();
    for (_score, doc_address) in top_docs {
        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
        let path_text = retrieved_doc
            .get_first(path_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");
        let content_text = retrieved_doc
            .get_first(content_field)
            .map(|field_value| field_value.as_str().unwrap_or(""))
            .unwrap_or("");

        let file_path = PathBuf::from(path_text);
        let preview = if options.full_section {
            content_text.to_string()
        } else {
            content_text.lines().take(3).collect::<Vec<_>>().join("\n")
        };

        raw_results.push((
            _score,
            SearchResult {
                file: file_path,
                span: Span {
                    byte_start: 0,
                    byte_end: content_text.len(),
                    line_start: 1,
                    line_end: content_text.lines().count(),
                },
                score: _score,
                preview,
                lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
                symbol: None,
                chunk_hash: None,
                index_epoch: None,
            },
        ));
    }

    let mut results = Vec::new();
    if !raw_results.is_empty() {
        let max_score = raw_results
            .iter()
            .map(|(score, _)| *score)
            .fold(0.0f32, f32::max);
        if max_score > 0.0 {
            for (raw_score, mut result) in raw_results {
                let normalized_score = raw_score / max_score;

                if let Some(threshold) = options.threshold
                    && normalized_score < threshold
                {
                    continue;
                }

                result.score = normalized_score;
                results.push(result);
            }
        }
    }

    Ok(results)
}

#[allow(dead_code)]
async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    hybrid_search_with_progress(options, None).await
}

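/// Hybrid search: runs the regex and semantic backends and merges their
/// rankings with reciprocal rank fusion, scoring each match as the sum of
/// 1 / (60 + rank) over the result lists it appears in, then filters by
/// threshold and include patterns, sorts by fused score, and truncates to
/// `top_k`.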
async fn hybrid_search_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<Vec<SearchResult>> {
    if let Some(ref callback) = progress_callback {
        callback("Running regex search...");
    }
    let regex_results = regex_search(options)?;

    if let Some(ref callback) = progress_callback {
        callback("Running semantic search...");
    }
    let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;

    let mut combined = HashMap::new();

    for (rank, result) in regex_results.iter().enumerate() {
        let key = format!("{}:{}", result.file.display(), result.span.line_start);
        combined
            .entry(key)
            .or_insert(Vec::new())
            .push((rank + 1, result.clone()));
    }

    for (rank, result) in semantic_results.matches.iter().enumerate() {
        let key = format!("{}:{}", result.file.display(), result.span.line_start);
        combined
            .entry(key)
            .or_insert(Vec::new())
            .push((rank + 1, result.clone()));
    }

    let mut rrf_results: Vec<SearchResult> = combined
        .into_values()
        .map(|ranks| {
            let mut result = ranks[0].1.clone();
            let rrf_score = ranks
                .iter()
                .map(|(rank, _)| 1.0 / (60.0 + *rank as f32))
                .sum();
            result.score = rrf_score;
            result
        })
        .filter(|result| {
            if let Some(threshold) = options.threshold {
                result.score >= threshold
            } else {
                true
            }
        })
        .collect();

    rrf_results.retain(|result| path_matches_include(&result.file, &options.include_patterns));

    rrf_results.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    if let Some(top_k) = options.top_k {
        rrf_results.truncate(top_k);
    }

    Ok(rrf_results)
}

fn build_globset(patterns: &[String]) -> GlobSet {
    let mut builder = GlobSetBuilder::new();
    for pat in patterns {
        if let Ok(glob) = Glob::new(pat) {
            builder.add(glob);
        }
    }
    builder.build().unwrap_or_else(|_| GlobSet::empty())
}

fn should_exclude_path(path: &Path, globset: &GlobSet) -> bool {
    if globset.is_match(path) {
        return true;
    }
    for component in path.components() {
        if let std::path::Component::Normal(name) = component
            && globset.is_match(name)
        {
            return true;
        }
    }
    false
}

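/// Collect the files to scan when searching without the indexer: a single
/// file is returned as-is, directories are walked (recursively or one level
/// deep), and anything matching the exclude globs is skipped.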
fn collect_files(
    path: &Path,
    recursive: bool,
    exclude_patterns: &[String],
) -> Result<Vec<PathBuf>> {
    let mut files = Vec::new();
    let globset = build_globset(exclude_patterns);

    if path.is_file() {
        files.push(path.to_path_buf());
    } else if recursive {
        for entry in WalkDir::new(path).into_iter().filter_entry(|e| {
            let name = e.file_name();
            !globset.is_match(e.path()) && !globset.is_match(name)
        }) {
            match entry {
                Ok(entry) => {
                    if entry.file_type().is_file() && !should_exclude_path(entry.path(), &globset) {
                        files.push(entry.path().to_path_buf());
                    }
                }
                Err(e) => {
                    tracing::debug!("Skipping path due to error: {}", e);
                    continue;
                }
            }
        }
    } else {
        match fs::read_dir(path) {
            Ok(read_dir) => {
                for entry in read_dir {
                    match entry {
                        Ok(entry) => {
                            let path = entry.path();
                            if path.is_file() && !should_exclude_path(&path, &globset) {
                                files.push(path);
                            }
                        }
                        Err(e) => {
                            tracing::debug!("Skipping directory entry due to error: {}", e);
                            continue;
                        }
                    }
                }
            }
            Err(e) => {
                tracing::debug!("Cannot read directory {:?}: {}", path, e);
                return Err(e.into());
            }
        }
    }

    Ok(files)
}

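/// Bring the `.ck` index covering `path` up to date before an index-backed
/// search. When `force_reindex` is set the smart updater runs over the whole
/// index root; otherwise a single file is reindexed directly and a directory
/// goes through the incremental smart update.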
#[allow(clippy::too_many_arguments)]
async fn ensure_index_updated_with_progress(
    path: &Path,
    force_reindex: bool,
    need_embeddings: bool,
    progress_callback: Option<ck_index::ProgressCallback>,
    detailed_progress_callback: Option<ck_index::DetailedProgressCallback>,
    respect_gitignore: bool,
    exclude_patterns: &[String],
    model_override: Option<&str>,
) -> Result<()> {
    let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
        if path.is_file() {
            path.parent().unwrap_or(path).to_path_buf()
        } else {
            path.to_path_buf()
        }
    });
    let index_root = &index_root_buf;

    if force_reindex {
        let stats = ck_index::smart_update_index_with_detailed_progress(
            index_root,
            true,
            progress_callback,
            detailed_progress_callback,
            need_embeddings,
            respect_gitignore,
            exclude_patterns,
            model_override,
        )
        .await?;
        if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
            tracing::info!(
                "Index updated: {} files indexed, {} orphaned files removed",
                stats.files_indexed,
                stats.orphaned_files_removed
            );
        }
        return Ok(());
    }

    if path.is_file() {
        use ck_index::index_file;
        index_file(path, need_embeddings).await?;
    } else {
        let stats = ck_index::smart_update_index_with_detailed_progress(
            index_root,
            false,
            progress_callback,
            detailed_progress_callback,
            need_embeddings,
            respect_gitignore,
            exclude_patterns,
            model_override,
        )
        .await?;
        if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
            tracing::info!(
                "Index updated: {} files indexed, {} orphaned files removed",
                stats.files_indexed,
                stats.orphaned_files_removed
            );
        }
    }

    Ok(())
}

fn get_context_preview(lines: &[String], line_idx: usize, options: &SearchOptions) -> String {
    let before = options.before_context_lines.max(options.context_lines);
    let after = options.after_context_lines.max(options.context_lines);

    if before > 0 || after > 0 {
        let start_idx = line_idx.saturating_sub(before);
        let end_idx = (line_idx + after + 1).min(lines.len());
        lines[start_idx..end_idx].join("\n")
    } else {
        lines[line_idx].to_string()
    }
}

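/// Chunk a file with ck_chunk and return its function, class, and method
/// sections as `(start_line, end_line, text)` tuples with zero-based,
/// inclusive line indices. Returns `None` for unknown languages or when no
/// such sections exist; used to build whole-section previews for
/// `full_section` searches.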
fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
    let lang = ck_core::Language::from_path(file_path)?;

    if let Ok(chunks) = ck_chunk::chunk_text(content, Some(lang)) {
        let sections: Vec<(usize, usize, String)> = chunks
            .into_iter()
            .filter(|chunk| {
                matches!(
                    chunk.chunk_type,
                    ck_chunk::ChunkType::Function
                        | ck_chunk::ChunkType::Class
                        | ck_chunk::ChunkType::Method
                )
            })
            .map(|chunk| {
                (
                    chunk.span.line_start - 1,
                    chunk.span.line_end - 1,
                    chunk.text,
                )
            })
            .collect();

        if sections.is_empty() {
            None
        } else {
            Some(sections)
        }
    } else {
        None
    }
}

fn find_containing_section(
    sections: &[(usize, usize, String)],
    line_idx: usize,
) -> Option<&String> {
    for (start, end, text) in sections {
        if line_idx >= *start && line_idx <= *end {
            return Some(text);
        }
    }
    None
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
        let files = vec![
            ("test1.txt", "hello world rust programming"),
            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
            ("test3.py", "print('Hello Python')"),
            ("test4.txt", "machine learning artificial intelligence"),
        ];

        let mut paths = Vec::new();
        for (name, content) in files {
            let path = dir.join(name);
            fs::write(&path, content).unwrap();
            paths.push(path);
        }
        paths
    }

    #[test]
    fn test_extract_lines_from_file() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test_lines.txt");

        let content =
            "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10";
        fs::write(&test_file, content).unwrap();

        let result = extract_lines_from_file(&test_file, 3, 5).unwrap();
        assert_eq!(result, "Line 3\nLine 4\nLine 5");

        let result = extract_lines_from_file(&test_file, 7, 7).unwrap();
        assert_eq!(result, "Line 7");

        let result = extract_lines_from_file(&test_file, 8, 100).unwrap();
        assert_eq!(result, "Line 8\nLine 9\nLine 10");

        let result = extract_lines_from_file(&test_file, 0, 5).unwrap();
        assert_eq!(result, "");

        let result = extract_lines_from_file(&test_file, 20, 25).unwrap();
        assert_eq!(result, "");
    }

    #[tokio::test]
    async fn test_extract_content_from_span() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("code.rs");

        let content = "fn first() {\n println!(\"First\");\n}\n\nfn second() {\n println!(\"Second\");\n}\n\nfn third() {\n println!(\"Third\");\n}";
        fs::write(&test_file, content).unwrap();

        let span = ck_core::Span {
            byte_start: 0,
            byte_end: 0,
            line_start: 5,
            line_end: 7,
        };

        let result = extract_content_from_span(&test_file, &span).await.unwrap();
        assert_eq!(result, "fn second() {\n println!(\"Second\");\n}");

        let span = ck_core::Span {
            byte_start: 0,
            byte_end: 0,
            line_start: 2,
            line_end: 2,
        };

        let result = extract_content_from_span(&test_file, &span).await.unwrap();
        assert_eq!(result, " println!(\"First\");");
    }

    #[test]
    fn test_collect_files() {
        let temp_dir = TempDir::new().unwrap();
        let test_files = create_test_files(temp_dir.path());

        let files = collect_files(temp_dir.path(), false, &[]).unwrap();
        assert_eq!(files.len(), 4);

        let files = collect_files(temp_dir.path(), true, &[]).unwrap();
        assert_eq!(files.len(), 4);

        let files = collect_files(&test_files[0], false, &[]).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0], test_files[0]);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());

        let rust_matches: Vec<_> = results
            .iter()
            .filter(|r| r.preview.to_lowercase().contains("rust"))
            .collect();
        assert!(!rust_matches.is_empty());
    }

    #[test]
    fn test_regex_search_case_insensitive() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "HELLO".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_fixed_string() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "fn main()".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            fixed_string: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_whole_word() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(
            temp_dir.path().join("word_test.txt"),
            "rust rusty rustacean",
        )
        .unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            whole_word: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_top_k() {
        let temp_dir = TempDir::new().unwrap();

        for i in 0..10 {
            fs::write(
                temp_dir.path().join(format!("file{}.txt", i)),
                "test content",
            )
            .unwrap();
        }

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            top_k: Some(5),
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(results.len() <= 5);
    }

    #[test]
    fn test_regex_search_span_offsets() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("spans.txt");
        fs::write(&test_file, "test test test\nline two test\ntest end").unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();

        assert_eq!(results.len(), 5);

        let line1_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 1).collect();
        assert_eq!(line1_matches.len(), 3);
        assert_eq!(line1_matches[0].span.byte_start, 0);
        assert_eq!(line1_matches[1].span.byte_start, 5);
        assert_eq!(line1_matches[2].span.byte_start, 10);

        let line2_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 2).collect();
        assert_eq!(line2_matches.len(), 1);
        assert_eq!(line2_matches[0].span.byte_start, 24);

        let mut byte_starts: Vec<_> = results.iter().map(|r| r.span.byte_start).collect();
        byte_starts.sort();
        byte_starts.dedup();
        assert_eq!(byte_starts.len(), 5);
    }

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(
            &file_path,
            "line 1: hello\nline 2: world\nline 3: rust programming",
        )
        .unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 3);
        assert!(results[0].preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        println!("Preview: '{}'", results[0].preview);

        assert!(results[0].preview.contains("line 2"));
        assert!(results[0].preview.contains("target line"));
        assert!(results[0].preview.contains("line 4"));
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "hello".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }

    #[tokio::test]
    async fn test_regex_search_mixed_line_endings() {
        let temp_dir = TempDir::new().unwrap();

        let test_file = temp_dir.path().join("mixed_endings.txt");
        let content = "line1\r\nline2\nline3\r\npattern here\nline5\r\n";
        std::fs::write(&test_file, content).unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "pattern".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert_eq!(results.len(), 1);

        let result = &results[0];
        let original_content = std::fs::read_to_string(&test_file).unwrap();
        let pattern_start = original_content.find("pattern").unwrap();

        assert_eq!(result.span.byte_start, pattern_start);
        assert_eq!(result.span.line_start, 4);
    }

    #[tokio::test]
    async fn test_regex_search_windows_line_endings() {
        let temp_dir = TempDir::new().unwrap();

        let test_file = temp_dir.path().join("windows_endings.txt");
        let content = "first line\r\nsecond line\r\nmatch this\r\nfourth line\r\n";
        std::fs::write(&test_file, content).unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "match".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert_eq!(results.len(), 1);

        let result = &results[0];

        assert_eq!(result.span.line_start, 3);

        let expected_byte_start = 25;
        assert_eq!(result.span.byte_start, expected_byte_start);
    }

    #[test]
    fn test_split_lines_with_endings_helper() {
        let unix_content = "line1\nline2\nline3\n";
        let (unix_lines, unix_endings) = split_lines_with_endings(unix_content);
        assert_eq!(unix_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(unix_endings, vec![1, 1, 1]);

        let windows_content = "line1\r\nline2\r\nline3\r\n";
        let (windows_lines, windows_endings) = split_lines_with_endings(windows_content);
        assert_eq!(windows_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(windows_endings, vec![2, 2, 2]);

        let mac_content = "line1\rline2\rline3\r";
        let (mac_lines, mac_endings) = split_lines_with_endings(mac_content);
        assert_eq!(mac_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(mac_endings, vec![1, 1, 1]);

        let mixed_content = "line1\nline2\r\nline3\r";
        let (mixed_lines, mixed_endings) = split_lines_with_endings(mixed_content);
        assert_eq!(mixed_lines, vec!["line1", "line2", "line3"]);
        assert_eq!(mixed_endings, vec![1, 2, 1]);

        let no_endings = "single line";
        let (no_lines, no_endings_vec) = split_lines_with_endings(no_endings);
        assert_eq!(no_lines, vec!["single line"]);
        assert_eq!(no_endings_vec, vec![0]);
    }
}