use { std::{ fmt::Write, sync::{LazyLock, Mutex}, }, tantivy::schema, tempfile::TempDir, }; const SEARCH_INDEX_SIZE: usize = 10_000_000; const SEARCH_SIZE: usize = 10; static INDEX_PATH: LazyLock> = LazyLock::new(|| Mutex::new(TempDir::new().unwrap())); static SCHEMA: LazyLock> = LazyLock::new(|| { Mutex::new({ let mut schema_builder = schema::Schema::builder(); schema_builder.add_text_field("path", schema::TEXT | schema::STORED); schema_builder.add_text_field("description", schema::TEXT | schema::STORED); schema_builder.add_text_field("content", schema::TEXT | schema::STORED); schema_builder.build() }) }); static INDEX: LazyLock> = LazyLock::new(|| { Mutex::new({ tantivy::Index::create_in_dir( &(*INDEX_PATH.lock().unwrap()), (*SCHEMA.lock().unwrap()).clone(), ) .unwrap() }) }); static INDEX_WRITER: LazyLock> = LazyLock::new(|| { Mutex::new((*INDEX.lock().unwrap()).writer(SEARCH_INDEX_SIZE).unwrap()) }); pub(super) fn module(router: &mut windmark::router::Router) { crate::route::track_mount( router, "/search", "A search engine for this Gemini capsule", |context| { let mut response = String::from( "# Search\n\n=> /search?action=go Search!\n=> /random I'm Feeling \ Lucky", ); if let Some(query) = context.url.query_pairs().next() { if query.0 == "action" && query.1 == "go" { return windmark::response::Response::input( "What would you like to search for?", ); } { let schema = (*SCHEMA.lock().unwrap()).clone(); let path = schema.get_field("path").unwrap(); let description = schema.get_field("description").unwrap(); let content = schema.get_field("content").unwrap(); let mut results = String::new(); let index = INDEX.lock().unwrap(); let searcher = index .reader_builder() .reload_policy(tantivy::ReloadPolicy::OnCommit) .try_into() .unwrap() .searcher(); let top_documents = searcher .search( &tantivy::query::QueryParser::for_index(&index, vec![ path, description, content, ]) .parse_query(&query.0) .unwrap(), &tantivy::collector::TopDocs::with_limit(SEARCH_SIZE), ) .unwrap(); for (_score, document_address) in top_documents { let retrieved_document = searcher.doc(document_address).unwrap(); macro_rules! text { ($field:ident) => {{ retrieved_document.get_first($field).unwrap().as_text().unwrap() }}; /* ($document:ident, $field:ident) => {{ * $document.get_first($field).unwrap().as_text(). * unwrap() }}; */ } let _ = write!( results, "{}", &format!("=> {} {}{}\n", text!(path), text!(description), { let mut lines = retrieved_document .get_first(content) .unwrap() .as_text() .unwrap() .lines() .skip(2); lines.next().map_or_else(String::new, |first_line| { let mut context_lines = lines.skip_while(|l| { !l.to_lowercase().contains(&query.0.to_string()) }); format!( "\n> ... {}\n> {}\n> {} ...", first_line, context_lines.next().unwrap_or(""), context_lines.next().unwrap_or("") ) }) }) ); } let _ = write!( response, "{}", &format!( "\n\nYou searched for \"{}\"!\n\n## RESULTS\n\n{}\n\nIn need of \ more results? This search engine populates its index with \ route paths and route descriptions on startup. However, route \ content isn't populated until the route is first visited. \ After a route's first visit, it is updated after every five \ minutes, at time of visit.", query.0, if results.is_empty() { "There are no results for your query...".to_string() } else { results.trim_end().to_string() }, ) ); } } crate::response::success(&response, &context) }, ); } pub fn index() { info!("spawned search indexer"); loop { let schema = (*SCHEMA.lock().unwrap()).clone(); let path = schema.get_field("path").unwrap(); let description = schema.get_field("description").unwrap(); let content = schema.get_field("content").unwrap(); let time = tokio::time::Instant::now(); let mut new = 0; { let routes = crate::route::ROUTES.lock().unwrap(); let mut index_writer = INDEX_WRITER.lock().unwrap(); index_writer.delete_all_documents().unwrap(); for (route_path, information) in routes.iter() { index_writer .add_document(tantivy::doc!( path => route_path.clone(), description => information.description.clone(), content => information.text_cache.clone() )) .unwrap(); new += 1; } index_writer.commit().unwrap(); } info!( "commit {} new items into search index in {}ms", new, time.elapsed().as_nanos() as f64 / 1_000_000.0 ); std::thread::sleep(std::time::Duration::from_secs( crate::route::CACHE_RATE, )); } }