diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/modules/search.rs | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/src/modules/search.rs b/src/modules/search.rs new file mode 100644 index 0000000..57f2c0c --- /dev/null +++ b/src/modules/search.rs @@ -0,0 +1,202 @@ +// This file is part of Locus <https://github.com/gemrest/locus>. +// Copyright (C) 2022-2022 Fuwn <[email protected]> +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +// Copyright (C) 2022-2022 Fuwn <[email protected]> +// SPDX-License-Identifier: GPL-3.0-only + +use std::{lazy::SyncLazy, sync::Mutex}; + +use tantivy::schema; +use tempfile::TempDir; + +const SEARCH_INDEX_SIZE: usize = 10_000_000; + +static INDEX_PATH: SyncLazy<Mutex<TempDir>> = + SyncLazy::new(|| Mutex::new(TempDir::new().unwrap())); +static SCHEMA: SyncLazy<Mutex<schema::Schema>> = SyncLazy::new(|| { + Mutex::new({ + let mut schema_builder = schema::Schema::builder(); + + schema_builder.add_text_field("path", schema::TEXT | schema::STORED); + schema_builder.add_text_field("description", schema::TEXT | schema::STORED); + schema_builder.add_text_field("content", schema::TEXT | schema::STORED); + + schema_builder.build() + }) +}); +static INDEX: SyncLazy<Mutex<tantivy::Index>> = SyncLazy::new(|| { + Mutex::new({ + tantivy::Index::create_in_dir( + &(*INDEX_PATH.lock().unwrap()), + (*SCHEMA.lock().unwrap()).clone(), + ) + .unwrap() + }) +}); +static INDEX_WRITER: SyncLazy<Mutex<tantivy::IndexWriter>> = + SyncLazy::new(|| { + Mutex::new((*INDEX.lock().unwrap()).writer(SEARCH_INDEX_SIZE).unwrap()) + }); + +pub(super) fn module(router: &mut windmark::Router) { + crate::route::track_mount( + router, + "/search", + "A search engine for this Gemini capsule", + Box::new(|context| { + let mut response = String::from( + "# SEARCH\n\n=> /search?action=go Search!\n=> /random I'm Feeling \ + Lucky", + ); + + if let Some(query) = context.url.query_pairs().next() { + if query.0 == "action" && query.1 == "go" { + return windmark::Response::Input( + "What would you like to search for?".to_string(), + ); + } + + { + let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap(); + let description = + (*SCHEMA.lock().unwrap()).get_field("description").unwrap(); + let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap(); + let mut results = String::new(); + + let searcher = (*INDEX.lock().unwrap()) + .reader_builder() + .reload_policy(tantivy::ReloadPolicy::OnCommit) + .try_into() + .unwrap() + .searcher(); + let top_docs = searcher + .search( + &tantivy::query::QueryParser::for_index( + &(*INDEX.lock().unwrap()), + vec![path, description, content], + ) + .parse_query(&query.0.to_string()) + .unwrap(), + &tantivy::collector::TopDocs::with_limit(crate::SEARCH_SIZE), + ) + .unwrap(); + + for (_score, document_address) in top_docs { + let retrieved_document = searcher.doc(document_address).unwrap(); + + macro_rules! text { + ($field:ident) => {{ + retrieved_document + .get_first($field) + .unwrap() + .as_text() + .unwrap() + }}; + ($document:ident, $field:ident) => {{ + $document.get_first($field).unwrap().as_text().unwrap() + }}; + } + + results += + &format!("=> {} {}{}\n", text!(path), text!(description), { + let mut lines = retrieved_document + .get_first(content) + .unwrap() + .as_text() + .unwrap() + .lines() + .skip(2); + + lines.next().map_or_else( + || "".to_string(), + |first_line| { + let mut context_lines = lines.skip_while(|l| { + !l.to_lowercase().contains(&query.0.to_string()) + }); + + format!( + "\n> ... {}\n> {}\n> {} ...", + first_line, + context_lines.next().unwrap_or(""), + context_lines.next().unwrap_or("") + ) + }, + ) + }); + } + + response += &format!( + "\n\nYou searched for \"{}\"!\n\n## RESULTS\n\n{}\n\nIn need of \ + more results? This search engine populates its index with route \ + paths and route descriptions on startup. However, route content \ + isn't populated until the route is first visited. After a \ + route's first visit, it is updated after every five minutes, at \ + time of visit.", + query.0, + if results.is_empty() { + "There are no results for your query...".to_string() + } else { + results.trim_end().to_string() + }, + ); + } + } + + crate::success!(response, context) + }), + ); +} + +pub fn index() { + info!("spawned search indexer"); + + loop { + let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap(); + let description = + (*SCHEMA.lock().unwrap()).get_field("description").unwrap(); + let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap(); + let time = tokio::time::Instant::now(); + let mut new = 0; + + for (route_path, information) in &(*crate::ROUTES.lock().unwrap()) { + // Pretty inefficient, but I'll figure this out later. + (*INDEX_WRITER.lock().unwrap()) + .delete_all_documents() + .unwrap(); + + (*INDEX_WRITER.lock().unwrap()) + .add_document(tantivy::doc!( + path => route_path.clone(), + description => information.description.clone(), + content => information.text_cache.clone() + )) + .unwrap(); + + new += 1; + } + + (*INDEX_WRITER.lock().unwrap()).commit().unwrap(); + + info!( + "commit {} new items into search index in {}ms", + new, + time.elapsed().as_nanos() as f64 / 1_000_000.0 + ); + + std::thread::sleep(std::time::Duration::from_secs( + crate::route::CACHE_RATE, + )); + } +} |