diff options
| author | Fuwn <[email protected]> | 2022-04-27 01:11:01 -0700 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2022-04-27 01:11:01 -0700 |
| commit | b909b6bb55ce3bccb76891494c9e0ca2c61c6072 (patch) | |
| tree | 54ea80609e16fd004d9f13cf7a0ece5b7b83c098 /src | |
| parent | format: unclarify (diff) | |
| download | locus-b909b6bb55ce3bccb76891494c9e0ca2c61c6072.tar.xz locus-b909b6bb55ce3bccb76891494c9e0ca2c61c6072.zip | |
refactor(search): restrict search internals
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 3 | ||||
| -rw-r--r-- | src/modules/mod.rs | 2 | ||||
| -rw-r--r-- | src/modules/search.rs | 128 | ||||
| -rw-r--r-- | src/route.rs | 3 | ||||
| -rw-r--r-- | src/search.rs | 95 |
5 files changed, 3 insertions, 228 deletions
diff --git a/src/main.rs b/src/main.rs index 404ea18..0f9dd55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,7 +32,6 @@ mod macros; mod modules; mod route; -mod search; #[macro_use] extern crate log; @@ -118,7 +117,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { time_mounts("module", &mut time_mount, || stateless!(router, modules)); - std::thread::spawn(search::index); + std::thread::spawn(modules::search::index); router.run().await } diff --git a/src/modules/mod.rs b/src/modules/mod.rs index bdbaef1..7dc5262 100644 --- a/src/modules/mod.rs +++ b/src/modules/mod.rs @@ -22,7 +22,7 @@ mod multi_blog; mod random; mod remarks; mod router; -mod search; +pub mod search; mod sitemap; mod r#static; mod uptime; diff --git a/src/modules/search.rs b/src/modules/search.rs deleted file mode 100644 index b63923b..0000000 --- a/src/modules/search.rs +++ /dev/null @@ -1,128 +0,0 @@ -// This file is part of Locus <https://github.com/gemrest/locus>. -// Copyright (C) 2022-2022 Fuwn <[email protected]> -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 3. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. -// -// Copyright (C) 2022-2022 Fuwn <[email protected]> -// SPDX-License-Identifier: GPL-3.0-only - -use crate::search::{INDEX, SCHEMA}; - -pub fn module(router: &mut windmark::Router) { - crate::route::track_mount( - router, - "/search", - "A search engine for this Gemini capsule", - Box::new(|context| { - let mut response = String::from( - "# SEARCH\n\n=> /search?action=go Search!\n=> /random I'm Feeling \ - Lucky", - ); - - if let Some(query) = context.url.query_pairs().next() { - if query.0 == "action" && query.1 == "go" { - return windmark::Response::Input( - "What would you like to search for?".to_string(), - ); - } - - { - let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap(); - let description = - (*SCHEMA.lock().unwrap()).get_field("description").unwrap(); - let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap(); - let mut results = String::new(); - - let searcher = (*INDEX.lock().unwrap()) - .reader_builder() - .reload_policy(tantivy::ReloadPolicy::OnCommit) - .try_into() - .unwrap() - .searcher(); - let top_docs = searcher - .search( - &tantivy::query::QueryParser::for_index( - &(*INDEX.lock().unwrap()), - vec![path, description, content], - ) - .parse_query(&query.0.to_string()) - .unwrap(), - &tantivy::collector::TopDocs::with_limit(crate::SEARCH_SIZE), - ) - .unwrap(); - - for (_score, document_address) in top_docs { - let retrieved_document = searcher.doc(document_address).unwrap(); - - macro_rules! text { - ($field:ident) => {{ - retrieved_document - .get_first($field) - .unwrap() - .as_text() - .unwrap() - }}; - ($document:ident, $field:ident) => {{ - $document.get_first($field).unwrap().as_text().unwrap() - }}; - } - - results += - &format!("=> {} {}{}\n", text!(path), text!(description), { - let mut lines = retrieved_document - .get_first(content) - .unwrap() - .as_text() - .unwrap() - .lines() - .skip(2); - - lines.next().map_or_else( - || "".to_string(), - |first_line| { - let mut context_lines = lines.skip_while(|l| { - !l.to_lowercase().contains(&query.0.to_string()) - }); - - format!( - "\n> ... {}\n> {}\n> {} ...", - first_line, - context_lines.next().unwrap_or(""), - context_lines.next().unwrap_or("") - ) - }, - ) - }); - } - - response += &format!( - "\n\nYou searched for \"{}\"!\n\n## RESULTS\n\n{}\n\nIn need of \ - more results? This search engine populates its index with route \ - paths and route descriptions on startup. However, route content \ - isn't populated until the route is first visited. After a \ - route's first visit, it is updated after every five minutes, at \ - time of visit.", - query.0, - if results.is_empty() { - "There are no results for your query...".to_string() - } else { - results.trim_end().to_string() - }, - ); - } - } - - crate::success!(response, context) - }), - ); -} diff --git a/src/route.rs b/src/route.rs index e603513..285d049 100644 --- a/src/route.rs +++ b/src/route.rs @@ -57,8 +57,7 @@ pub fn track_mount( description: &str, handler: windmark::handler::RouteResponse, ) { - (*ROUTES.lock().unwrap()) - .insert(route.to_string(), Route::new(description)); + (*ROUTES.lock().unwrap()).insert(route.to_string(), Route::new(description)); router.mount(route, handler); } diff --git a/src/search.rs b/src/search.rs deleted file mode 100644 index 2d83819..0000000 --- a/src/search.rs +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Locus <https://github.com/gemrest/locus>. -// Copyright (C) 2022-2022 Fuwn <[email protected]> -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 3. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. -// -// Copyright (C) 2022-2022 Fuwn <[email protected]> -// SPDX-License-Identifier: GPL-3.0-only - -use std::{lazy::SyncLazy, sync::Mutex}; - -use tantivy::schema; -use tempfile::TempDir; - -const SEARCH_INDEX_SIZE: usize = 10_000_000; - -pub static INDEX_PATH: SyncLazy<Mutex<TempDir>> = - SyncLazy::new(|| Mutex::new(TempDir::new().unwrap())); -pub static SCHEMA: SyncLazy<Mutex<schema::Schema>> = - SyncLazy::new(|| { - Mutex::new({ - let mut schema_builder = schema::Schema::builder(); - - schema_builder.add_text_field("path", schema::TEXT | schema::STORED); - schema_builder - .add_text_field("description", schema::TEXT | schema::STORED); - schema_builder.add_text_field("content", schema::TEXT | schema::STORED); - - schema_builder.build() - }) - }); -pub static INDEX: SyncLazy<Mutex<tantivy::Index>> = SyncLazy::new(|| { - Mutex::new({ - tantivy::Index::create_in_dir( - &(*INDEX_PATH.lock().unwrap()), - (*SCHEMA.lock().unwrap()).clone(), - ) - .unwrap() - }) -}); -pub static INDEX_WRITER: SyncLazy<Mutex<tantivy::IndexWriter>> = - SyncLazy::new(|| { - Mutex::new((*INDEX.lock().unwrap()).writer(SEARCH_INDEX_SIZE).unwrap()) - }); - -pub fn index() { - info!("spawned search indexer"); - - loop { - let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap(); - let description = - (*SCHEMA.lock().unwrap()).get_field("description").unwrap(); - let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap(); - let time = tokio::time::Instant::now(); - let mut new = 0; - - for (route_path, information) in &(*crate::ROUTES.lock().unwrap()) { - // Pretty inefficient, but I'll figure this out later. - (*INDEX_WRITER.lock().unwrap()) - .delete_all_documents() - .unwrap(); - - (*INDEX_WRITER.lock().unwrap()) - .add_document(tantivy::doc!( - path => route_path.clone(), - description => information.description.clone(), - content => information.text_cache.clone() - )) - .unwrap(); - - new += 1; - } - - (*INDEX_WRITER.lock().unwrap()).commit().unwrap(); - - info!( - "commit {} new items into search index in {}ms", - new, - time.elapsed().as_nanos() as f64 / 1_000_000.0 - ); - - std::thread::sleep(std::time::Duration::from_secs( - crate::route::CACHE_RATE, - )); - } -} |