aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorFuwn <[email protected]>2022-04-27 01:11:01 -0700
committerFuwn <[email protected]>2022-04-27 01:11:01 -0700
commitb909b6bb55ce3bccb76891494c9e0ca2c61c6072 (patch)
tree54ea80609e16fd004d9f13cf7a0ece5b7b83c098 /src
parentformat: unclarify (diff)
downloadlocus-b909b6bb55ce3bccb76891494c9e0ca2c61c6072.tar.xz
locus-b909b6bb55ce3bccb76891494c9e0ca2c61c6072.zip
refactor(search): restrict search internals
Diffstat (limited to 'src')
-rw-r--r--src/main.rs3
-rw-r--r--src/modules/mod.rs2
-rw-r--r--src/modules/search.rs128
-rw-r--r--src/route.rs3
-rw-r--r--src/search.rs95
5 files changed, 3 insertions, 228 deletions
diff --git a/src/main.rs b/src/main.rs
index 404ea18..0f9dd55 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -32,7 +32,6 @@
mod macros;
mod modules;
mod route;
-mod search;
#[macro_use]
extern crate log;
@@ -118,7 +117,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
time_mounts("module", &mut time_mount, || stateless!(router, modules));
- std::thread::spawn(search::index);
+ std::thread::spawn(modules::search::index);
router.run().await
}
diff --git a/src/modules/mod.rs b/src/modules/mod.rs
index bdbaef1..7dc5262 100644
--- a/src/modules/mod.rs
+++ b/src/modules/mod.rs
@@ -22,7 +22,7 @@ mod multi_blog;
mod random;
mod remarks;
mod router;
-mod search;
+pub mod search;
mod sitemap;
mod r#static;
mod uptime;
diff --git a/src/modules/search.rs b/src/modules/search.rs
deleted file mode 100644
index b63923b..0000000
--- a/src/modules/search.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-// This file is part of Locus <https://github.com/gemrest/locus>.
-// Copyright (C) 2022-2022 Fuwn <[email protected]>
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 3.
-//
-// This program is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-//
-// Copyright (C) 2022-2022 Fuwn <[email protected]>
-// SPDX-License-Identifier: GPL-3.0-only
-
-use crate::search::{INDEX, SCHEMA};
-
-pub fn module(router: &mut windmark::Router) {
- crate::route::track_mount(
- router,
- "/search",
- "A search engine for this Gemini capsule",
- Box::new(|context| {
- let mut response = String::from(
- "# SEARCH\n\n=> /search?action=go Search!\n=> /random I'm Feeling \
- Lucky",
- );
-
- if let Some(query) = context.url.query_pairs().next() {
- if query.0 == "action" && query.1 == "go" {
- return windmark::Response::Input(
- "What would you like to search for?".to_string(),
- );
- }
-
- {
- let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap();
- let description =
- (*SCHEMA.lock().unwrap()).get_field("description").unwrap();
- let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap();
- let mut results = String::new();
-
- let searcher = (*INDEX.lock().unwrap())
- .reader_builder()
- .reload_policy(tantivy::ReloadPolicy::OnCommit)
- .try_into()
- .unwrap()
- .searcher();
- let top_docs = searcher
- .search(
- &tantivy::query::QueryParser::for_index(
- &(*INDEX.lock().unwrap()),
- vec![path, description, content],
- )
- .parse_query(&query.0.to_string())
- .unwrap(),
- &tantivy::collector::TopDocs::with_limit(crate::SEARCH_SIZE),
- )
- .unwrap();
-
- for (_score, document_address) in top_docs {
- let retrieved_document = searcher.doc(document_address).unwrap();
-
- macro_rules! text {
- ($field:ident) => {{
- retrieved_document
- .get_first($field)
- .unwrap()
- .as_text()
- .unwrap()
- }};
- ($document:ident, $field:ident) => {{
- $document.get_first($field).unwrap().as_text().unwrap()
- }};
- }
-
- results +=
- &format!("=> {} {}{}\n", text!(path), text!(description), {
- let mut lines = retrieved_document
- .get_first(content)
- .unwrap()
- .as_text()
- .unwrap()
- .lines()
- .skip(2);
-
- lines.next().map_or_else(
- || "".to_string(),
- |first_line| {
- let mut context_lines = lines.skip_while(|l| {
- !l.to_lowercase().contains(&query.0.to_string())
- });
-
- format!(
- "\n> ... {}\n> {}\n> {} ...",
- first_line,
- context_lines.next().unwrap_or(""),
- context_lines.next().unwrap_or("")
- )
- },
- )
- });
- }
-
- response += &format!(
- "\n\nYou searched for \"{}\"!\n\n## RESULTS\n\n{}\n\nIn need of \
- more results? This search engine populates its index with route \
- paths and route descriptions on startup. However, route content \
- isn't populated until the route is first visited. After a \
- route's first visit, it is updated after every five minutes, at \
- time of visit.",
- query.0,
- if results.is_empty() {
- "There are no results for your query...".to_string()
- } else {
- results.trim_end().to_string()
- },
- );
- }
- }
-
- crate::success!(response, context)
- }),
- );
-}
diff --git a/src/route.rs b/src/route.rs
index e603513..285d049 100644
--- a/src/route.rs
+++ b/src/route.rs
@@ -57,8 +57,7 @@ pub fn track_mount(
description: &str,
handler: windmark::handler::RouteResponse,
) {
- (*ROUTES.lock().unwrap())
- .insert(route.to_string(), Route::new(description));
+ (*ROUTES.lock().unwrap()).insert(route.to_string(), Route::new(description));
router.mount(route, handler);
}
diff --git a/src/search.rs b/src/search.rs
deleted file mode 100644
index 2d83819..0000000
--- a/src/search.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-// This file is part of Locus <https://github.com/gemrest/locus>.
-// Copyright (C) 2022-2022 Fuwn <[email protected]>
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 3.
-//
-// This program is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-//
-// Copyright (C) 2022-2022 Fuwn <[email protected]>
-// SPDX-License-Identifier: GPL-3.0-only
-
-use std::{lazy::SyncLazy, sync::Mutex};
-
-use tantivy::schema;
-use tempfile::TempDir;
-
-const SEARCH_INDEX_SIZE: usize = 10_000_000;
-
-pub static INDEX_PATH: SyncLazy<Mutex<TempDir>> =
- SyncLazy::new(|| Mutex::new(TempDir::new().unwrap()));
-pub static SCHEMA: SyncLazy<Mutex<schema::Schema>> =
- SyncLazy::new(|| {
- Mutex::new({
- let mut schema_builder = schema::Schema::builder();
-
- schema_builder.add_text_field("path", schema::TEXT | schema::STORED);
- schema_builder
- .add_text_field("description", schema::TEXT | schema::STORED);
- schema_builder.add_text_field("content", schema::TEXT | schema::STORED);
-
- schema_builder.build()
- })
- });
-pub static INDEX: SyncLazy<Mutex<tantivy::Index>> = SyncLazy::new(|| {
- Mutex::new({
- tantivy::Index::create_in_dir(
- &(*INDEX_PATH.lock().unwrap()),
- (*SCHEMA.lock().unwrap()).clone(),
- )
- .unwrap()
- })
-});
-pub static INDEX_WRITER: SyncLazy<Mutex<tantivy::IndexWriter>> =
- SyncLazy::new(|| {
- Mutex::new((*INDEX.lock().unwrap()).writer(SEARCH_INDEX_SIZE).unwrap())
- });
-
-pub fn index() {
- info!("spawned search indexer");
-
- loop {
- let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap();
- let description =
- (*SCHEMA.lock().unwrap()).get_field("description").unwrap();
- let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap();
- let time = tokio::time::Instant::now();
- let mut new = 0;
-
- for (route_path, information) in &(*crate::ROUTES.lock().unwrap()) {
- // Pretty inefficient, but I'll figure this out later.
- (*INDEX_WRITER.lock().unwrap())
- .delete_all_documents()
- .unwrap();
-
- (*INDEX_WRITER.lock().unwrap())
- .add_document(tantivy::doc!(
- path => route_path.clone(),
- description => information.description.clone(),
- content => information.text_cache.clone()
- ))
- .unwrap();
-
- new += 1;
- }
-
- (*INDEX_WRITER.lock().unwrap()).commit().unwrap();
-
- info!(
- "commit {} new items into search index in {}ms",
- new,
- time.elapsed().as_nanos() as f64 / 1_000_000.0
- );
-
- std::thread::sleep(std::time::Duration::from_secs(
- crate::route::CACHE_RATE,
- ));
- }
-}