about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author: Fuwn <[email protected]> 2022-04-18 01:56:26 -0700
committer: Fuwn <[email protected]> 2022-04-18 01:56:26 -0700
commit: 4a2f91cb02a6978401e0fe99528f43be3f8fc6a7 (patch)
tree: ea3f4005a9e172109d383c7d5731b8135d26150f /src
parent: build(rustc): bump toolchain (diff)
download: locus-4a2f91cb02a6978401e0fe99528f43be3f8fc6a7.tar.xz
locus-4a2f91cb02a6978401e0fe99528f43be3f8fc6a7.zip
feat: prepare for real search engine
Diffstat (limited to 'src')
-rw-r--r-- src/main.rs 40
-rw-r--r-- src/search.rs 89
2 files changed, 129 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
index edcb36d..b9ed0cb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -32,6 +32,7 @@
mod macros;
mod modules;
mod route;
+mod search;
#[macro_use]
extern crate log;
@@ -40,6 +41,7 @@ use std::{collections::HashMap, lazy::SyncLazy, sync::Mutex};
use pickledb::PickleDb;
use route::track_mount;
+use search::{INDEX, SCHEMA};
use tokio::time::Instant;
use windmark::{Response, Router};
use yarte::Template;
@@ -287,5 +289,43 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
time_mount.elapsed().as_nanos() as f64 / 1_000_000.0
);
+ std::thread::spawn(search::index);
+
+  // Temporary search probe: once per second, run a fixed query against the
+  // search index and print every hit (score and stored fields as JSON) to
+  // standard output.
+  std::thread::spawn(|| {
+    // Clone the schema once instead of re-locking `SCHEMA` for every lookup.
+    // The guard is dropped at the end of this statement, which matters
+    // because the lazy initialiser of `INDEX` locks `SCHEMA` itself.
+    let schema = SCHEMA.lock().unwrap().clone();
+    let default_fields = vec![
+      schema.get_field("path").unwrap(),
+      schema.get_field("description").unwrap(),
+      schema.get_field("content").unwrap(),
+    ];
+
+    // Build the reader and the query parser a single time. With the
+    // `OnCommit` reload policy the reader is expected to pick up new commits
+    // by itself, so it does not need to be recreated on every pass.
+    let (reader, query_parser) = {
+      let index = INDEX.lock().unwrap();
+      let reader: tantivy::IndexReader = index
+        .reader_builder()
+        .reload_policy(tantivy::ReloadPolicy::OnCommit)
+        .try_into()
+        .unwrap();
+      let query_parser =
+        tantivy::query::QueryParser::for_index(&index, default_fields);
+
+      (reader, query_parser)
+    };
+    // The probe query never changes, so parse it once as well.
+    let query = query_parser.parse_query("Node.js").unwrap();
+
+    loop {
+      std::thread::sleep(std::time::Duration::from_secs(1));
+
+      // Acquire a searcher per pass so each search runs against the segments
+      // the reader has most recently loaded.
+      let searcher = reader.searcher();
+      let top_docs = searcher
+        .search(&query, &tantivy::collector::TopDocs::with_limit(10))
+        .unwrap();
+
+      for (score, doc_address) in top_docs {
+        let retrieved_doc = searcher.doc(doc_address).unwrap();
+
+        println!("{}: {}", score, schema.to_json(&retrieved_doc));
+      }
+    }
+  });
+
router.run().await
}
diff --git a/src/search.rs b/src/search.rs
new file mode 100644
index 0000000..5e5bc53
--- /dev/null
+++ b/src/search.rs
@@ -0,0 +1,89 @@
+// This file is part of Locus <https://github.com/gemrest/locus>.
+// Copyright (C) 2022-2022 Fuwn <[email protected]>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 3.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// Copyright (C) 2022-2022 Fuwn <[email protected]>
+// SPDX-License-Identifier: GPL-3.0-only
+
+use std::{lazy::SyncLazy, sync::Mutex};
+
+use tantivy::schema;
+use tempfile::TempDir;
+
+/// Temporary directory in which the search index is created.
+///
+/// Initialised lazily on first access; panics if the directory cannot be
+/// created.
+pub static INDEX_PATH: SyncLazy<Mutex<TempDir>> = SyncLazy::new(|| {
+  let directory = TempDir::new().unwrap();
+
+  Mutex::new(directory)
+});
+/// Schema of the search index.
+///
+/// Declares three text fields, in this order: `path`, `description`, and
+/// `content`. Each one is registered with the `TEXT | STORED` options.
+pub static SCHEMA: SyncLazy<Mutex<tantivy::schema::Schema>> =
+  SyncLazy::new(|| {
+    let text_options = schema::TEXT | schema::STORED;
+    let mut builder = schema::Schema::builder();
+
+    // Registration order is kept as-is: path, description, content.
+    for field_name in ["path", "description", "content"].iter().copied() {
+      builder.add_text_field(field_name, text_options.clone());
+    }
+
+    Mutex::new(builder.build())
+  });
+/// Search index, created inside [`INDEX_PATH`] with a clone of [`SCHEMA`].
+///
+/// Initialised lazily on first access; panics if either lock is poisoned or
+/// the index cannot be created.
+pub static INDEX: SyncLazy<Mutex<tantivy::Index>> = SyncLazy::new(|| {
+  let directory = INDEX_PATH.lock().unwrap();
+  let schema = SCHEMA.lock().unwrap().clone();
+  let index = tantivy::Index::create_in_dir(&*directory, schema).unwrap();
+
+  Mutex::new(index)
+});
+/// Writer for [`INDEX`], requested with `10_000_000` as the writer's size
+/// argument.
+///
+/// Initialised lazily on first access; panics if the lock is poisoned or the
+/// writer cannot be created.
+pub static INDEX_WRITER: SyncLazy<Mutex<tantivy::IndexWriter>> =
+  SyncLazy::new(|| {
+    let writer = INDEX.lock().unwrap().writer(10_000_000).unwrap();
+
+    Mutex::new(writer)
+  });
+
+/// Rebuild the search index from `crate::ROUTES` once per second, forever.
+///
+/// Each pass clears the index, adds one document per entry in
+/// `crate::ROUTES` (its path, description, and `text_cache`), commits, logs
+/// how long the pass took, and then sleeps for one second. This function
+/// never returns, so it is meant to be run on its own thread.
+///
+/// # Panics
+///
+/// Panics if a lock is poisoned, a schema field is missing, or any index
+/// operation fails.
+pub fn index() {
+  // The schema does not change at runtime, so resolve the field handles
+  // once instead of re-locking `SCHEMA` three times on every pass.
+  let (path, description, content) = {
+    let schema = SCHEMA.lock().unwrap();
+
+    (
+      schema.get_field("path").unwrap(),
+      schema.get_field("description").unwrap(),
+      schema.get_field("content").unwrap(),
+    )
+  };
+
+  loop {
+    let time = tokio::time::Instant::now();
+    let mut new = 0;
+
+    {
+      // Lock order is routes first, then the writer, as before. Both guards
+      // are released at the end of this scope, before the commit.
+      let routes = crate::ROUTES.lock().unwrap();
+      let writer = INDEX_WRITER.lock().unwrap();
+
+      // Clear the index exactly once per pass. Clearing inside the loop
+      // below would discard the documents added for earlier routes and
+      // leave only the last route in the index; it would also skip the
+      // clear entirely when there are no routes.
+      writer.delete_all_documents().unwrap();
+
+      for (route_path, information) in &*routes {
+        writer
+          .add_document(tantivy::doc!(
+            path => route_path.clone(),
+            description => information.description.clone(),
+            content => information.text_cache.clone()
+          ))
+          .unwrap();
+
+        new += 1;
+      }
+    }
+
+    INDEX_WRITER.lock().unwrap().commit().unwrap();
+
+    info!(
+      "commit {} new items into search index in {}ms",
+      new,
+      time.elapsed().as_nanos() as f64 / 1_000_000.0
+    );
+
+    std::thread::sleep(std::time::Duration::from_secs(1));
+  }
+}