diff options
| author | Fuwn <[email protected]> | 2022-04-18 01:56:26 -0700 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2022-04-18 01:56:26 -0700 |
| commit | 4a2f91cb02a6978401e0fe99528f43be3f8fc6a7 (patch) | |
| tree | ea3f4005a9e172109d383c7d5731b8135d26150f | |
| parent | build(rustc): bump toolchain (diff) | |
| download | locus-4a2f91cb02a6978401e0fe99528f43be3f8fc6a7.tar.xz locus-4a2f91cb02a6978401e0fe99528f43be3f8fc6a7.zip | |
feat: prepare for real search engine
| -rw-r--r-- | Cargo.toml | 6 | ||||
| -rw-r--r-- | src/main.rs | 40 | ||||
| -rw-r--r-- | src/search.rs | 89 |
3 files changed, 135 insertions, 0 deletions
@@ -44,6 +44,12 @@ rand = "0.8.5" serde = "1.0.136" serde_json = "1.0.79" +# Search +tantivy = "0.17.0" + +# Temporary Files +tempfile = "3.3.0" + [build-dependencies] # Templates yarte = "0.15.6" diff --git a/src/main.rs b/src/main.rs index edcb36d..b9ed0cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,7 @@ mod macros; mod modules; mod route; +mod search; #[macro_use] extern crate log; @@ -40,6 +41,7 @@ use std::{collections::HashMap, lazy::SyncLazy, sync::Mutex}; use pickledb::PickleDb; use route::track_mount; +use search::{INDEX, SCHEMA}; use tokio::time::Instant; use windmark::{Response, Router}; use yarte::Template; @@ -287,5 +289,43 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { time_mount.elapsed().as_nanos() as f64 / 1_000_000.0 ); + std::thread::spawn(search::index); + + std::thread::spawn(|| { + loop { + std::thread::sleep(std::time::Duration::from_secs(1)); + + let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap(); + let description = + (*SCHEMA.lock().unwrap()).get_field("description").unwrap(); + let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap(); + + let reader = (*INDEX.lock().unwrap()) + .reader_builder() + .reload_policy(tantivy::ReloadPolicy::OnCommit) + .try_into() + .unwrap(); + let searcher = reader.searcher(); + let query_parser = tantivy::query::QueryParser::for_index( + &(*INDEX.lock().unwrap()), + vec![path, description, content], + ); + let query = query_parser.parse_query("Node.js").unwrap(); + let top_docs = searcher + .search(&query, &tantivy::collector::TopDocs::with_limit(10)) + .unwrap(); + + for (score, doc_address) in top_docs { + let retrieved_doc = searcher.doc(doc_address).unwrap(); + + println!( + "{}: {}", + score, + (*SCHEMA.lock().unwrap()).to_json(&retrieved_doc) + ); + } + } + }); + router.run().await } diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 0000000..5e5bc53 --- /dev/null +++ b/src/search.rs @@ -0,0 +1,89 @@ +// This file is part of Locus <https://github.com/gemrest/locus>. +// Copyright (C) 2022-2022 Fuwn <[email protected]> +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +// Copyright (C) 2022-2022 Fuwn <[email protected]> +// SPDX-License-Identifier: GPL-3.0-only + +use std::{lazy::SyncLazy, sync::Mutex}; + +use tantivy::schema; +use tempfile::TempDir; + +pub static INDEX_PATH: SyncLazy<Mutex<TempDir>> = + SyncLazy::new(|| Mutex::new(TempDir::new().unwrap())); +pub static SCHEMA: SyncLazy<Mutex<tantivy::schema::Schema>> = + SyncLazy::new(|| { + Mutex::new({ + let mut schema_builder = schema::Schema::builder(); + + schema_builder.add_text_field("path", schema::TEXT | schema::STORED); + schema_builder + .add_text_field("description", schema::TEXT | schema::STORED); + schema_builder.add_text_field("content", schema::TEXT | schema::STORED); + + schema_builder.build() + }) + }); +pub static INDEX: SyncLazy<Mutex<tantivy::Index>> = SyncLazy::new(|| { + Mutex::new({ + tantivy::Index::create_in_dir( + &(*INDEX_PATH.lock().unwrap()), + (*SCHEMA.lock().unwrap()).clone(), + ) + .unwrap() + }) +}); +pub static INDEX_WRITER: SyncLazy<Mutex<tantivy::IndexWriter>> = + SyncLazy::new(|| { + Mutex::new((*INDEX.lock().unwrap()).writer(10_000_000).unwrap()) + }); + +pub fn index() { + loop { + let path = (*SCHEMA.lock().unwrap()).get_field("path").unwrap(); + let description = + (*SCHEMA.lock().unwrap()).get_field("description").unwrap(); + let content = (*SCHEMA.lock().unwrap()).get_field("content").unwrap(); + let time = tokio::time::Instant::now(); + let mut new = 0; + + for (route_path, information) in &(*crate::ROUTES.lock().unwrap()) { + // Pretty inefficient, but I'll figure this out later. + (*INDEX_WRITER.lock().unwrap()) + .delete_all_documents() + .unwrap(); + + (*INDEX_WRITER.lock().unwrap()) + .add_document(tantivy::doc!( + path => route_path.clone(), + description => information.description.clone(), + content => information.text_cache.clone() + )) + .unwrap(); + + new += 1; + } + + (*INDEX_WRITER.lock().unwrap()).commit().unwrap(); + + info!( + "commit {} new items into search index in {}ms", + new, + time.elapsed().as_nanos() as f64 / 1_000_000.0 + ); + + std::thread::sleep(std::time::Duration::from_secs(1)); + } +} |