about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Fuwn <[email protected]> 2023-03-31 17:49:21 -0700
committer Fuwn <[email protected]> 2023-03-31 17:49:21 -0700
commit 397ce63b0c28fbbe95b4019dd22a4a360e8a083f (patch)
tree 55aedcbfa0bef25168d8eae5f9c6f8f03f1379d5
parent refactor(src): use response macros (diff)
download locus-397ce63b0c28fbbe95b4019dd22a4a360e8a083f.tar.xz
locus-397ce63b0c28fbbe95b4019dd22a4a360e8a083f.zip
feat(translate): character set detection
-rw-r--r-- Cargo.toml 7
-rw-r--r-- src/modules/router/module.rs 18
-rw-r--r-- src/modules/router/translate/deepl.rs 14
-rw-r--r-- src/modules/router/translate/module.rs 133
4 files changed, 106 insertions, 66 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 5baddf9..2713d4e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,7 +24,7 @@ tokio = { version = "1.26.0", features = ["full"] } # Asynchronous Runtime
chrono = "0.4.19" # Date and Time
pickledb = "0.5.1" # Database
tantivy = "0.19.2" # Full-text Search Engine
-windmark = { version = "0.2.4", features = [
+windmark = { version = "0.2.5", features = [
"logger",
"auto-deduce-mime",
"response-macros"
@@ -39,7 +39,10 @@ yarte = "0.15.6" # Templating Engine
tempfile = "3.3.0" # Temporary File Creation and Access
dotenv = "0.15.0" # .env File Support
once_cell = "1.13.1" # Lazy Statics
-germ = { version = "0.3.7", default-features = false, features = ["ast"] } # Gemini Tool-kit
+germ = { version = "0.3.7", default-features = false, features = [
+ "ast"
+] } # Gemini Tool-kit
+chardetng = "0.1.17" # Character Encoding Detection
[build-dependencies]
vergen = { version = "8.0.0", features = [
diff --git a/src/modules/router/module.rs b/src/modules/router/module.rs
index 1987568..dc9dae8 100644
--- a/src/modules/router/module.rs
+++ b/src/modules/router/module.rs
@@ -27,14 +27,28 @@ pub fn module(router: &mut windmark::Router) {
super::ticker::module(&context);
}));
router.set_post_route_callback(Box::new({
- |context, content| {
+ |context, response| {
info!(
"closed connection from {} to {}",
context.tcp.peer_addr().unwrap().ip(),
context.url.to_string(),
);
- super::translate::module(&context, content);
+ if let Some(language) =
+ windmark::utilities::queries_from_url(context.url).get("translate")
+ {
+ if super::translate::module(&context, response, language).is_err() {
+ response.content = format!(
+ "An error occurred during the translation process. Your language \
+ might not be supported.\n\n=> {} View Original\n\n{}",
+ context
+ .url
+ .to_string()
+ .replace(&format!("translate={language}"), ""),
+ response.content
+ );
+ }
+ }
}
}));
}
diff --git a/src/modules/router/translate/deepl.rs b/src/modules/router/translate/deepl.rs
index 00e7538..b1767e8 100644
--- a/src/modules/router/translate/deepl.rs
+++ b/src/modules/router/translate/deepl.rs
@@ -35,8 +35,11 @@ struct Translation {
pub detected_source_language: String,
}
-pub fn translate(text: &str, language: &str) -> DeepL {
- let deepl_response: DeepL = serde_json::from_str(
+pub fn translate(
+ text: &str,
+ language: &str,
+) -> Result<DeepL, serde_json::Error> {
+ let deepl_response: DeepL = serde_json::from_str({
&reqwest::blocking::Client::new()
.post("https://api-free.deepl.com/v2/translate")
.header(
@@ -47,11 +50,10 @@ pub fn translate(text: &str, language: &str) -> DeepL {
.send()
.unwrap()
.text()
- .unwrap(),
- )
- .unwrap();
+ .unwrap()
+ })?;
- deepl_response
+ Ok(deepl_response)
}
pub fn language_code_to_language_name(language_code: &str) -> String {
diff --git a/src/modules/router/translate/module.rs b/src/modules/router/translate/module.rs
index 8c3b162..0c997d3 100644
--- a/src/modules/router/translate/module.rs
+++ b/src/modules/router/translate/module.rs
@@ -19,69 +19,90 @@
use super::deepl::translate;
use crate::modules::router::translate::deepl::language_code_to_language_name;
-pub fn module(
+pub fn module<S>(
cc: &windmark::returnable::CallbackContext<'_>,
- content: &mut String,
-) {
- if let Some(language) =
- windmark::utilities::queries_from_url(cc.url).get("translate")
- {
- let lines = content
- .lines()
- .map(ToString::to_string)
- .collect::<Vec<String>>();
- let mut preformatted = false;
- let mut saved_lines = std::collections::HashMap::new();
- let mut fully_translated = Vec::new();
+ response: &mut windmark::Response,
+ language: S,
+) -> Result<(), serde_json::Error>
+where
+ S: Into<String> + AsRef<str>,
+{
+ let lines = response
+ .content
+ .lines()
+ .map(ToString::to_string)
+ .collect::<Vec<String>>();
+ let mut preformatted = false;
+ let mut saved_lines = std::collections::HashMap::new();
+ let mut fully_translated = Vec::new();
- for (i, line) in &mut lines.iter().enumerate() {
- if line == "```" {
- preformatted = !preformatted;
- }
+ for (i, line) in &mut lines.iter().enumerate() {
+ if line == "```" {
+ preformatted = !preformatted;
+ }
- if line.starts_with("=>") {
- if let Some(germ::ast::Node::Link {
- to,
- text,
- }) = germ::ast::Ast::from_string(line).inner().get(0)
- {
- saved_lines.insert(
- i,
- format!(
- "=> {to}?translate={language}{}",
- text.clone().map_or_else(
- || "".to_string(),
- |text| { format!(" {}", translate(&text, language).text()) }
- )
- ),
- );
- }
- } else if preformatted {
- saved_lines.insert(i, line.to_string());
+ if line.starts_with("=>") {
+ if let Some(germ::ast::Node::Link {
+ to,
+ text,
+ }) = germ::ast::Ast::from_string(line).inner().get(0)
+ {
+ saved_lines.insert(
+ i,
+ format!(
+ "=> {to}?translate={}{}",
+ language.as_ref(),
+ text.clone().map_or_else(
+ || "".to_string(),
+ |text| {
+ format!(
+ " {}",
+ if let Ok(text) = translate(&text, language.as_ref()) {
+ text.text()
+ } else {
+ text
+ }
+ )
+ }
+ )
+ ),
+ );
}
+ } else if preformatted {
+ saved_lines.insert(i, line.to_string());
}
+ }
- let translated = translate(content, language);
+ let translated = translate(&response.content, language.as_ref())?;
- for (i, line) in translated.text().lines().enumerate() {
- if saved_lines.contains_key(&i) {
- fully_translated.push(saved_lines.get(&i).unwrap().to_string());
- } else {
- fully_translated.push(line.to_string());
- }
+ for (i, line) in translated.text().lines().enumerate() {
+ if saved_lines.contains_key(&i) {
+ fully_translated.push(saved_lines.get(&i).unwrap().to_string());
+ } else {
+ fully_translated.push(line.to_string());
}
-
- *content = format!(
- "This content has been translated from {} to {}.\n\n=> {} View \
- Original\n\n{}",
- language_code_to_language_name(
- &translated.detected_source_language().to_lowercase()
- ),
- language_code_to_language_name(&language.to_string().to_lowercase()),
- cc.url
- .to_string()
- .replace(&format!("translate={language}"), ""),
- fully_translated.join("\n")
- );
}
+
+ response.content = format!(
+ "This content has been translated from {} to {}.\n\n=> {} View \
+ Original\n\n{}",
+ language_code_to_language_name(
+ &translated.detected_source_language().to_lowercase()
+ ),
+ language_code_to_language_name(&language.as_ref().to_lowercase()),
+ cc.url
+ .to_string()
+ .replace(&format!("translate={}", language.as_ref()), ""),
+ fully_translated.join("\n")
+ );
+ response.language = Some(language.into());
+ response.character_set = Some({
+ let mut encoding_detector = chardetng::EncodingDetector::new();
+
+ encoding_detector.feed(response.content.as_bytes(), true);
+
+ encoding_detector.guess(None, true).name().to_string()
+ });
+
+ Ok(())
}