aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lucas Burns <[email protected]> 2022-08-20 21:19:42 -0500
committer Lucas Burns <[email protected]> 2022-08-20 21:24:03 -0500
commit 71fcb8f46645bfabb34c4e53f01027d82e2df002 (patch)
tree 77135da4bc059d8d7b71a15d884162a1d764a209
parent add(flag): added a verbosity flag that will display debugging messages (diff)
download chan-downloader-71fcb8f46645bfabb34c4e53f01027d82e2df002.tar.xz
chan-downloader-71fcb8f46645bfabb34c4e53f01027d82e2df002.zip
add(4plebs): Support for 4plebs is now added
* Refactor: get_thread_infos => get_thread_info * Remove: --preserve-filenames until support is added
-rw-r--r-- rustfmt.toml 46
-rw-r--r-- src/bin.rs 35
-rw-r--r-- src/lib.rs 100
3 files changed, 144 insertions, 37 deletions
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..9d6b854
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,46 @@
+edition = "2021"
+newline_style = "Unix"
+tab_spaces = 4
+hard_tabs = false
+unstable_features = true
+comment_width = 80
+wrap_comments = true
+normalize_comments = true
+normalize_doc_attributes = false # #[doc] -> //!
+error_on_line_overflow = true # change to fix errors
+error_on_unformatted = false
+format_code_in_doc_comments = true
+format_macro_bodies = true
+format_macro_matchers = true # $a: ident -> $a:ident
+format_strings = true
+imports_granularity = "Crate"
+imports_layout = "HorizontalVertical"
+# group_imports = "StdExternalCrate" # create 3 groups
+reorder_imports = true
+reorder_modules = true
+reorder_impl_items = true
+match_arm_blocks = false
+match_block_trailing_comma = true
+trailing_semicolon = true # continue, break, return
+overflow_delimited_expr = true
+use_field_init_shorthand = true # F { x: x } -> F { x }
+use_try_shorthand = true # try!() -> ()?
+empty_item_single_line = true # fn foo() {}
+fn_single_line = false # not fn foo() { println!() }
+where_single_line = false
+max_width = 106
+struct_field_align_threshold = 20
+struct_lit_width = 30
+struct_variant_width = 60
+combine_control_expr = true # if expr within fn call
+condense_wildcard_suffixes = true # (_, _) -> ( .. )
+merge_derives = true
+spaces_around_ranges = false # 1 .. 5 -> 1..5
+type_punctuation_density = "Wide" # S: Display+Debug=Foo -> spaces
+
+color = "Always"
+hex_literal_case = "Upper" # "Preserve"
+# remove_nested_parens = true
+
+# report_fixme = "Always"
+# report_todo = "Always"
diff --git a/src/bin.rs b/src/bin.rs
index fc6913e..e09bd3e 100644
--- a/src/bin.rs
+++ b/src/bin.rs
@@ -1,16 +1,23 @@
+// TODO: Implement --preserve-filenames
+// This would preserve the filenames that are given to the files on the
+// given website. It can be accomplished by using their API.
+// Example API URLs:
+// 4plebs: https://archive.4plebs.org/_/api/chan/thread?board=x&num=32661196
+// 4chan: https://a.4cdn.org/po/thread/570368.json
+
use futures::stream::StreamExt;
use std::{
env,
fs::create_dir_all,
+ io::Write,
path::{Path, PathBuf},
sync::{Mutex, Once},
thread,
time::{Duration, Instant},
- io::Write,
};
use anyhow::{anyhow, Context, Error, Result};
-use chan_downloader::{get_image_links, get_page_content, get_thread_infos, save_image};
+use chan_downloader::{get_image_links, get_page_content, get_thread_info, save_image};
use clap::{
crate_authors,
crate_description,
@@ -24,9 +31,8 @@ use clap::{
ValueHint,
};
use env_logger::fmt::Color as LogColor;
-use log::LevelFilter;
use indicatif::{ProgressBar, ProgressStyle};
-use log::{error, info};
+use log::{error, info, LevelFilter};
use once_cell::sync::Lazy;
use reqwest::Client;
@@ -213,9 +219,12 @@ fn create_directory(thread_link: &str, output: &str) -> Result<PathBuf> {
let workpath = env::current_dir()?;
info!("Working from {}", workpath.display());
- let (board_name, thread_id) = get_thread_infos(thread_link);
+ let thread = get_thread_info(thread_link);
- let directory = workpath.join(output).join(board_name).join(thread_id);
+ let directory = workpath
+ .join(output)
+ .join(thread.board)
+ .join(format!("{}", thread.id));
if !directory.exists() {
match create_dir_all(&directory) {
Ok(_) => {
@@ -269,13 +278,13 @@ fn build_app() -> Command<'static> {
.value_hint(ValueHint::DirPath)
.help("Output directory (Default is 'downloads')"),
)
- .arg(
- Arg::new("preserve_filenames")
- .short('p')
- .long("preserve-filenames")
- .takes_value(false)
- .help("Preserve the filenames that are found on 4chan/4plebs"),
- )
+ // .arg(
+ // Arg::new("preserve_filenames")
+ // .short('p')
+ // .long("preserve-filenames")
+ // .takes_value(false)
+ // .help("Preserve the filenames that are found on 4chan/4plebs"),
+ // )
.arg(
Arg::new("reload")
.short('r')
diff --git a/src/lib.rs b/src/lib.rs
index 7200307..de43b93 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,9 +7,17 @@ use log::info;
use reqwest::{Client, Error};
use std::{
fs::File,
- io::{copy, Cursor},
+ io::{self, Cursor},
};
+/// Represents a 4chan or 4plebs thread
+#[derive(Debug)]
+pub struct Thread {
+ pub board: String,
+ pub id: u32,
+}
+
+#[derive(Debug)]
pub struct Link {
pub url: String,
pub name: String,
@@ -41,7 +49,7 @@ pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String
if response.status().is_success() {
let mut dest = File::create(path).unwrap();
let mut content = Cursor::new(response.bytes().await?);
- copy(&mut content, &mut dest).unwrap();
+ io::copy(&mut content, &mut dest).unwrap();
}
info!("Saved image to: {}", path);
Ok(String::from(path))
@@ -77,20 +85,24 @@ pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Erro
///
/// ```
/// let url = "https://boards.4chan.org/wg/thread/6872254";
-/// let (board_name, thread_id) = chan_downloader::get_thread_infos(url);
+/// let thread = chan_downloader::get_thread_info(url);
///
-/// assert_eq!(board_name, "wg");
-/// assert_eq!(thread_id, "6872254");
+/// assert_eq!(thread.board, "wg");
+/// assert_eq!(thread.id, 6872254);
/// ```
#[must_use]
-pub fn get_thread_infos(url: &str) -> (&str, &str) {
- info!(target: "thread_events", "Getting thread infos from: {}", url);
+pub fn get_thread_info(url: &str) -> Thread {
+ info!(target: "thread_events", "Getting thread info from: {}", url);
let url_vec: Vec<&str> = url.split('/').collect();
let board_name = url_vec[3];
let thread_vec: Vec<&str> = url_vec[5].split('#').collect();
let thread_id = thread_vec[0];
- info!("Got thread infos from: {}", url);
- (board_name, thread_id)
+ info!("Got thread info from: {}", url);
+
+ Thread {
+ board: board_name.to_owned(),
+ id: thread_id.parse::<u32>().expect("failed to parse thread id"),
+ }
}
/// Returns the links and the number of links from a page.
@@ -115,10 +127,16 @@ pub fn get_thread_infos(url: &str) -> (&str, &str) {
/// }
/// };
/// ```
+///
+/// Sample image links:
+/// - <https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png>
+/// - <https://i.4cdn.org/sp/1661019073822058.jpg>
#[must_use]
pub fn get_image_links(page_content: &str) -> Vec<Link> {
info!(target: "link_events", "Getting image links");
- let reg = regex!(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))");
+ let reg = regex!(
+ r"(//i(?:s|mg)?(?:\d*)?\.(?:4cdn|4chan|4plebs)\.org/(?:\w+/){1,3}(?:\d+/){0,2}(\d+\.(?:jpg|png|gif|webm)))"
+ );
let links_iter = reg.captures_iter(page_content);
let number_of_links = reg.captures_iter(page_content).count() / 2;
@@ -145,22 +163,31 @@ macro_rules! regex {
#[cfg(test)]
mod tests {
use super::*;
+ use reqwest::Client;
#[test]
- fn it_gets_thread_infos() {
+ fn it_gets_4chan_thread_info() {
let url = "https://boards.4chan.org/wg/thread/6872254";
- let (board_name, thread_id) = get_thread_infos(url);
- assert_eq!(board_name, "wg");
- assert_eq!(thread_id, "6872254");
+ let thread = get_thread_info(url);
+ assert_eq!(thread.board, "wg");
+ assert_eq!(thread.id, 6872254);
+ }
+
+ #[test]
+ fn it_gets_4plebs_thread_info() {
+ let url = "https://archive.4plebs.org/x/thread/32661196";
+ let thread = get_thread_info(url);
+ assert_eq!(thread.board, "x");
+ assert_eq!(thread.id, 32661196);
}
#[test]
- fn it_gets_image_links() {
+ fn it_gets_4chan_image_links() {
let links_iter = get_image_links(
- "
- <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a>
- <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a>
- ",
+ r#"
+ <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a>
+ <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a>
+ "#,
);
for link in links_iter {
assert_eq!(link.url, "//i.4cdn.org/wg/1489266570954.jpg");
@@ -168,9 +195,22 @@ mod tests {
}
}
+ #[test]
+ fn it_gets_4plebs_image_links() {
+ let links_iter = get_image_links(
+ r#"
+ <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a>
+ <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a>
+ "#,
+ );
+ for link in links_iter {
+ assert_eq!(link.url, "//img.4plebs.org/boards/x/image/1660/66/1660662319160984.png");
+ assert_eq!(link.name, "1660662319160984.png");
+ }
+ }
+
#[tokio::test]
async fn it_gets_page_content() {
- use reqwest::Client;
let client = Client::builder().user_agent("reqwest").build().unwrap();
let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore";
let result = get_page_content(url, &client).await.unwrap();
@@ -178,9 +218,8 @@ mod tests {
}
#[tokio::test]
- async fn it_saves_image() {
- use reqwest::Client;
- use std::{env, fs::remove_file};
+ async fn it_saves_4chan_image() {
+ use std::{env, fs};
let client = Client::builder().user_agent("reqwest").build().unwrap();
let workpath = env::current_dir().unwrap().join("1489266570954.jpg");
let url = "https://i.4cdn.org/wg/1489266570954.jpg";
@@ -188,6 +227,19 @@ mod tests {
.await
.unwrap();
assert_eq!(workpath.to_str().unwrap(), answer);
- remove_file(answer).unwrap();
+ fs::remove_file(answer).unwrap();
+ }
+
+ #[tokio::test]
+ async fn it_saves_4plebs_image() {
+ use std::{env, fs};
+ let client = Client::builder().user_agent("reqwest").build().unwrap();
+ let workpath = env::current_dir().unwrap().join("1614942709612.jpg");
+ let url = "https://img.4plebs.org/boards/x/image/1614/94/1614942709612.jpg";
+ let answer = save_image(url, workpath.to_str().unwrap(), &client)
+ .await
+ .unwrap();
+ assert_eq!(workpath.to_str().unwrap(), answer);
+ fs::remove_file(answer).unwrap();
}
}