diff options
| -rw-r--r-- | rustfmt.toml | 46 | ||||
| -rw-r--r-- | src/bin.rs | 35 | ||||
| -rw-r--r-- | src/lib.rs | 100 |
3 files changed, 144 insertions, 37 deletions
diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..9d6b854 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,46 @@ +edition = "2021" +newline_style = "Unix" +tab_spaces = 4 +hard_tabs = false +unstable_features = true +comment_width = 80 +wrap_comments = true +normalize_comments = true +normalize_doc_attributes = false # #[doc] -> //! +error_on_line_overflow = true # change to fix errors +error_on_unformatted = false +format_code_in_doc_comments = true +format_macro_bodies = true +format_macro_matchers = true # $a: ident -> $a:ident +format_strings = true +imports_granularity = "Crate" +imports_layout = "HorizontalVertical" +# group_imports = "StdExternalCrate" # create 3 groups +reorder_imports = true +reorder_modules = true +reorder_impl_items = true +match_arm_blocks = false +match_block_trailing_comma = true +trailing_semicolon = true # continue, break, return +overflow_delimited_expr = true +use_field_init_shorthand = true # F { x: x } -> F { x } +use_try_shorthand = true # try!() -> ()? +empty_item_single_line = true # fn foo() {} +fn_single_line = false # not fn foo() { println!() } +where_single_line = false +max_width = 106 +struct_field_align_threshold = 20 +struct_lit_width = 30 +struct_variant_width = 60 +combine_control_expr = true # if expr within fn call +condense_wildcard_suffixes = true # (_, _) -> ( .. ) +merge_derives = true +spaces_around_ranges = false # 1 .. 5 -> 1..5 +type_punctuation_density = "Wide" # S: Display+Debug=Foo -> spaces + +color = "Always" +hex_literal_case = "Upper" # "Preserve" +# remove_nested_parens = true + +# report_fixme = "Always" +# report_todo = "Always" @@ -1,16 +1,23 @@ +// TODO: Implement --preserve-filenames +// This would preserve the filenames that are given to the files on the +// given website. It can be accomplished, by using their API. +// Example API URLs: +// 4plebs: https://archive.4plebs.org/_/api/chan/thread?board=x&num=32661196 +// 4chan: https://a.4cdn.org/po/thread/570368.json + use futures::stream::StreamExt; use std::{ env, fs::create_dir_all, + io::Write, path::{Path, PathBuf}, sync::{Mutex, Once}, thread, time::{Duration, Instant}, - io::Write, }; use anyhow::{anyhow, Context, Error, Result}; -use chan_downloader::{get_image_links, get_page_content, get_thread_infos, save_image}; +use chan_downloader::{get_image_links, get_page_content, get_thread_info, save_image}; use clap::{ crate_authors, crate_description, @@ -24,9 +31,8 @@ use clap::{ ValueHint, }; use env_logger::fmt::Color as LogColor; -use log::LevelFilter; use indicatif::{ProgressBar, ProgressStyle}; -use log::{error, info}; +use log::{error, info, LevelFilter}; use once_cell::sync::Lazy; use reqwest::Client; @@ -213,9 +219,12 @@ fn create_directory(thread_link: &str, output: &str) -> Result<PathBuf> { let workpath = env::current_dir()?; info!("Working from {}", workpath.display()); - let (board_name, thread_id) = get_thread_infos(thread_link); + let thread = get_thread_info(thread_link); - let directory = workpath.join(output).join(board_name).join(thread_id); + let directory = workpath + .join(output) + .join(thread.board) + .join(format!("{}", thread.id)); if !directory.exists() { match create_dir_all(&directory) { Ok(_) => { @@ -269,13 +278,13 @@ fn build_app() -> Command<'static> { .value_hint(ValueHint::DirPath) .help("Output directory (Default is 'downloads')"), ) - .arg( - Arg::new("preserve_filenames") - .short('p') - .long("preserve-filenames") - .takes_value(false) - .help("Preserve the filenames that are found on 4chan/4plebs"), - ) + // .arg( + // Arg::new("preserve_filenames") + // .short('p') + // .long("preserve-filenames") + // .takes_value(false) + // .help("Preserve the filenames that are found on 4chan/4plebs"), + // ) .arg( Arg::new("reload") .short('r') @@ -7,9 +7,17 @@ use log::info; use reqwest::{Client, Error}; use std::{ fs::File, - io::{copy, Cursor}, + io::{self, Cursor}, }; +/// Represents a 4chan thread +#[derive(Debug)] +pub struct Thread { + pub board: String, + pub id: u32, +} + +#[derive(Debug)] pub struct Link { pub url: String, pub name: String, @@ -41,7 +49,7 @@ pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String if response.status().is_success() { let mut dest = File::create(path).unwrap(); let mut content = Cursor::new(response.bytes().await?); - copy(&mut content, &mut dest).unwrap(); + io::copy(&mut content, &mut dest).unwrap(); } info!("Saved image to: {}", path); Ok(String::from(path)) @@ -77,20 +85,24 @@ pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Erro /// /// ``` /// let url = "https://boards.4chan.org/wg/thread/6872254"; -/// let (board_name, thread_id) = chan_downloader::get_thread_infos(url); +/// let thread = chan_downloader::get_thread_info(url); /// -/// assert_eq!(board_name, "wg"); -/// assert_eq!(thread_id, "6872254"); +/// assert_eq!(thread.board, "wg"); +/// assert_eq!(thread.id, 6872254); /// ``` #[must_use] -pub fn get_thread_infos(url: &str) -> (&str, &str) { - info!(target: "thread_events", "Getting thread infos from: {}", url); +pub fn get_thread_info(url: &str) -> Thread { + info!(target: "thread_events", "Getting thread info from: {}", url); let url_vec: Vec<&str> = url.split('/').collect(); let board_name = url_vec[3]; let thread_vec: Vec<&str> = url_vec[5].split('#').collect(); let thread_id = thread_vec[0]; - info!("Got thread infos from: {}", url); - (board_name, thread_id) + info!("Got thread info from: {}", url); + + Thread { + board: board_name.to_owned(), + id: thread_id.parse::<u32>().expect("failed to parse thread id"), + } } /// Returns the links and the number of links from a page. @@ -115,10 +127,16 @@ pub fn get_thread_infos(url: &str) -> (&str, &str) { /// } /// }; /// ``` +/// +/// Sample image links: +// - https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png +// - https://i.4cdn.org/sp/1661019073822058.jpg #[must_use] pub fn get_image_links(page_content: &str) -> Vec<Link> { info!(target: "link_events", "Getting image links"); - let reg = regex!(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))"); + let reg = regex!( + r"(//i(?:s|mg)?(?:\d*)?\.(?:4cdn|4chan|4plebs)\.org/(?:\w+/){1,3}(?:\d+/){0,2}(\d+\.(?:jpg|png|gif|webm)))" + ); let links_iter = reg.captures_iter(page_content); let number_of_links = reg.captures_iter(page_content).count() / 2; @@ -145,22 +163,31 @@ macro_rules! regex { #[cfg(test)] mod tests { use super::*; + use reqwest::Client; #[test] - fn it_gets_thread_infos() { + fn it_gets_4chan_thread_info() { let url = "https://boards.4chan.org/wg/thread/6872254"; - let (board_name, thread_id) = get_thread_infos(url); - assert_eq!(board_name, "wg"); - assert_eq!(thread_id, "6872254"); + let thread = get_thread_info(url); + assert_eq!(thread.board, "wg"); + assert_eq!(thread.id, 6872254); + } + + #[test] + fn it_gets_4plebs_thread_info() { + let url = "https://archive.4plebs.org/x/thread/32661196"; + let thread = get_thread_info(url); + assert_eq!(thread.board, "x"); + assert_eq!(thread.id, 32661196); } #[test] - fn it_gets_image_links() { + fn it_gets_4chan_image_links() { let links_iter = get_image_links( - " - <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a> - <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a> - ", + r#" + <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a> + <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a> + "#, ); for link in links_iter { assert_eq!(link.url, "//i.4cdn.org/wg/1489266570954.jpg"); @@ -168,9 +195,22 @@ mod tests { } } + #[test] + fn it_gets_4plebs_image_links() { + let links_iter = get_image_links( + r#" + <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a> + <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a> + "#, + ); + for link in links_iter { + assert_eq!(link.url, "//img.4plebs.org/boards/x/image/1660/66/1660662319160984.png"); + assert_eq!(link.name, "1660662319160984.png"); + } + } + #[tokio::test] async fn it_gets_page_content() { - use reqwest::Client; let client = Client::builder().user_agent("reqwest").build().unwrap(); let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore"; let result = get_page_content(url, &client).await.unwrap(); @@ -178,9 +218,8 @@ mod tests { } #[tokio::test] - async fn it_saves_image() { - use reqwest::Client; - use std::{env, fs::remove_file}; + async fn it_saves_4chan_image() { + use std::{env, fs}; let client = Client::builder().user_agent("reqwest").build().unwrap(); let workpath = env::current_dir().unwrap().join("1489266570954.jpg"); let url = "https://i.4cdn.org/wg/1489266570954.jpg"; @@ -188,6 +227,19 @@ mod tests { .await .unwrap(); assert_eq!(workpath.to_str().unwrap(), answer); - remove_file(answer).unwrap(); + fs::remove_file(answer).unwrap(); + } + + #[tokio::test] + async fn it_saves_4plebs_image() { + use std::{env, fs}; + let client = Client::builder().user_agent("reqwest").build().unwrap(); + let workpath = env::current_dir().unwrap().join("1614942709612.jpg"); + let url = "https://img.4plebs.org/boards/x/image/1614/94/1614942709612.jpg"; + let answer = save_image(url, workpath.to_str().unwrap(), &client) + .await + .unwrap(); + assert_eq!(workpath.to_str().unwrap(), answer); + fs::remove_file(answer).unwrap(); } } |