diff options
| -rw-r--r-- | Cargo.toml | 2 | ||||
| -rw-r--r-- | src/bin.rs | 50 | ||||
| -rw-r--r-- | src/lib.rs | 158 |
3 files changed, 138 insertions, 72 deletions
@@ -12,7 +12,7 @@ keywords = ["cli", "4chan", "download", "downloader", "crawler"] categories = ["command-line-utilities"] [lib] -name = "chandownloader" +name = "chan_downloader" path = "src/lib.rs" [[bin]] @@ -1,9 +1,14 @@ #[macro_use] extern crate clap; +use std::env; +use std::fs::create_dir_all; + use clap::App; +use indicatif::{ProgressBar, ProgressStyle}; +use reqwest::Client; -use chandownloader::download_thread; +use chan_downloader::{get_image_links, get_page_content, get_thread_infos, save_image}; fn main() { let yaml = load_yaml!("cli.yml"); @@ -13,3 +18,46 @@ fn main() { let output = matches.value_of("output").unwrap_or("downloads"); download_thread(thread, &output); } + +fn download_thread(thread_link: &str, output: &str) { + let client = Client::new(); + let workpath = env::current_dir().unwrap(); + + let (board_name, thread_id) = get_thread_infos(thread_link); + + let directory = workpath.join(output).join(board_name).join(thread_id); + if !directory.exists() { + match create_dir_all(&directory) { + Ok(_) => {} + Err(err) => eprintln!("Failed to create new directory: {}", err), + } + } + + match get_page_content(thread_link, &client) { + Ok(page_string) => { + let (links_iter, number_of_links) = get_image_links(page_string.as_str()); + let pb = ProgressBar::new(number_of_links as u64); + pb.set_style(ProgressStyle::default_bar() + .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} {msg} ({eta})") + .progress_chars("#>-")); + pb.tick(); + for cap in links_iter.step_by(2) { + let img_path = directory.join(&cap[2]); + if !img_path.exists() { + match save_image( + format!("https:{}", &cap[1]).as_str(), + img_path.to_str().unwrap(), + &client, + ) { + Ok(_) => {} + Err(err) => eprintln!("Error: {}", err), + } + } + pb.set_message(&cap[2].to_string()); + pb.inc(1); + } + pb.finish_with_message("Done"); + } + Err(err) => eprintln!("Error: {}", err), + } +} @@ -1,93 +1,111 @@ +//! # chan_downloader +//! +//! `chan_downloader` is a collection of utilities to +//! download images/webms from a 4chan thread + #[macro_use] extern crate lazy_static; extern crate regex; extern crate reqwest; -use indicatif::{ProgressBar, ProgressStyle}; -use regex::Regex; -use reqwest::{Client, Error}; - -use std::env; -use std::fs::create_dir_all; use std::fs::File; use std::io::copy; -fn load(url: &str, client: &Client) -> Result<String, Error> { - let mut response = client.get(url).send()?; - Ok(response.text().unwrap()) -} +use regex::{CaptureMatches, Regex}; +use reqwest::{Client, Error}; -fn save_image(url: &str, name: &str, client: &Client) -> Result<String, Error> { +/// Saves the image from the url to the given path. +/// Returns the path on success +/// +/// # Examples +/// +/// ``` +/// use reqwest::Client; +/// use std::env; +/// let client = Client::new(); +/// let workpath = env::current_dir().unwrap(); +/// let url = "https://i.4cdn.org/wg/1489266570954.jpg"; +/// let answer = chan_downloader::save_image(url, workpath, client); +/// +/// assert_eq!(url, answer); +/// ``` +pub fn save_image(url: &str, path: &str, client: &Client) -> Result<String, Error> { let mut response = client.get(url).send()?; if response.status().is_success() { - let mut dest = File::create(name).unwrap(); + let mut dest = File::create(path).unwrap(); copy(&mut response, &mut dest).unwrap(); } - Ok(String::from(name)) + Ok(String::from(path)) } -pub fn download_thread(thread_link: &str, output: &str) { - let client = Client::new(); - let workpath = env::current_dir().unwrap(); +/// Returns the page content from the given url. +/// +/// # Examples +/// +/// ``` +/// use reqwest::Client; +/// let client = Client::new(); +/// let url = "https://boards.4chan.org/wg/thread/6872254"; +/// match chan_downloader::get_page_content(url, client) { +/// Ok(page) => println!("Content: {}", page), +/// Err(err) => eprintln!("Error: {}", err), +/// } +/// ``` +pub fn get_page_content(url: &str, client: &Client) -> Result<String, Error> { + let mut response = client.get(url).send()?; + Ok(response.text().unwrap()) +} + +/// Returns the board name and thread id. +/// +/// # Examples +/// +/// ``` +/// let url = "https://boards.4chan.org/wg/thread/6872254"; +/// let (board_name, thread_id) = chan_downloader::get_thread_infos(url); +/// +/// assert_eq!(board_name, "wg"); +/// assert_eq!(thread_id, "6872254"); +/// ``` +pub fn get_thread_infos(url: &str) -> (&str, &str) { + let url_vec: Vec<&str> = url.split('/').collect(); + let board_name = url_vec[3]; + let thread_vec: Vec<&str> = url_vec[5].split('#').collect(); + let thread_id = thread_vec[0]; + (board_name, thread_id) +} +/// Returns the links and the number of links from a page. +/// Note that the links are doubled +/// +/// # Examples +/// +/// ``` +/// use reqwest::Client; +/// let client = Client::new(); +/// let url = "https://boards.4chan.org/wg/thread/6872254"; +/// match chan_downloader::get_page_content(url, client) { +/// Ok(page_string) => { +/// let (links_iter, number_of_links) = get_image_links(page_string.as_str()); + +/// assert_eq!(number_of_links, 4); +/// +/// for cap in links_iter.step_by(2) { +/// println!("{} and {}", &cap[1], &cap[2]); +/// } +/// }, +/// Err(err) => eprintln!("Error: {}", err), +/// } +/// ``` +pub fn get_image_links(page_content: &str) -> (CaptureMatches, usize) { lazy_static! { static ref RE: Regex = Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))") .unwrap(); } - let url_vec: Vec<&str> = thread_link.split('/').collect(); - let board = url_vec[3]; - let thread_vec: Vec<&str> = url_vec[5].split('#').collect(); - let mut thread = thread_vec[0]; - - if url_vec.len() > 6 { - let thread_tmp_vec: Vec<&str> = url_vec[6].split('#').collect(); - let thread_tmp = thread_tmp_vec[0]; - - let path = workpath.join(output).join(board).join(thread_tmp); - - if path.exists() { - thread = thread_tmp; - } - } - - let directory = workpath.join(output).join(board).join(thread); - if !directory.exists() { - match create_dir_all(&directory) { - Ok(_) => {} - Err(err) => eprintln!("Failed to create new directory: {}", err), - } - } - - match load(thread_link, &client) { - Ok(page_string) => { - let links_iter = RE.captures_iter(page_string.as_str()); - let number_of_links = RE.captures_iter(page_string.as_str()).count() / 2; - let pb = ProgressBar::new(number_of_links as u64); - pb.set_style(ProgressStyle::default_bar() - .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} {msg} ({eta})") - .progress_chars("#>-")); - - pb.tick(); - for cap in links_iter.step_by(2) { - let img_path = directory.join(&cap[2]); - if !img_path.exists() { - match save_image( - format!("{}{}", "https:", &cap[1]).as_str(), - img_path.to_str().unwrap(), - &client, - ) { - Ok(_) => {} - Err(err) => eprintln!("Error: {}", err), - } - } - pb.set_message(&cap[2].to_string()); - pb.inc(1); - } - pb.finish_with_message("Done"); - } - Err(err) => eprintln!("Error: {}", err), - } -}
\ No newline at end of file + let links_iter = RE.captures_iter(page_content); + let number_of_links = RE.captures_iter(page_content).count() / 2; + (links_iter, number_of_links) +} |