about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Cargo.toml2
-rw-r--r--src/bin.rs50
-rw-r--r--src/lib.rs158
3 files changed, 138 insertions, 72 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 6442ae7..b012c88 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ keywords = ["cli", "4chan", "download", "downloader", "crawler"]
categories = ["command-line-utilities"]
[lib]
-name = "chandownloader"
+name = "chan_downloader"
path = "src/lib.rs"
[[bin]]
diff --git a/src/bin.rs b/src/bin.rs
index 79e02c3..da32445 100644
--- a/src/bin.rs
+++ b/src/bin.rs
@@ -1,9 +1,14 @@
#[macro_use]
extern crate clap;
+use std::env;
+use std::fs::create_dir_all;
+
use clap::App;
+use indicatif::{ProgressBar, ProgressStyle};
+use reqwest::Client;
-use chandownloader::download_thread;
+use chan_downloader::{get_image_links, get_page_content, get_thread_infos, save_image};
fn main() {
let yaml = load_yaml!("cli.yml");
@@ -13,3 +18,46 @@ fn main() {
let output = matches.value_of("output").unwrap_or("downloads");
download_thread(thread, &output);
}
+
+fn download_thread(thread_link: &str, output: &str) {
+ let client = Client::new();
+ let workpath = env::current_dir().unwrap();
+
+ let (board_name, thread_id) = get_thread_infos(thread_link);
+
+ let directory = workpath.join(output).join(board_name).join(thread_id);
+ if !directory.exists() {
+ match create_dir_all(&directory) {
+ Ok(_) => {}
+ Err(err) => eprintln!("Failed to create new directory: {}", err),
+ }
+ }
+
+ match get_page_content(thread_link, &client) {
+ Ok(page_string) => {
+ let (links_iter, number_of_links) = get_image_links(page_string.as_str());
+ let pb = ProgressBar::new(number_of_links as u64);
+ pb.set_style(ProgressStyle::default_bar()
+ .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} {msg} ({eta})")
+ .progress_chars("#>-"));
+ pb.tick();
+ for cap in links_iter.step_by(2) {
+ let img_path = directory.join(&cap[2]);
+ if !img_path.exists() {
+ match save_image(
+ format!("https:{}", &cap[1]).as_str(),
+ img_path.to_str().unwrap(),
+ &client,
+ ) {
+ Ok(_) => {}
+ Err(err) => eprintln!("Error: {}", err),
+ }
+ }
+ pb.set_message(&cap[2].to_string());
+ pb.inc(1);
+ }
+ pb.finish_with_message("Done");
+ }
+ Err(err) => eprintln!("Error: {}", err),
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 6bf5f16..c7f0e9f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,93 +1,111 @@
+//! # chan_downloader
+//!
+//! `chan_downloader` is a collection of utilities to
+//! download images/webms from a 4chan thread
+
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate reqwest;
-use indicatif::{ProgressBar, ProgressStyle};
-use regex::Regex;
-use reqwest::{Client, Error};
-
-use std::env;
-use std::fs::create_dir_all;
use std::fs::File;
use std::io::copy;
-fn load(url: &str, client: &Client) -> Result<String, Error> {
- let mut response = client.get(url).send()?;
- Ok(response.text().unwrap())
-}
+use regex::{CaptureMatches, Regex};
+use reqwest::{Client, Error};
-fn save_image(url: &str, name: &str, client: &Client) -> Result<String, Error> {
+/// Saves the image from the url to the given path.
+/// Returns the path on success
+///
+/// # Examples
+///
+/// ```
+/// use reqwest::Client;
+/// use std::env;
+/// let client = Client::new();
+/// let workpath = env::current_dir().unwrap();
+/// let url = "https://i.4cdn.org/wg/1489266570954.jpg";
+/// let path = workpath.join("1489266570954.jpg");
+/// let answer = chan_downloader::save_image(url, path.to_str().unwrap(), &client).unwrap();
+///
+/// assert_eq!(path.to_str().unwrap(), answer);
+/// ```
+pub fn save_image(url: &str, path: &str, client: &Client) -> Result<String, Error> {
let mut response = client.get(url).send()?;
if response.status().is_success() {
- let mut dest = File::create(name).unwrap();
+ let mut dest = File::create(path).unwrap();
copy(&mut response, &mut dest).unwrap();
}
- Ok(String::from(name))
+ Ok(String::from(path))
}
-pub fn download_thread(thread_link: &str, output: &str) {
- let client = Client::new();
- let workpath = env::current_dir().unwrap();
+/// Returns the page content from the given url.
+///
+/// # Examples
+///
+/// ```
+/// use reqwest::Client;
+/// let client = Client::new();
+/// let url = "https://boards.4chan.org/wg/thread/6872254";
+/// match chan_downloader::get_page_content(url, &client) {
+/// Ok(page) => println!("Content: {}", page),
+/// Err(err) => eprintln!("Error: {}", err),
+/// }
+/// ```
+pub fn get_page_content(url: &str, client: &Client) -> Result<String, Error> {
+ let mut response = client.get(url).send()?;
+ Ok(response.text().unwrap())
+}
+
+/// Returns the board name and thread id.
+///
+/// # Examples
+///
+/// ```
+/// let url = "https://boards.4chan.org/wg/thread/6872254";
+/// let (board_name, thread_id) = chan_downloader::get_thread_infos(url);
+///
+/// assert_eq!(board_name, "wg");
+/// assert_eq!(thread_id, "6872254");
+/// ```
+pub fn get_thread_infos(url: &str) -> (&str, &str) {
+ let url_vec: Vec<&str> = url.split('/').collect();
+ let board_name = url_vec[3];
+ let thread_vec: Vec<&str> = url_vec[5].split('#').collect();
+ let thread_id = thread_vec[0];
+ (board_name, thread_id)
+}
+/// Returns the links and the number of links from a page.
+/// Note that the links are doubled
+///
+/// # Examples
+///
+/// ```
+/// use reqwest::Client;
+/// let client = Client::new();
+/// let url = "https://boards.4chan.org/wg/thread/6872254";
+/// match chan_downloader::get_page_content(url, &client) {
+///     Ok(page_string) => {
+///         let (links_iter, number_of_links) = chan_downloader::get_image_links(page_string.as_str());
+///
+///         assert_eq!(number_of_links, 4);
+///
+/// for cap in links_iter.step_by(2) {
+/// println!("{} and {}", &cap[1], &cap[2]);
+/// }
+/// },
+/// Err(err) => eprintln!("Error: {}", err),
+/// }
+/// ```
+pub fn get_image_links(page_content: &str) -> (CaptureMatches, usize) {
lazy_static! {
static ref RE: Regex =
Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))")
.unwrap();
}
- let url_vec: Vec<&str> = thread_link.split('/').collect();
- let board = url_vec[3];
- let thread_vec: Vec<&str> = url_vec[5].split('#').collect();
- let mut thread = thread_vec[0];
-
- if url_vec.len() > 6 {
- let thread_tmp_vec: Vec<&str> = url_vec[6].split('#').collect();
- let thread_tmp = thread_tmp_vec[0];
-
- let path = workpath.join(output).join(board).join(thread_tmp);
-
- if path.exists() {
- thread = thread_tmp;
- }
- }
-
- let directory = workpath.join(output).join(board).join(thread);
- if !directory.exists() {
- match create_dir_all(&directory) {
- Ok(_) => {}
- Err(err) => eprintln!("Failed to create new directory: {}", err),
- }
- }
-
- match load(thread_link, &client) {
- Ok(page_string) => {
- let links_iter = RE.captures_iter(page_string.as_str());
- let number_of_links = RE.captures_iter(page_string.as_str()).count() / 2;
- let pb = ProgressBar::new(number_of_links as u64);
- pb.set_style(ProgressStyle::default_bar()
- .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} {msg} ({eta})")
- .progress_chars("#>-"));
-
- pb.tick();
- for cap in links_iter.step_by(2) {
- let img_path = directory.join(&cap[2]);
- if !img_path.exists() {
- match save_image(
- format!("{}{}", "https:", &cap[1]).as_str(),
- img_path.to_str().unwrap(),
- &client,
- ) {
- Ok(_) => {}
- Err(err) => eprintln!("Error: {}", err),
- }
- }
- pb.set_message(&cap[2].to_string());
- pb.inc(1);
- }
- pb.finish_with_message("Done");
- }
- Err(err) => eprintln!("Error: {}", err),
- }
-}
\ No newline at end of file
+ let links_iter = RE.captures_iter(page_content);
+ let number_of_links = RE.captures_iter(page_content).count() / 2;
+ (links_iter, number_of_links)
+}