diff options
Diffstat (limited to 'src/lib.rs')
| -rw-r--r-- | src/lib.rs | 166 |
1 files changed, 114 insertions, 52 deletions
@@ -3,21 +3,23 @@ //! `chan_downloader` is a collection of utilities to //! download images/webms from a 4chan thread -#[macro_use] -extern crate lazy_static; -extern crate regex; -extern crate reqwest; - -use std::fs::File; -use std::io::{copy, Cursor}; - use log::info; -use regex::Regex; -use reqwest::Error; -use reqwest::Client; +use reqwest::{Client, Error}; +use std::{ + fs::File, + io::{self, Cursor}, +}; + +/// Represents a 4chan thread +#[derive(Debug)] +pub struct Thread { + pub board: String, + pub id: u32, +} +#[derive(Debug)] pub struct Link { - pub url: String, + pub url: String, pub name: String, } @@ -28,13 +30,14 @@ pub struct Link { /// /// ``` /// use reqwest::Client; -/// use std::env; -/// use std::fs::remove_file; +/// use std::{env, fs::remove_file}; /// let client = Client::builder().user_agent("reqwest").build().unwrap(); /// let workpath = env::current_dir().unwrap().join("1489266570954.jpg"); /// let url = "https://i.4cdn.org/wg/1489266570954.jpg"; /// async { -/// let answer = chan_downloader::save_image(url, workpath.to_str().unwrap(), &client).await.unwrap(); +/// let answer = chan_downloader::save_image(url, workpath.to_str().unwrap(), &client) +/// .await +/// .unwrap(); /// assert_eq!(workpath.to_str().unwrap(), answer); /// remove_file(answer).unwrap(); /// }; @@ -45,8 +48,8 @@ pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String if response.status().is_success() { let mut dest = File::create(path).unwrap(); - let mut content = Cursor::new(response.bytes().await?); - copy(&mut content, &mut dest).unwrap(); + let mut content = Cursor::new(response.bytes().await?); + io::copy(&mut content, &mut dest).unwrap(); } info!("Saved image to: {}", path); Ok(String::from(path)) @@ -57,19 +60,21 @@ pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String /// # Examples /// /// ``` -/// use std::io; /// use reqwest::Client; +/// use std::io; /// let client = Client::builder().user_agent("reqwest").build().unwrap(); /// let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore"; /// async { -/// let result = chan_downloader::get_page_content(url, &client).await.unwrap(); +/// let result = chan_downloader::get_page_content(url, &client) +/// .await +/// .unwrap(); /// assert_eq!(result, "/target/\nCargo.lock\n**/*.rs.bk\n"); /// }; /// ``` pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Error> { info!(target: "page_events", "Loading page: {}", url); let response = client.get(url).send().await?; - let content = response.text().await?; + let content = response.text().await?; info!("Loaded page: {}", url); Ok(content) } @@ -80,19 +85,24 @@ pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Erro /// /// ``` /// let url = "https://boards.4chan.org/wg/thread/6872254"; -/// let (board_name, thread_id) = chan_downloader::get_thread_infos(url); +/// let thread = chan_downloader::get_thread_info(url); /// -/// assert_eq!(board_name, "wg"); -/// assert_eq!(thread_id, "6872254"); +/// assert_eq!(thread.board, "wg"); +/// assert_eq!(thread.id, 6872254); /// ``` -pub fn get_thread_infos(url: &str) -> (&str, &str) { - info!(target: "thread_events", "Getting thread infos from: {}", url); +#[must_use] +pub fn get_thread_info(url: &str) -> Thread { + info!(target: "thread_events", "Getting thread info from: {}", url); let url_vec: Vec<&str> = url.split('/').collect(); let board_name = url_vec[3]; let thread_vec: Vec<&str> = url_vec[5].split('#').collect(); let thread_id = thread_vec[0]; - info!("Got thread infos from: {}", url); - (board_name, thread_id) + info!("Got thread info from: {}", url); + + Thread { + board: board_name.to_owned(), + id: thread_id.parse::<u32>().expect("failed to parse thread id"), + } } /// Returns the links and the number of links from a page. @@ -108,7 +118,7 @@ pub fn get_thread_infos(url: &str) -> (&str, &str) { /// match chan_downloader::get_page_content(url, &client).await { /// Ok(page_string) => { /// let links_iter = chan_downloader::get_image_links(page_string.as_str()); -/// +/// /// for link in links_iter { /// println!("{} and {}", link.name, link.url); /// } @@ -117,51 +127,90 @@ pub fn get_thread_infos(url: &str) -> (&str, &str) { /// } /// }; /// ``` +/// +/// Sample image links: +// - https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png +// - https://i.4cdn.org/sp/1661019073822058.jpg +#[must_use] pub fn get_image_links(page_content: &str) -> Vec<Link> { info!(target: "link_events", "Getting image links"); - lazy_static! { - static ref RE: Regex = - Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))") - .unwrap(); - } + let reg = regex!( + r"(//i(?:s|mg)?(?:\d*)?\.(?:4cdn|4chan|4plebs)\.org/(?:\w+/){1,3}(?:\d+/){0,2}(\d+\.(?:jpg|png|gif|webm)))" + ); - let links_iter = RE.captures_iter(page_content); - let number_of_links = RE.captures_iter(page_content).count() / 2; + let links_iter = reg.captures_iter(page_content); + let number_of_links = reg.captures_iter(page_content).count() / 2; info!("Got {} image links from page", number_of_links); let mut links_v: Vec<Link> = Vec::new(); for cap in links_iter.step_by(2) { - links_v.push(Link{ url: String::from(&cap[1]), name: String::from(&cap[2]) }); + links_v.push(Link { + url: String::from(&cap[1]), + name: String::from(&cap[2]), + }); } links_v } +/// Initialize a [`Regex`] once +#[macro_export] +macro_rules! regex { + ($re:expr $(,)?) => {{ + static RE: once_cell::sync::OnceCell<regex::Regex> = once_cell::sync::OnceCell::new(); + RE.get_or_init(|| regex::Regex::new($re).unwrap()) + }}; +} + #[cfg(test)] mod tests { use super::*; + use reqwest::Client; #[test] - fn it_gets_thread_infos() { + fn it_gets_4chan_thread_info() { let url = "https://boards.4chan.org/wg/thread/6872254"; - let (board_name, thread_id) = get_thread_infos(url); - assert_eq!(board_name, "wg"); - assert_eq!(thread_id, "6872254"); + let thread = get_thread_info(url); + assert_eq!(thread.board, "wg"); + assert_eq!(thread.id, 6872254); } #[test] - fn it_gets_image_links() { - let links_iter = get_image_links(" - <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a> - <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a> - "); + fn it_gets_4plebs_thread_info() { + let url = "https://archive.4plebs.org/x/thread/32661196"; + let thread = get_thread_info(url); + assert_eq!(thread.board, "x"); + assert_eq!(thread.id, 32661196); + } + + #[test] + fn it_gets_4chan_image_links() { + let links_iter = get_image_links( + r#" + <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a> + <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a> + "#, + ); for link in links_iter { assert_eq!(link.url, "//i.4cdn.org/wg/1489266570954.jpg"); assert_eq!(link.name, "1489266570954.jpg"); } } + #[test] + fn it_gets_4plebs_image_links() { + let links_iter = get_image_links( + r#" + <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a> + <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a> + "#, + ); + for link in links_iter { + assert_eq!(link.url, "//img.4plebs.org/boards/x/image/1660/66/1660662319160984.png"); + assert_eq!(link.name, "1660662319160984.png"); + } + } + #[tokio::test] async fn it_gets_page_content() { - use reqwest::Client; let client = Client::builder().user_agent("reqwest").build().unwrap(); let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore"; let result = get_page_content(url, &client).await.unwrap(); @@ -169,15 +218,28 @@ mod tests { } #[tokio::test] - async fn it_saves_image() { - use reqwest::Client; - use std::env; - use std::fs::remove_file; + async fn it_saves_4chan_image() { + use std::{env, fs}; let client = Client::builder().user_agent("reqwest").build().unwrap(); let workpath = env::current_dir().unwrap().join("1489266570954.jpg"); let url = "https://i.4cdn.org/wg/1489266570954.jpg"; - let answer = save_image(url, workpath.to_str().unwrap(), &client).await.unwrap(); + let answer = save_image(url, workpath.to_str().unwrap(), &client) + .await + .unwrap(); + assert_eq!(workpath.to_str().unwrap(), answer); + fs::remove_file(answer).unwrap(); + } + + #[tokio::test] + async fn it_saves_4plebs_image() { + use std::{env, fs}; + let client = Client::builder().user_agent("reqwest").build().unwrap(); + let workpath = env::current_dir().unwrap().join("1614942709612.jpg"); + let url = "https://img.4plebs.org/boards/x/image/1614/94/1614942709612.jpg"; + let answer = save_image(url, workpath.to_str().unwrap(), &client) + .await + .unwrap(); assert_eq!(workpath.to_str().unwrap(), answer); - remove_file(answer).unwrap(); + fs::remove_file(answer).unwrap(); } } |