diff options
Diffstat (limited to 'src/lib.rs')
| -rw-r--r-- | src/lib.rs | 76 |
1 files changed, 43 insertions, 33 deletions
@@ -3,21 +3,15 @@ //! `chan_downloader` is a collection of utilities to //! download images/webms from a 4chan thread -#[macro_use] -extern crate lazy_static; -extern crate regex; -extern crate reqwest; - -use std::fs::File; -use std::io::{copy, Cursor}; - use log::info; -use regex::Regex; -use reqwest::Error; -use reqwest::Client; +use reqwest::{Client, Error}; +use std::{ + fs::File, + io::{copy, Cursor}, +}; pub struct Link { - pub url: String, + pub url: String, pub name: String, } @@ -28,13 +22,14 @@ pub struct Link { /// /// ``` /// use reqwest::Client; -/// use std::env; -/// use std::fs::remove_file; +/// use std::{env, fs::remove_file}; /// let client = Client::builder().user_agent("reqwest").build().unwrap(); /// let workpath = env::current_dir().unwrap().join("1489266570954.jpg"); /// let url = "https://i.4cdn.org/wg/1489266570954.jpg"; /// async { -/// let answer = chan_downloader::save_image(url, workpath.to_str().unwrap(), &client).await.unwrap(); +/// let answer = chan_downloader::save_image(url, workpath.to_str().unwrap(), &client) +/// .await +/// .unwrap(); /// assert_eq!(workpath.to_str().unwrap(), answer); /// remove_file(answer).unwrap(); /// }; @@ -45,7 +40,7 @@ pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String if response.status().is_success() { let mut dest = File::create(path).unwrap(); - let mut content = Cursor::new(response.bytes().await?); + let mut content = Cursor::new(response.bytes().await?); copy(&mut content, &mut dest).unwrap(); } info!("Saved image to: {}", path); @@ -57,19 +52,21 @@ pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String /// # Examples /// /// ``` -/// use std::io; /// use reqwest::Client; +/// use std::io; /// let client = Client::builder().user_agent("reqwest").build().unwrap(); /// let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore"; /// async { -/// let result = chan_downloader::get_page_content(url, &client).await.unwrap(); +/// let result = chan_downloader::get_page_content(url, &client) +/// .await +/// .unwrap(); /// assert_eq!(result, "/target/\nCargo.lock\n**/*.rs.bk\n"); /// }; /// ``` pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Error> { info!(target: "page_events", "Loading page: {}", url); let response = client.get(url).send().await?; - let content = response.text().await?; + let content = response.text().await?; info!("Loaded page: {}", url); Ok(content) } @@ -85,6 +82,7 @@ pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Erro /// assert_eq!(board_name, "wg"); /// assert_eq!(thread_id, "6872254"); /// ``` +#[must_use] pub fn get_thread_infos(url: &str) -> (&str, &str) { info!(target: "thread_events", "Getting thread infos from: {}", url); let url_vec: Vec<&str> = url.split('/').collect(); @@ -108,7 +106,7 @@ pub fn get_thread_infos(url: &str) -> (&str, &str) { /// match chan_downloader::get_page_content(url, &client).await { /// Ok(page_string) => { /// let links_iter = chan_downloader::get_image_links(page_string.as_str()); -/// +/// /// for link in links_iter { /// println!("{} and {}", link.name, link.url); /// } @@ -117,24 +115,33 @@ pub fn get_thread_infos(url: &str) -> (&str, &str) { /// } /// }; /// ``` +#[must_use] pub fn get_image_links(page_content: &str) -> Vec<Link> { info!(target: "link_events", "Getting image links"); - lazy_static! { - static ref RE: Regex = - Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))") - .unwrap(); - } + let reg = regex!(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))"); - let links_iter = RE.captures_iter(page_content); - let number_of_links = RE.captures_iter(page_content).count() / 2; + let links_iter = reg.captures_iter(page_content); + let number_of_links = reg.captures_iter(page_content).count() / 2; info!("Got {} image links from page", number_of_links); let mut links_v: Vec<Link> = Vec::new(); for cap in links_iter.step_by(2) { - links_v.push(Link{ url: String::from(&cap[1]), name: String::from(&cap[2]) }); + links_v.push(Link { + url: String::from(&cap[1]), + name: String::from(&cap[2]), + }); } links_v } +/// Initialize a [`Regex`] once +#[macro_export] +macro_rules! regex { + ($re:expr $(,)?) => {{ + static RE: once_cell::sync::OnceCell<regex::Regex> = once_cell::sync::OnceCell::new(); + RE.get_or_init(|| regex::Regex::new($re).unwrap()) + }}; +} + #[cfg(test)] mod tests { use super::*; @@ -149,10 +156,12 @@ mod tests { #[test] fn it_gets_image_links() { - let links_iter = get_image_links(" + let links_iter = get_image_links( + " <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a> <a href=\"//i.4cdn.org/wg/1489266570954.jpg\" target=\"_blank\">stickyop.jpg</a> - "); + ", + ); for link in links_iter { assert_eq!(link.url, "//i.4cdn.org/wg/1489266570954.jpg"); assert_eq!(link.name, "1489266570954.jpg"); @@ -171,12 +180,13 @@ mod tests { #[tokio::test] async fn it_saves_image() { use reqwest::Client; - use std::env; - use std::fs::remove_file; + use std::{env, fs::remove_file}; let client = Client::builder().user_agent("reqwest").build().unwrap(); let workpath = env::current_dir().unwrap().join("1489266570954.jpg"); let url = "https://i.4cdn.org/wg/1489266570954.jpg"; - let answer = save_image(url, workpath.to_str().unwrap(), &client).await.unwrap(); + let answer = save_image(url, workpath.to_str().unwrap(), &client) + .await + .unwrap(); assert_eq!(workpath.to_str().unwrap(), answer); remove_file(answer).unwrap(); } |