1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
//! # chan_downloader
//!
//! `chan_downloader` is a collection of utilities to
//! download images/webms from a 4chan thread
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate reqwest;
use std::fs::File;
use std::io::copy;
use regex::{CaptureMatches, Regex};
use reqwest::{Client, Error};
/// Downloads the resource at `url` and writes it to the file at `path`.
///
/// Returns `path` (as an owned `String`) on success.
///
/// # Errors
///
/// Returns the underlying `reqwest::Error` when the request cannot be sent,
/// or when the server answers with a 4xx/5xx status. Previously such
/// responses were silently reported as `Ok(path)` although no file had been
/// written.
///
/// For any other non-2xx status nothing is written and `Ok(path)` is still
/// returned.
///
/// # Panics
///
/// Panics if the destination file cannot be created or if copying the
/// response body into it fails. These I/O failures cannot be expressed with
/// the `reqwest::Error` return type of this function.
///
/// # Examples
///
/// ```no_run
/// use reqwest::Client;
/// use std::env;
/// let client = Client::new();
/// let workpath = env::current_dir().unwrap().join("1489266570954.jpg");
/// let path = workpath.to_str().unwrap();
/// let url = "https://i.4cdn.org/wg/1489266570954.jpg";
/// let answer = chan_downloader::save_image(url, path, &client).unwrap();
///
/// assert_eq!(path, answer);
/// ```
pub fn save_image(url: &str, path: &str, client: &Client) -> Result<String, Error> {
    // `error_for_status` converts 4xx/5xx responses into an `Err`, so a failed
    // download is no longer reported to the caller as a success.
    let mut response = client.get(url).send()?.error_for_status()?;
    if response.status().is_success() {
        // NOTE(review): I/O failures still panic; propagating them would
        // require changing the public error type.
        let mut dest = File::create(path).unwrap();
        copy(&mut response, &mut dest).unwrap();
    }
    Ok(String::from(path))
}
/// Fetches `url` with the given client and returns the response body as text.
///
/// # Errors
///
/// Returns the underlying `reqwest::Error` when the request cannot be sent or
/// when the response body cannot be read as text. The latter case used to
/// panic instead of being reported to the caller.
///
/// # Examples
///
/// ```no_run
/// use reqwest::Client;
/// let client = Client::new();
/// let url = "https://boards.4chan.org/wg/thread/6872254";
/// match chan_downloader::get_page_content(url, &client) {
///     Ok(page) => println!("Content: {}", page),
///     Err(err) => eprintln!("Error: {}", err),
/// }
/// ```
pub fn get_page_content(url: &str, client: &Client) -> Result<String, Error> {
    let mut response = client.get(url).send()?;
    // `text()` already yields this function's `Result` type, so hand it back
    // directly rather than unwrapping and panicking on failure.
    response.text()
}
/// Extracts the board name and the thread id from a thread url.
///
/// The url is split on `/`: the board name is the fourth component and the
/// thread id is the sixth component with any `#fragment` suffix removed.
///
/// A component that is missing (empty or truncated url) is returned as an
/// empty string; the function never panics.
///
/// # Examples
///
/// ```
/// let url = "https://boards.4chan.org/wg/thread/6872254";
/// let (board_name, thread_id) = chan_downloader::get_thread_infos(url);
///
/// assert_eq!(board_name, "wg");
/// assert_eq!(thread_id, "6872254");
/// ```
pub fn get_thread_infos(url: &str) -> (&str, &str) {
    let mut segments = url.split('/');
    // Component 3 is the board name.
    let board_name = segments.nth(3).unwrap_or("");
    // `nth(3)` consumed components 0..=3, so `nth(1)` skips component 4 and
    // yields component 5, which holds the thread id.
    let thread_segment = segments.nth(1).unwrap_or("");
    // Drop a trailing `#p123`-style anchor, if any.
    let thread_id = thread_segment.split('#').next().unwrap_or("");
    (board_name, thread_id)
}
/// Finds the image/webm links in a page and reports how many there are.
///
/// Returns an iterator over every regex match together with the number of
/// links. The links are doubled in the match iterator, so the reported number
/// is the total match count divided by two; step through the iterator by two
/// to visit each link once.
///
/// For every match, capture group 1 is the whole link (starting at `//`) and
/// capture group 2 is the file name.
///
/// # Panics
///
/// Panics on first use if the built-in regular expression fails to compile.
///
/// # Examples
///
/// ```no_run
/// use reqwest::Client;
/// let client = Client::new();
/// let url = "https://boards.4chan.org/wg/thread/6872254";
/// match chan_downloader::get_page_content(url, &client) {
///     Ok(page_string) => {
///         let (links_iter, number_of_links) =
///             chan_downloader::get_image_links(page_string.as_str());
///         assert_eq!(number_of_links, 4);
///
///         for cap in links_iter.step_by(2) {
///             println!("{} and {}", &cap[1], &cap[2]);
///         }
///     },
///     Err(err) => eprintln!("Error: {}", err),
/// }
/// ```
pub fn get_image_links(page_content: &str) -> (CaptureMatches, usize) {
    lazy_static! {
        static ref RE: Regex =
            Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))")
                .unwrap();
    }

    // A first pass only counts the matches; halve it because links are doubled.
    let total_matches = RE.captures_iter(page_content).count();
    let number_of_links = total_matches / 2;

    // A second, fresh iterator is handed to the caller untouched.
    (RE.captures_iter(page_content), number_of_links)
}
|