// path: root/src/lib.rs
// blob: 11cea2cf2798602dbb56571ec0261a74d891c170
//! # chan_downloader
//!
//! `chan_downloader` is a collection of utilities to
//! download images/webms from a 4chan thread

use log::info;
use reqwest::{Client, Error};
use std::{
    fs::File,
    io::{self, Cursor},
};

/// Represents a 4chan thread
///
/// Values can be cloned and compared, which keeps callers and tests from
/// having to handle the fields one by one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Thread {
    /// Short board name taken from the thread url, e.g. `wg`
    pub board: String,
    /// Numeric id of the thread on that board
    pub id:    u32,
}

/// Represents a downloadable file referenced by a thread page
///
/// Values can be cloned and compared, which keeps callers and tests from
/// having to handle the fields one by one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link {
    /// Scheme-relative location of the file, e.g. `//i.4cdn.org/wg/1489266570954.jpg`
    pub url:  String,
    /// File name component of the url, e.g. `1489266570954.jpg`
    pub name: String,
}

/// Saves the image from the url to the given path.
/// Returns the path on success
///
/// The response body is downloaded completely before the destination file is
/// created, so a transfer that fails half-way does not leave an empty file
/// behind.
///
/// When the server answers with a non-success HTTP status nothing is written
/// to disk; the skip is logged and the path is still returned as `Ok`, which
/// is the contract callers of this function have always had.
///
/// # Errors
///
/// Returns the underlying [`reqwest::Error`] when the request cannot be sent
/// or the response body cannot be read.
///
/// # Panics
///
/// Panics when the destination file cannot be created or written, because the
/// I/O error cannot be represented in the returned error type.
///
/// # Examples
///
/// ```
/// use reqwest::Client;
/// use std::{env, fs::remove_file};
/// let client = Client::builder().user_agent("reqwest").build().unwrap();
/// let workpath = env::current_dir().unwrap().join("1489266570954.jpg");
/// let url = "https://i.4cdn.org/wg/1489266570954.jpg";
/// async {
///     let answer = chan_downloader::save_image(url, workpath.to_str().unwrap(), &client)
///         .await
///         .unwrap();
///     assert_eq!(workpath.to_str().unwrap(), answer);
///     remove_file(answer).unwrap();
/// };
/// ```
pub async fn save_image(url: &str, path: &str, client: &Client) -> Result<String, Error> {
    info!(target: "image_events", "Saving image to: {}", path);
    let response = client.get(url).send().await?;

    let status = response.status();
    if !status.is_success() {
        // Keep returning `Ok` for compatibility, but do not claim the image was saved.
        info!(target: "image_events", "Not saving {}: server answered {}", url, status);
        return Ok(String::from(path));
    }

    // Fetch the whole body first: if this fails we bail out before touching the filesystem.
    let mut content = Cursor::new(response.bytes().await?);
    let mut dest = File::create(path)
        .unwrap_or_else(|err| panic!("failed to create {}: {}", path, err));
    io::copy(&mut content, &mut dest)
        .unwrap_or_else(|err| panic!("failed to write {}: {}", path, err));

    info!("Saved image to: {}", path);
    Ok(String::from(path))
}

/// Fetches `url` with the given client and returns the response body as text.
///
/// # Errors
///
/// Returns the underlying [`reqwest::Error`] when the request cannot be sent
/// or the body cannot be read as text.
///
/// # Examples
///
/// ```
/// use reqwest::Client;
/// use std::io;
/// let client = Client::builder().user_agent("reqwest").build().unwrap();
/// let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore";
/// async {
///     let result = chan_downloader::get_page_content(url, &client)
///         .await
///         .unwrap();
///     assert_eq!(result, "/target/\nCargo.lock\n**/*.rs.bk\n");
/// };
/// ```
pub async fn get_page_content(url: &str, client: &Client) -> Result<String, Error> {
    info!(target: "page_events", "Loading page: {}", url);
    // Send the request and read the body in one chain; either step may fail.
    let body = client.get(url).send().await?.text().await?;
    info!("Loaded page: {}", url);
    Ok(body)
}

/// Returns the board name and thread id.
///
/// The url is expected to look like `scheme://host/board/thread/id`: the
/// board is read from the fourth `/`-separated segment and the id from the
/// sixth. Anything after a `#` (post anchor) or a `?` (query string) in the
/// id segment is ignored.
///
/// # Panics
///
/// Panics when the url has too few segments to contain a board or a thread
/// id, or when the thread id is not a valid `u32`.
///
/// # Examples
///
/// ```
/// let url = "https://boards.4chan.org/wg/thread/6872254";
/// let thread = chan_downloader::get_thread_info(url);
///
/// assert_eq!(thread.board, "wg");
/// assert_eq!(thread.id, 6872254);
/// ```
#[must_use]
pub fn get_thread_info(url: &str) -> Thread {
    info!(target: "thread_events", "Getting thread info from: {}", url);
    // Splitting on `/` yields ["scheme:", "", host, board, "thread", id, ...].
    let mut segments = url.split('/');
    let board_name = segments
        .nth(3)
        .expect("thread url is missing the board segment");
    // The iterator now sits just after the board: skip one segment, take the id.
    let id_segment = segments
        .nth(1)
        .expect("thread url is missing the thread id segment");
    // Drop a trailing `#fragment` or `?query` so only the numeric id remains.
    let thread_id = id_segment
        .split(|c: char| c == '#' || c == '?')
        .next()
        .unwrap_or(id_segment);
    info!("Got thread info from: {}", url);

    Thread {
        board: board_name.to_owned(),
        id:    thread_id.parse::<u32>().expect("failed to parse thread id"),
    }
}

/// Returns the image/webm links found in a page.
///
/// Note that the links are doubled: every file is expected to be referenced
/// twice in a row in the page, so only every other match is kept. The number
/// of links returned is logged, not returned.
///
/// Each [`Link`] holds the scheme-relative url (starting with `//`) and the
/// file name.
///
/// # Examples
///
/// ```
/// use reqwest::Client;
/// let client = Client::builder().user_agent("reqwest").build().unwrap();
/// let url = "https://boards.4chan.org/wg/thread/6872254";
/// async {
///     match chan_downloader::get_page_content(url, &client).await {
///         Ok(page_string) => {
///             let links_iter = chan_downloader::get_image_links(page_string.as_str());
///
///             for link in links_iter {
///                 println!("{} and {}", link.name, link.url);
///             }
///         },
///         Err(err) => eprintln!("Error: {}", err),
///     }
/// };
/// ```
///
/// Sample image links:
///    - <https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png>
///    - <https://i.4cdn.org/sp/1661019073822058.jpg>
#[must_use]
pub fn get_image_links(page_content: &str) -> Vec<Link> {
    info!(target: "link_events", "Getting image links");
    let reg = regex!(
        r"(//i(?:s|mg)?(?:\d*)?\.(?:4cdn|4chan|4plebs)\.org/(?:\w+/){1,3}(?:\d+/){0,2}(\d+\.(?:jpg|png|gif|webm)))"
    );

    // Single pass over the page: group 1 is the whole url, group 2 the file name.
    // `step_by(2)` keeps the first of each pair of duplicated matches.
    let links_v: Vec<Link> = reg
        .captures_iter(page_content)
        .step_by(2)
        .map(|cap| Link {
            url:  String::from(&cap[1]),
            name: String::from(&cap[2]),
        })
        .collect();
    // Log the number of links actually returned instead of re-scanning the page to count.
    info!("Got {} image links from page", links_v.len());
    links_v
}

/// Compiles a [`regex::Regex`] on first use and returns a reference to that
/// same compiled instance on every later evaluation of the call site.
///
/// Accepts a single pattern expression with an optional trailing comma.
///
/// # Panics
///
/// Panics on first use when the pattern is not a valid regular expression.
#[macro_export]
macro_rules! regex {
    ($pattern:expr $(,)?) => {{
        // One static per expansion, so each call site caches its own regex.
        static COMPILED: once_cell::sync::OnceCell<regex::Regex> =
            once_cell::sync::OnceCell::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}

// Unit tests for the thread/link parsers plus network-backed tests for the
// download helpers (the latter need internet access to pass).
#[cfg(test)]
mod tests {
    use super::*;
    use reqwest::Client;

    #[test]
    fn it_gets_4chan_thread_info() {
        let url = "https://boards.4chan.org/wg/thread/6872254";
        let thread = get_thread_info(url);
        assert_eq!(thread.board, "wg");
        assert_eq!(thread.id, 6872254);
    }

    #[test]
    fn it_gets_4plebs_thread_info() {
        let url = "https://archive.4plebs.org/x/thread/32661196";
        let thread = get_thread_info(url);
        assert_eq!(thread.board, "x");
        assert_eq!(thread.id, 32661196);
    }

    #[test]
    fn it_gets_4chan_image_links() {
        let links = get_image_links(
            r#"
            <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a>
            <a href="//i.4cdn.org/wg/1489266570954.jpg" target="_blank">stickyop.jpg</a>
        "#,
        );
        // Without this check the loop below would pass vacuously on an empty result.
        assert_eq!(links.len(), 1);
        for link in links {
            assert_eq!(link.url, "//i.4cdn.org/wg/1489266570954.jpg");
            assert_eq!(link.name, "1489266570954.jpg");
        }
    }

    #[test]
    fn it_gets_4plebs_image_links() {
        let links = get_image_links(
            r#"
            <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a>
            <a href="https://img.4plebs.org/boards/x/image/1660/66/1660662319160984.png" target="_blank"></a>
        "#,
        );
        // Without this check the loop below would pass vacuously on an empty result.
        assert_eq!(links.len(), 1);
        for link in links {
            assert_eq!(link.url, "//img.4plebs.org/boards/x/image/1660/66/1660662319160984.png");
            assert_eq!(link.name, "1660662319160984.png");
        }
    }

    #[tokio::test]
    async fn it_gets_page_content() {
        let client = Client::builder().user_agent("reqwest").build().unwrap();
        let url = "https://raw.githubusercontent.com/mariot/chan-downloader/master/.gitignore";
        let result = get_page_content(url, &client).await.unwrap();
        assert_eq!(result, "/target/\nCargo.lock\n**/*.rs.bk\n.idea/");
    }

    #[tokio::test]
    async fn it_saves_4chan_image() {
        use std::{env, fs};
        let client = Client::builder().user_agent("reqwest").build().unwrap();
        let workpath = env::current_dir().unwrap().join("1489266570954.jpg");
        let url = "https://i.4cdn.org/wg/1489266570954.jpg";
        let answer = save_image(url, workpath.to_str().unwrap(), &client)
            .await
            .unwrap();
        assert_eq!(workpath.to_str().unwrap(), answer);
        fs::remove_file(answer).unwrap();
    }

    #[tokio::test]
    async fn it_saves_4plebs_image() {
        use std::{env, fs};
        let client = Client::builder().user_agent("reqwest").build().unwrap();
        let workpath = env::current_dir().unwrap().join("1614942709612.jpg");
        let url = "https://img.4plebs.org/boards/x/image/1614/94/1614942709612.jpg";
        let answer = save_image(url, workpath.to_str().unwrap(), &client)
            .await
            .unwrap();
        assert_eq!(workpath.to_str().unwrap(), answer);
        fs::remove_file(answer).unwrap();
    }
}