1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
|
// TODO: Implement --preserve-filenames
// This would preserve the filenames that the files were given on the
// source website. It can be accomplished by using the site's API.
// Example API URLs:
// 4plebs: https://archive.4plebs.org/_/api/chan/thread?board=x&num=32661196
// 4chan: https://a.4cdn.org/po/thread/570368.json
use futures::stream::StreamExt;
use std::{
env,
fs::create_dir_all,
io::Write,
path::{Path, PathBuf},
sync::{Mutex, Once},
thread,
time::{Duration, Instant},
};
use anyhow::{anyhow, Context, Error, Result};
use chan_downloader::{get_image_links, get_page_content, get_thread_info, save_image};
use clap::{
crate_authors,
crate_description,
crate_version,
value_parser,
AppSettings,
Arg,
ArgAction,
ColorChoice,
Command,
ValueHint,
};
use env_logger::fmt::Color as LogColor;
use indicatif::{ProgressBar, ProgressStyle};
use log::{error, info, LevelFilter};
use once_cell::sync::Lazy;
use reqwest::Client;
/// Process-wide list of image paths that have already been handled.
///
/// `explore_thread` consults it to skip images on later passes and
/// `mark_as_downloaded` appends to it; the `Mutex` serializes those accesses.
static DOWNLOADED_FILES: Lazy<Mutex<Vec<String>>> = Lazy::new(|| Mutex::new(Vec::new()));
/// Guard that lets `initialize_logging` install the logger only one time.
///
/// The place where this is used should only be run once,
/// but this is a precaution.
static ONCE: Once = Once::new();
/// Parse the command line, set up logging and download the thread's images.
///
/// Without `--reload` the thread is explored exactly once. With `--reload`
/// it is explored again every `--interval` minutes (default 5) until
/// `--limit` minutes (default 120) have elapsed since start-up.
///
/// # Errors
///
/// Returns an error when the `thread` argument is missing, when the output
/// directory cannot be created, or when a pass over the thread fails.
fn main() -> Result<()> {
    let matches = build_app().get_matches();

    let verbosity = *matches.get_one::<u8>("verbose").expect("Count always defaulted");
    initialize_logging(verbosity);

    let thread_url = matches
        .get_one::<String>("thread")
        .context("failed to get 'thread' value")?;
    let output = matches
        .get_one::<String>("output")
        .cloned()
        .unwrap_or_else(|| String::from("downloads"));
    let reload = matches.contains_id("reload");
    let interval = matches.get_one::<u64>("interval").copied().unwrap_or(5);
    let limit = matches.get_one::<u64>("limit").copied().unwrap_or(120);
    let concurrent = matches.get_one::<usize>("concurrent").copied().unwrap_or(2);

    info!("Downloading images from {} to {}", thread_url, output);
    let directory = create_directory(thread_url, &output)?;

    let start = Instant::now();
    // Both values are user-supplied minutes; saturate instead of overflowing
    // when converting them to seconds.
    let wait_time = Duration::from_secs(interval.saturating_mul(60));
    // A zero limit makes the loop below stop after the first pass, which is
    // how the non-reload mode is implemented.
    let limit_time = if reload {
        Duration::from_secs(limit.saturating_mul(60))
    } else {
        Duration::from_secs(0)
    };

    loop {
        let load_start = Instant::now();
        // Propagate failures through `main`'s `Result` instead of panicking.
        explore_thread(thread_url, &directory, concurrent)
            .with_context(|| format!("failed to download images from {}", thread_url))?;

        let runtime = start.elapsed();
        let load_runtime = load_start.elapsed();
        if runtime > limit_time {
            info!("Runtime exceeded, exiting.");
            break;
        }

        // Sleep only for whatever is left of the interval after this pass.
        if let Some(remaining) = wait_time.checked_sub(load_runtime) {
            info!("Schedule slice has time left over; sleeping for {:?}", remaining);
            thread::sleep(remaining);
        }
        info!("Downloader executed one more time for {:?}", load_runtime);
    }

    Ok(())
}
/// Initialize logging for this crate
fn initialize_logging(verbosity: u8) {
ONCE.call_once(|| {
env_logger::Builder::new()
.format_timestamp(None)
.format(|buf, record| {
let mut style = buf.style();
let level_style = match record.level() {
log::Level::Warn => style.set_color(LogColor::Yellow),
log::Level::Info => style.set_color(LogColor::Green),
log::Level::Debug => style.set_color(LogColor::Magenta),
log::Level::Trace => style.set_color(LogColor::Cyan),
log::Level::Error => style.set_color(LogColor::Red),
};
let mut style = buf.style();
let target_style = style.set_color(LogColor::Ansi256(14));
writeln!(
buf,
" {}: {} {}",
level_style.value(record.level()),
target_style.value(record.target()),
record.args()
)
})
.filter(None, match &verbosity {
1 => LevelFilter::Warn,
2 => LevelFilter::Info,
3 => LevelFilter::Debug,
4 => LevelFilter::Trace,
_ => LevelFilter::Off,
})
.init();
});
}
/// Append `file` to the global `DOWNLOADED_FILES` list.
///
/// Returns the same `file` slice on success so callers can log it.
///
/// # Errors
///
/// Returns a static message when the list's mutex cannot be locked.
fn mark_as_downloaded(file: &str) -> Result<&str, &str> {
    match DOWNLOADED_FILES.lock() {
        Ok(mut downloaded) => {
            downloaded.push(file.to_owned());
            Ok(file)
        },
        Err(_) => Err("Failed to acquire MutexGuard"),
    }
}
#[tokio::main]
async fn explore_thread(thread_link: &str, directory: &Path, concurrent: usize) -> Result<(), Error> {
let start = Instant::now();
let client = Client::builder().user_agent("reqwest").build()?;
match get_page_content(thread_link, &client).await {
Ok(page_string) => {
info!("Loaded content from {}", thread_link);
let links_vec = get_image_links(page_string.as_str());
let pb = ProgressBar::new(links_vec.len() as u64);
pb.set_style(
ProgressStyle::default_bar()
.template(
"{spinner:.green.bold} [{elapsed_precise}] [{bar:40.cyan.bold/blue}] \
{pos}/{len} {msg} ({eta})",
)
.context("failed to build progress bar")?
.progress_chars("#>-"),
);
pb.tick();
let fetches = futures::stream::iter(links_vec.into_iter().map(|link| {
let client = &client;
let pb = &pb;
async move {
let img_path = directory.join(link.name);
let image_path = img_path.to_str().unwrap();
let has_been_downloaded = async {
let db = DOWNLOADED_FILES
.lock()
.map_err(|_| String::from("Failed to acquire MutexGuard"))
.unwrap();
db.contains(&String::from(image_path))
}
.await;
if has_been_downloaded {
info!("Image {} previously downloaded. Skipped", img_path.display());
} else if !img_path.exists() {
match save_image(format!("https:{}", link.url).as_str(), image_path, client).await
{
Ok(path) => {
info!("Saved image to {}", &path);
let result = mark_as_downloaded(&path).unwrap();
info!("{} added to downloaded files", result);
},
Err(err) => {
error!("Couldn't save image {}", image_path);
eprintln!("Error: {}", err);
},
}
} else {
info!("Image {} already exists. Skipped", img_path.display());
let result = mark_as_downloaded(image_path).unwrap();
info!("{} added to downloaded files", result);
}
pb.inc(1);
}
}))
.buffer_unordered(concurrent)
.collect::<Vec<()>>();
fetches.await;
pb.finish_with_message("Done");
info!("Done in {:?}", start.elapsed());
},
Err(e) => {
error!("Failed to get content from {}", thread_link);
eprintln!("Error: {}", e);
return Err(anyhow!(e));
},
}
Ok(())
}
/// Compute the download directory for `thread_link` and create it if it is
/// missing.
///
/// The directory is `<current dir>/<output>/<board>/<thread id>`, where the
/// board and id come from `get_thread_info(thread_link)`.
///
/// # Errors
///
/// Returns an error when the current working directory cannot be determined
/// or when the directory cannot be created.
fn create_directory(thread_link: &str, output: &str) -> Result<PathBuf> {
    let workpath = env::current_dir()?;
    info!("Working from {}", workpath.display());

    let thread = get_thread_info(thread_link);
    let mut directory = workpath.join(output);
    directory.push(thread.board);
    directory.push(thread.id.to_string());

    if !directory.exists() {
        if let Err(err) = create_dir_all(&directory) {
            // Reported on stderr as well so the failure is visible even when
            // logging is switched off.
            error!("Failed to create new directory: {}", err);
            eprintln!("Failed to create new directory: {}", err);
            return Err(anyhow!(err));
        }
        info!("Created directory {}", directory.display());
    }

    info!("Downloaded: {} in {}", thread_link, output);
    Ok(directory)
}
/// Build the command-line application.
///
/// Arguments are registered in the order thread, output, reload, interval,
/// limit, concurrent, verbose; `DeriveDisplayOrder` is set, so the order of
/// the `.arg(...)` calls below is significant.
fn build_app() -> Command<'static> {
    log::debug!("Building application");

    // Colored output is disabled whenever `NO_COLOR` is set, whatever its value.
    let color_choice = match env::var_os("NO_COLOR") {
        Some(_) => ColorChoice::Never,
        None => ColorChoice::Auto,
    };

    // The only mandatory argument; must be a non-empty string.
    let thread_arg = Arg::new("thread")
        .short('t')
        .long("thread")
        .required(true)
        .takes_value(true)
        .value_name("URL")
        .value_parser(clap::builder::NonEmptyStringValueParser::new())
        .help("URL of the thread");

    let output_arg = Arg::new("output")
        .short('o')
        .long("output")
        .takes_value(true)
        .value_name("DIRECTORY")
        .value_hint(ValueHint::DirPath)
        .help("Output directory (Default is 'downloads')");

    // Plain switch: only its presence matters.
    let reload_arg = Arg::new("reload")
        .short('r')
        .long("reload")
        .takes_value(false)
        .help("Reload thread every t minutes to get new images");

    let interval_arg = Arg::new("interval")
        .short('i')
        .long("interval")
        .takes_value(true)
        .value_name("INTERVAL")
        .value_parser(value_parser!(u64))
        .help("Time between each reload (in minutes. Default is 5)");

    let limit_arg = Arg::new("limit")
        .short('l')
        .long("limit")
        .takes_value(true)
        .value_name("LIMIT")
        .value_parser(value_parser!(u64))
        .help("Time limit for execution (in minutes. Default is 120)");

    let concurrent_arg = Arg::new("concurrent")
        .short('c')
        .long("concurrent")
        .takes_value(true)
        .value_name("NUM-REQUESTS")
        .value_parser(value_parser!(usize))
        .help("Number of concurrent requests (Default is 2)");

    // Hidden from the help output; every occurrence raises the log level.
    let verbose_arg = Arg::new("verbose")
        .short('v')
        .long("verbose")
        .takes_value(false)
        .hide(true)
        .action(ArgAction::Count)
        .help("Display debugging messages");

    Command::new("chan-downloader")
        .bin_name("chan-downloader")
        .version(crate_version!())
        .author(crate_authors!())
        .about(crate_description!())
        .color(color_choice)
        .setting(AppSettings::DeriveDisplayOrder)
        .infer_long_args(true)
        .dont_collapse_args_in_usage(true)
        .arg(thread_arg)
        .arg(output_arg)
        // Planned, not yet implemented (see the TODO at the top of the file):
        // .arg(
        //     Arg::new("preserve_filenames")
        //         .short('p')
        //         .long("preserve-filenames")
        //         .takes_value(false)
        //         .help("Preserve the filenames that are found on 4chan/4plebs"),
        // )
        .arg(reload_arg)
        .arg(interval_arg)
        .arg(limit_arg)
        .arg(concurrent_arg)
        .arg(verbose_arg)
}
/// Run clap's built-in consistency checks against the command definition.
#[test]
fn verify_app() {
    let app = build_app();
    app.debug_assert();
}
|