diff options
| author | Mariot Tsitoara <[email protected]> | 2019-07-08 03:10:00 +0200 |
|---|---|---|
| committer | Mariot Tsitoara <[email protected]> | 2019-07-08 03:10:00 +0200 |
| commit | 098579d0e361ed781cacd388a0873e045183bd4a (patch) | |
| tree | d4c155031d573aa18f2317b816b2a31ed6d99271 | |
| download | chan-downloader-098579d0e361ed781cacd388a0873e045183bd4a.tar.xz chan-downloader-098579d0e361ed781cacd388a0873e045183bd4a.zip | |
initial commit
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | Cargo.toml | 18 | ||||
| -rw-r--r-- | LICENSE | 21 | ||||
| -rw-r--r-- | README.md | 16 | ||||
| -rw-r--r-- | src/cli.yml | 12 | ||||
| -rw-r--r-- | src/main.rs | 92 |
6 files changed, 162 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cd23ebe --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target/ +Cargo.lock +**/*.rs.bk diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..5453e6e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "4chan-downloader" +description = "CLI to download all images/webms of a 4chan thread" +version = "0.1.0" +authors = ["Mariot Tsitoara <[email protected]>"] +edition = "2018" +license = "MIT" +readme = "README.md" +homepage = "https://github.com/mariot/4chan-downloader" +repository = "https://github.com/mariot/4chan-downloader" +keywords = ["cli", "4chan", "download", "downloader", "crawler"] +categories = ["command-line-utilities"] + +[dependencies] +clap = {version = "2.32", features = ["yaml"]} +regex = "1.1.2" +reqwest = "0.9.12" +tempdir = "0.3.7" @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Mariot Tsitoara + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6ab5456 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +4chan-downloader +================ + +CLI to download all images/webms of a 4chan thread + +``` +USAGE: + 4chan-downloader --thread <thread> + +FLAGS: + -h, --help Prints help information + -V, --version Prints version information + +OPTIONS: + -t, --thread <thread> URL of the thread +``` diff --git a/src/cli.yml b/src/cli.yml new file mode 100644 index 0000000..d4dead2 --- /dev/null +++ b/src/cli.yml @@ -0,0 +1,12 @@ +name: 4chan-downloader +version: "0.1.0" +author: "Mariot Tsitoara <[email protected]>" +about: CLI to download all images/webms of a 4chan thread +args: + - thread: + short: t + required: true + long: thread + value_name: thread + help: URL of the thread + takes_value: true diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e91ff7f --- /dev/null +++ b/src/main.rs @@ -0,0 +1,92 @@ +#[macro_use] +extern crate clap; +extern crate regex; +extern crate reqwest; +extern crate tempdir; + +use clap::{App, ArgMatches}; +use regex::Regex; +use reqwest::{Client, StatusCode}; +use tempdir::TempDir; + +use std::env; +use std::fs::create_dir_all; +use std::fs::File; +use std::io::copy; + + +fn main() { + let yaml = load_yaml!("cli.yml"); + let matches = App::from_yaml(yaml).get_matches(); + + let thread = matches.value_of("thread").unwrap(); + let client = Client::new(); + download_thread(thread, &matches, &client); +} + +fn load(url: &str, client: &Client) -> reqwest::Response { + client.get(url).send().unwrap() +} + +fn save_image(url: &str, name: &str, client: &Client) -> Result<(String), String> { + let tmp_dir = TempDir::new("inb4404_temp"); + let mut response = client.get(url).send().unwrap(); + + let file_name = match response.status() { + StatusCode::OK => { + let mut dest = { + tmp_dir.unwrap().path().join(name); + File::create(name).unwrap() + }; + copy(&mut response, &mut dest).unwrap(); + name + }, + StatusCode::NOT_FOUND => { + return Err(String::from("File not found")); + } + s => return Err(String::from(format!("Received response status: {:?}", s))), + }; + Ok(String::from(file_name)) +} + +fn download_thread(thread_link: &str, matches: &ArgMatches, client: &Client) { + let workpath = env::current_dir().unwrap(); + let re = Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))").unwrap(); + + let url_vec: Vec<&str> = thread_link.split('/').collect(); + let board = url_vec[3]; + let thread_vec: Vec<&str> = url_vec[5].split('#').collect(); + let mut thread = thread_vec[0]; + + if url_vec.len() > 6 { + let thread_tmp_vec: Vec<&str> = url_vec[6].split('#').collect(); + let thread_tmp = thread_tmp_vec[0]; + + let path = workpath.join("downloads").join(board).join(thread_tmp); + + if matches.is_present("names") || path.exists() { + thread = thread_tmp; + } + } + + let directory = workpath.join("downloads").join(board).join(thread); + if !directory.exists() { + match create_dir_all(&directory) { + Ok(_) => println!("Created new directory: {}", directory.display()), + Err(err) => eprintln!("Failed to create new directory: {}", err) + } + } else { + println!("Using existing directory: {}", directory.display()) + } + + let mut thread_page = load(thread_link, client); + for cap in re.captures_iter(thread_page.text().unwrap().as_str()).step_by(2) { + let img_path = directory.join(&cap[2]); + if !img_path.exists() { + match save_image(format!("{}{}", "https:", &cap[1]).as_str(), img_path.to_str().unwrap(), client) { + Ok(name) => println!("New file: {}", name), + Err(err) => eprintln!("Error: {}", err), + } + } + } +} |