aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMariot Tsitoara <[email protected]>2019-07-08 03:10:00 +0200
committerMariot Tsitoara <[email protected]>2019-07-08 03:10:00 +0200
commit098579d0e361ed781cacd388a0873e045183bd4a (patch)
treed4c155031d573aa18f2317b816b2a31ed6d99271
downloadchan-downloader-098579d0e361ed781cacd388a0873e045183bd4a.tar.xz
chan-downloader-098579d0e361ed781cacd388a0873e045183bd4a.zip
initial commit
-rw-r--r--.gitignore3
-rw-r--r--Cargo.toml18
-rw-r--r--LICENSE21
-rw-r--r--README.md16
-rw-r--r--src/cli.yml12
-rw-r--r--src/main.rs92
6 files changed, 162 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cd23ebe
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target/
+Cargo.lock
+**/*.rs.bk
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..5453e6e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "4chan-downloader"
+description = "CLI to download all images/webms of a 4chan thread"
+version = "0.1.0"
+authors = ["Mariot Tsitoara <[email protected]>"]
+edition = "2018"
+license = "MIT"
+readme = "README.md"
+homepage = "https://github.com/mariot/4chan-downloader"
+repository = "https://github.com/mariot/4chan-downloader"
+keywords = ["cli", "4chan", "download", "downloader", "crawler"]
+categories = ["command-line-utilities"]
+
+[dependencies]
+clap = {version = "2.32", features = ["yaml"]}
+regex = "1.1.2"
+reqwest = "0.9.12"
+tempdir = "0.3.7"
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..031ac3e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Mariot Tsitoara
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6ab5456
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+4chan-downloader
+================
+
+CLI to download all images/webms of a 4chan thread
+
+```
+USAGE:
+    4chan-downloader --thread <thread>
+
+FLAGS:
+    -h, --help       Prints help information
+    -V, --version    Prints version information
+
+OPTIONS:
+    -t, --thread <thread>    URL of the thread
+```
diff --git a/src/cli.yml b/src/cli.yml
new file mode 100644
index 0000000..d4dead2
--- /dev/null
+++ b/src/cli.yml
@@ -0,0 +1,12 @@
+name: 4chan-downloader
+version: "0.1.0"
+author: "Mariot Tsitoara <[email protected]>"
+about: CLI to download all images/webms of a 4chan thread
+args:
+    - thread:
+        short: t
+        required: true
+        long: thread
+        value_name: thread
+        help: URL of the thread
+        takes_value: true
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e91ff7f
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,92 @@
+#[macro_use]
+extern crate clap;
+extern crate regex;
+extern crate reqwest;
+extern crate tempdir;
+
+use clap::{App, ArgMatches};
+use regex::Regex;
+use reqwest::{Client, StatusCode};
+use tempdir::TempDir;
+
+use std::env;
+use std::fs::create_dir_all;
+use std::fs::File;
+use std::io::copy;
+
+
+/// Program entry point.
+///
+/// Builds the command-line interface from `cli.yml`, reads the `thread`
+/// argument, creates one HTTP client and passes all three to
+/// `download_thread`.
+fn main() {
+    // The parsed arguments borrow from the YAML document, so it must stay
+    // bound to a local for as long as they are in use.
+    let cli_spec = load_yaml!("cli.yml");
+    let args = App::from_yaml(cli_spec).get_matches();
+
+    // `thread` is declared `required: true` in cli.yml, so argument parsing
+    // has already rejected invocations without it.
+    let thread_url = args.value_of("thread").unwrap();
+    let http = Client::new();
+
+    download_thread(thread_url, &args, &http);
+}
+
+/// Sends a GET request for `url` through `client` and returns the response.
+///
+/// The HTTP status is not inspected here; that is left to the caller.
+///
+/// # Panics
+///
+/// Panics if the request cannot be sent.
+fn load(url: &str, client: &Client) -> reqwest::Response {
+    let request = client.get(url);
+    let outcome = request.send();
+    outcome.unwrap()
+}
+
+/// Downloads `url` with `client` and writes the response body to the file at
+/// path `name`.
+///
+/// Returns `Ok` with `name` once the whole body has been written.
+///
+/// # Errors
+///
+/// Returns a human-readable message when:
+/// - the request cannot be sent,
+/// - the server answers `404 Not Found` (`"File not found"`),
+/// - the server answers with any other status than `200 OK`,
+/// - the destination file cannot be created or written.
+///
+/// If writing fails part-way, the incomplete file is removed so that a later
+/// run does not mistake it for a finished download.
+fn save_image(url: &str, name: &str, client: &Client) -> Result<String, String> {
+    let mut response = client
+        .get(url)
+        .send()
+        .map_err(|err| format!("Request for {} failed: {}", url, err))?;
+
+    // Only a plain 200 carries the file; check before touching the disk so
+    // that error responses never create an empty file.
+    match response.status() {
+        StatusCode::OK => {}
+        StatusCode::NOT_FOUND => return Err(String::from("File not found")),
+        status => return Err(format!("Received response status: {:?}", status)),
+    }
+
+    let mut dest =
+        File::create(name).map_err(|err| format!("Could not create {}: {}", name, err))?;
+
+    if let Err(err) = copy(&mut response, &mut dest) {
+        // Close the handle first, then discard the truncated file
+        // (best effort: the original error is the one worth reporting).
+        drop(dest);
+        let _ = std::fs::remove_file(name);
+        return Err(format!("Could not write {}: {}", name, err));
+    }
+
+    Ok(String::from(name))
+}
+
+/// Downloads every image/webm linked from the thread page at `thread_link`
+/// into `downloads/<board>/<thread>/` under the current working directory.
+///
+/// `thread_link` is split on `/`: segment 3 is used as the board, segment 5
+/// (up to any `#`) as the thread id, and an optional segment 6 (up to any
+/// `#`) as an alternative directory name. The alternative name is used only
+/// when the `names` flag is present in `matches` or a directory with that
+/// name already exists.
+/// NOTE(review): no `names` argument is declared in the cli.yml shown with
+/// this file — confirm whether the flag is meant to be exposed.
+///
+/// Files that already exist in the target directory are skipped. Problems
+/// (malformed link, directory creation failure, unreadable page, failed
+/// download) are reported on stderr; a failed download does not stop the
+/// remaining ones.
+///
+/// # Panics
+///
+/// Panics if the request for the thread page itself cannot be sent (see
+/// `load`).
+fn download_thread(thread_link: &str, matches: &ArgMatches, client: &Client) {
+    let workpath = match env::current_dir() {
+        Ok(path) => path,
+        Err(err) => {
+            eprintln!("Failed to determine the working directory: {}", err);
+            return;
+        }
+    };
+    let re = Regex::new(r"(//i(?:s)?\d*\.(?:4cdn|4chan)\.org/\w+/(\d+\.(?:jpg|png|gif|webm)))")
+        .expect("image URL pattern is a valid regex");
+
+    // Checked lookups: a link with too few segments is reported instead of
+    // panicking on an out-of-bounds index.
+    let url_vec: Vec<&str> = thread_link.split('/').collect();
+    let (board, thread_id) = match (url_vec.get(3), url_vec.get(5)) {
+        (Some(board), Some(id)) => (*board, id.split('#').next().unwrap_or("")),
+        _ => ("", ""),
+    };
+    if board.is_empty() || thread_id.is_empty() {
+        eprintln!("Error: could not find board and thread in URL: {}", thread_link);
+        return;
+    }
+
+    let board_dir = workpath.join("downloads").join(board);
+    let mut thread = thread_id;
+    if let Some(segment) = url_vec.get(6) {
+        let alt_name = segment.split('#').next().unwrap_or("");
+        // An empty segment (link ending in `/`) must be ignored: joining ""
+        // resolves to the board directory itself, which would then be
+        // mistaken for an existing thread directory.
+        if !alt_name.is_empty()
+            && (matches.is_present("names") || board_dir.join(alt_name).exists())
+        {
+            thread = alt_name;
+        }
+    }
+
+    let directory = board_dir.join(thread);
+    if directory.exists() {
+        println!("Using existing directory: {}", directory.display());
+    } else {
+        match create_dir_all(&directory) {
+            Ok(_) => println!("Created new directory: {}", directory.display()),
+            Err(err) => {
+                // Without the directory every download would fail as well.
+                eprintln!("Failed to create new directory: {}", err);
+                return;
+            }
+        }
+    }
+
+    let mut thread_page = load(thread_link, client);
+    let body = match thread_page.text() {
+        Ok(body) => body,
+        Err(err) => {
+            eprintln!("Error: could not read thread page: {}", err);
+            return;
+        }
+    };
+
+    // The same file can be linked more than once in the page. Track the file
+    // names already handled instead of assuming a fixed repetition count, so
+    // no image is skipped when a link appears only once.
+    let mut seen = std::collections::HashSet::new();
+    for cap in re.captures_iter(&body) {
+        if !seen.insert(cap[2].to_string()) {
+            continue;
+        }
+
+        let img_path = directory.join(&cap[2]);
+        if img_path.exists() {
+            continue;
+        }
+        let target = match img_path.to_str() {
+            Some(target) => target,
+            None => {
+                eprintln!("Error: path is not valid UTF-8: {}", img_path.display());
+                continue;
+            }
+        };
+
+        // Matched links are scheme-relative (`//host/...`).
+        let url = format!("https:{}", &cap[1]);
+        match save_image(&url, target, client) {
+            Ok(name) => println!("New file: {}", name),
+            Err(err) => eprintln!("Error: {}", err),
+        }
+    }
+}