forked from spider-rs/spider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.rs
41 lines (33 loc) · 1.08 KB
/
scrape.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
//! `cargo run --example scrape`
extern crate env_logger;
extern crate spider;
use env_logger::Env;
use spider::tokio;
use spider::website::Website;
/// Scrapes a single site and prints every stored page to stdout.
///
/// Logging is initialised through `env_logger`, reading the filter from
/// `RUST_LOG` (falling back to `info`) and the style from `RUST_LOG_STYLE`
/// (falling back to `always`).
///
/// Each page is printed as a separator line, the page URL, the page HTML and
/// a closing separator line.
#[tokio::main]
async fn main() {
    use std::io::{stdout, ErrorKind, Write};

    let env = Env::default()
        .filter_or("RUST_LOG", "info")
        .write_style_or("RUST_LOG_STYLE", "always");

    env_logger::init_from_env(env);

    let target = "https://jeffmendez.com";

    let mut website: Website = Website::new(target);
    website.configuration.respect_robots_txt = true;
    website.configuration.delay = 15; // Defaults to 250 ms
    website.configuration.user_agent = Some(Box::new("SpiderBot".into())); // Defaults to spider/x.y.z, where x.y.z is the library version

    website.scrape().await;

    // Lock stdout once instead of re-locking for every page.
    let mut lock = stdout().lock();

    // Visual delimiter between pages, as wide as the target URL.
    let separator = "-".repeat(target.len());

    // `get_pages` returns an `Option`; report the empty case instead of panicking.
    let Some(pages) = website.get_pages() else {
        eprintln!("no pages were stored for {}", target);
        return;
    };

    for page in pages.iter() {
        let written = writeln!(
            lock,
            "{}\n{}\n\n{}\n\n{}",
            separator,
            page.get_url(),
            page.get_html(),
            separator
        );

        if let Err(err) = written {
            // A closed pipe (e.g. `| head`) is a normal way for the reader to
            // stop consuming output, so stop quietly rather than panic.
            if err.kind() == ErrorKind::BrokenPipe {
                break;
            }

            // Any other write failure is a real error: report it and keep a
            // non-zero exit status, as the original panic did.
            eprintln!("failed to write page to stdout: {}", err);
            std::process::exit(1);
        }
    }
}