From 20355716d25b3910278009f7ca5cf236ca2a1ea2 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Wed, 25 Oct 2023 14:49:25 +0200 Subject: [PATCH 1/3] feat: Yahoo account recovery via headless --- .vscode/settings.json | 3 + backend/README.md | 30 ++-- backend/src/check.rs | 2 - cli/src/main.rs | 14 +- core/src/smtp/connect.rs | 2 +- core/src/smtp/error.rs | 12 +- core/src/smtp/headless.rs | 69 +++++++++ core/src/smtp/mod.rs | 53 +++++-- .../smtp/outlook/{hotmail.rs => headless.rs} | 89 +++-------- core/src/smtp/outlook/mod.rs | 2 +- core/src/smtp/{yahoo.rs => yahoo/api.rs} | 78 ++-------- core/src/smtp/yahoo/headless.rs | 140 ++++++++++++++++++ core/src/smtp/yahoo/mod.rs | 73 +++++++++ core/src/util/input_output.rs | 48 ++++-- 14 files changed, 422 insertions(+), 193 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 core/src/smtp/headless.rs rename core/src/smtp/outlook/{hotmail.rs => headless.rs} (59%) rename core/src/smtp/{yahoo.rs => yahoo/api.rs} (69%) create mode 100644 core/src/smtp/yahoo/headless.rs create mode 100644 core/src/smtp/yahoo/mod.rs diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..2a5ae794a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "rust-analyzer.cargo.features": ["headless"] +} diff --git a/backend/README.md b/backend/README.md index 81a67c967..a8ea89369 100644 --- a/backend/README.md +++ b/backend/README.md @@ -48,21 +48,21 @@ Then send a `POST http://localhost:8080/v0/check_email` request with the followi These are the environment variables used to configure the HTTP server. To pass them to the Docker container, use the `-e {ENV_VAR}={VALUE}` flag. -| Env Var | Required? 
| Description | Default | -| ----------------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------- | -| `RUST_LOG` | No | One of `trace,debug,warn,error,info`. 💡 PRO TIP: `RUST_LOG=debug` is very handful for debugging purposes. | not defined | -| `RCH_ENABLE_BULK` | No | If set to `1`, then bulk verification endpoints will be added to the backend. | 0 | -| `DATABASE_URL` | Yes if `RCH_ENABLE_BULK==1` | [Bulk] Database connection string for storing results and task queue | not defined | -| `RCH_DATABASE_MAX_CONNECTIONS` | No | [Bulk] Connections created for the database pool | 5 | -| `RCH_MINIMUM_TASK_CONCURRENCY` | No | [Bulk] Minimum number of concurrent running tasks below which more tasks are fetched | 10 | -| `RCH_MAXIMUM_CONCURRENT_TASK_FETCH` | No | [Bulk] Maximum number of tasks fetched at once | 20 | -| `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `127.0.0.1` | -| `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` | -| `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined | -| `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined | -| `RCH_FROM_EMAIL` | No | Email to use in the `` SMTP step. Can be overwritten by each API request's `from_email` field. | reacher.email@gmail.com | -| `RCH_HELLO_NAME` | No | Name to use in the `` SMTP step. Can be overwritten by each API request's `hello_name` field. | gmail.com | -| `RCH_HOTMAIL_USE_HEADLESS` | No | Set to a running WebDriver process endpoint (e.g. 
`http://localhost:4444`) to use a headless navigator to Hotmail's password recovery page to check Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | not defined | +| Env Var | Required? | Description | Default | +| ----------------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------- | +| `RUST_LOG` | No | One of `trace,debug,warn,error,info`. 💡 PRO TIP: `RUST_LOG=debug` is very handful for debugging purposes. | not defined | +| `RCH_ENABLE_BULK` | No | If set to `1`, then bulk verification endpoints will be added to the backend. | 0 | +| `DATABASE_URL` | Yes if `RCH_ENABLE_BULK==1` | [Bulk] Database connection string for storing results and task queue | not defined | +| `RCH_DATABASE_MAX_CONNECTIONS` | No | [Bulk] Connections created for the database pool | 5 | +| `RCH_MINIMUM_TASK_CONCURRENCY` | No | [Bulk] Minimum number of concurrent running tasks below which more tasks are fetched | 10 | +| `RCH_MAXIMUM_CONCURRENT_TASK_FETCH` | No | [Bulk] Maximum number of tasks fetched at once | 20 | +| `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `127.0.0.1` | +| `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` | +| `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined | +| `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined | +| `RCH_FROM_EMAIL` | No | Email to use in the `` SMTP step. Can be overwritten by each API request's `from_email` field. 
| reacher.email@gmail.com | +| `RCH_HELLO_NAME` | No | Name to use in the `` SMTP step. Can be overwritten by each API request's `hello_name` field. | gmail.com | +| `RCH_WEBDRIVER_ADDR` | No | Set to a running WebDriver process endpoint (e.g. `http://localhost:9515`) to use a headless navigator to password recovery pages to check Yahoo and Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | not defined | ## REST API Documentation diff --git a/backend/src/check.rs b/backend/src/check.rs index b44bfe84b..dcd7cfd15 100644 --- a/backend/src/check.rs +++ b/backend/src/check.rs @@ -26,7 +26,6 @@ use super::sentry_util; /// Same as `check-if-email-exists`'s check email, but adds some additional /// inputs and error handling. pub async fn check_email(input: CheckEmailInput) -> CheckEmailOutput { - let hotmail_use_headless = env::var("RCH_HOTMAIL_USE_HEADLESS").ok(); let from_email = env::var("RCH_FROM_EMAIL").unwrap_or_else(|_| CheckEmailInput::default().from_email); let hello_name = @@ -35,7 +34,6 @@ pub async fn check_email(input: CheckEmailInput) -> CheckEmailOutput { let input = CheckEmailInput { // If we want to override core check-if-email-exists's default values // for CheckEmailInput for the backend, we do it here. - hotmail_use_headless, from_email, hello_name, ..input diff --git a/cli/src/main.rs b/cli/src/main.rs index 0b2bda120..caa82f7c1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -58,15 +58,22 @@ pub struct Cli { #[clap(long, env, default_value = "true", parse(try_from_str))] pub yahoo_use_api: bool, + /// For Yahoo addresses, use a headless browser to connect to the + /// Yahoo account recovery page. Requires a webdriver instance + /// listening on RCH_WEBDRIVER_ADDR. + #[clap(long, env)] + pub yahoo_use_headless: bool, + /// For Gmail email addresses, use Gmail's API instead of connecting /// directly to their SMTP servers. 
#[clap(long, env, default_value = "false", parse(try_from_str))] pub gmail_use_api: bool, /// For Hotmail addresses, use a headless browser to connect to the - /// Microsoft account recovery page. + /// Microsoft account recovery page. Requires a webdriver instance + /// listening on RCH_WEBDRIVER_ADDR. #[clap(long, env)] - pub hotmail_use_headless: Option, + pub hotmail_use_headless: bool, /// For Microsoft 365 email addresses, use OneDrive's API instead of /// connecting directly to their SMTP servers. @@ -100,10 +107,11 @@ async fn main() -> Result<(), Box> { .set_hello_name(CONF.hello_name.clone()) .set_smtp_port(CONF.smtp_port) .set_yahoo_use_api(CONF.yahoo_use_api) + .set_yahoo_use_headless(CONF.yahoo_use_headless) .set_gmail_use_api(CONF.gmail_use_api) .set_microsoft365_use_api(CONF.microsoft365_use_api) .set_check_gravatar(CONF.check_gravatar) - .set_hotmail_use_headless(CONF.hotmail_use_headless.clone()) + .set_hotmail_use_headless(CONF.hotmail_use_headless) .set_haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone()); if let Some(proxy_host) = &CONF.proxy_host { diff --git a/core/src/smtp/connect.rs b/core/src/smtp/connect.rs index 2a5f39cb0..ea35a9024 100644 --- a/core/src/smtp/connect.rs +++ b/core/src/smtp/connect.rs @@ -374,7 +374,7 @@ pub async fn check_smtp_with_retry( // be non-callable, as this function only deals with actual SMTP // connection errors. #[cfg(feature = "headless")] - Err(SmtpError::HotmailError(_)) => result, + Err(SmtpError::HeadlessError(_)) => result, Err(SmtpError::YahooError(_)) => result, Err(SmtpError::GmailError(_)) => result, // Only retry if the SMTP error was unknown. 
diff --git a/core/src/smtp/error.rs b/core/src/smtp/error.rs index 1ba1b066a..adcad624a 100644 --- a/core/src/smtp/error.rs +++ b/core/src/smtp/error.rs @@ -16,7 +16,7 @@ use super::gmail::GmailError; #[cfg(feature = "headless")] -use super::outlook::hotmail::HotmailError; +use super::headless::HeadlessError; use super::outlook::microsoft365::Microsoft365Error; use super::parser; use super::yahoo::YahooError; @@ -41,9 +41,11 @@ pub enum SmtpError { GmailError(GmailError), /// Error when verifying a Hotmail email via headless browser. #[cfg(feature = "headless")] - HotmailError(HotmailError), + HeadlessError(HeadlessError), /// Error when verifying a Microsoft 365 email via HTTP request. Microsoft365Error(Microsoft365Error), + /// Headless Navigator not running. + NoHeadlessNavigator, /// Email is in the `skipped_domains` parameter. SkippedDomain(String), } @@ -67,9 +69,9 @@ impl From for SmtpError { } #[cfg(feature = "headless")] -impl From for SmtpError { - fn from(e: HotmailError) -> Self { - SmtpError::HotmailError(e) +impl From for SmtpError { + fn from(e: HeadlessError) -> Self { + SmtpError::HeadlessError(e) } } diff --git a/core/src/smtp/headless.rs b/core/src/smtp/headless.rs new file mode 100644 index 000000000..72ef4256a --- /dev/null +++ b/core/src/smtp/headless.rs @@ -0,0 +1,69 @@ +// check-if-email-exists +// Copyright (C) 2018-2023 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+ +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use fantoccini::{ + error::{CmdError, NewSessionError}, + Client, ClientBuilder, +}; +use serde::Serialize; +use serde_json::Map; + +use crate::util::ser_with_display::ser_with_display; + +#[derive(Debug, Serialize)] +pub enum HeadlessError { + #[serde(serialize_with = "ser_with_display")] + Cmd(CmdError), + #[serde(serialize_with = "ser_with_display")] + NewSession(NewSessionError), +} + +impl From for HeadlessError { + fn from(e: CmdError) -> Self { + Self::Cmd(e) + } +} + +impl From for HeadlessError { + fn from(e: NewSessionError) -> Self { + Self::NewSession(e) + } +} + +pub async fn create_headless_client(webdriver: &str) -> Result { + // Running in a Docker container, I run into the following error: + // Failed to move to new namespace: PID namespaces supported, Network namespace supported, but failed: errno = Operation not permitted + // In searching around I found a few different workarounds: + // - Enable namespaces: https://github.com/jessfraz/dockerfiles/issues/65#issuecomment-266532289 + // - Run it with a custom seccomp: https://github.com/jessfraz/dockerfiles/issues/65#issuecomment-217214671 + // - Run with --no-sandbox: https://github.com/karma-runner/karma-chrome-launcher/issues/125#issuecomment-312668593 + // For now I went with the --no-sandbox. + // + // TODO Look into security implications... 
+ let mut caps = Map::new(); + let opts = serde_json::json!({ + "args": ["--headless", "--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage"], + }); + caps.insert("goog:chromeOptions".to_string(), opts); + + // Connect to WebDriver instance that is listening on `webdriver` + let c = ClientBuilder::native() + .capabilities(caps) + .connect(webdriver) + .await?; + + Ok(c) +} diff --git a/core/src/smtp/mod.rs b/core/src/smtp/mod.rs index 2a8014ac0..4b30072f6 100644 --- a/core/src/smtp/mod.rs +++ b/core/src/smtp/mod.rs @@ -17,12 +17,15 @@ mod connect; mod error; mod gmail; +#[cfg(feature = "headless")] +mod headless; mod http_api; mod outlook; mod parser; mod yahoo; use std::default::Default; +use std::env; use async_smtp::EmailAddress; use serde::{Deserialize, Serialize}; @@ -62,7 +65,7 @@ pub async fn check_smtp( domain: &str, input: &CheckEmailInput, ) -> Result { - let host = host.to_string(); + let host: String = host.to_string(); if input.skipped_domains.iter().any(|d| host.contains(d)) { return Err(SmtpError::SkippedDomain(format!( @@ -70,17 +73,45 @@ pub async fn check_smtp( ))); } - if input.yahoo_use_api && is_yahoo(&host) { - return yahoo::check_yahoo(to_email, input) - .await - .map_err(|err| err.into()); + // Headless checks. Please note that they take precedence over API checks. 
+ #[cfg(feature = "headless")] + { + let webdriver_addr = env::var("RCH_WEBDRIVER_ADDR"); + + if is_outlook(&host) { + match &webdriver_addr { + Ok(a) => { + return outlook::headless::check_password_recovery( + to_email.to_string().as_str(), + a, + ) + .await + .map_err(|err| err.into()); + } + _ => return Err(SmtpError::NoHeadlessNavigator), + } + } else if is_yahoo(&host) { + match &webdriver_addr { + Ok(a) => { + return yahoo::check_headless(to_email.to_string().as_str(), a) + .await + .map_err(|err| err.into()); + } + _ => return Err(SmtpError::NoHeadlessNavigator), + } + } } + + // API checks if input.gmail_use_api && is_gmail(&host) { return gmail::check_gmail(to_email, input) .await .map_err(|err| err.into()); - } - if input.microsoft365_use_api && is_microsoft365(&host) { + } else if input.yahoo_use_api && is_yahoo(&host) { + return yahoo::check_api(to_email, input) + .await + .map_err(|err| err.into()); + } else if input.microsoft365_use_api && is_microsoft365(&host) { match outlook::microsoft365::check_microsoft365_api(to_email, input).await { Ok(Some(smtp_details)) => return Ok(smtp_details), // Continue in the event of an error/ambiguous result. 
@@ -95,14 +126,6 @@ pub async fn check_smtp( _ => {} } } - #[cfg(feature = "headless")] - if let Some(webdriver) = &input.hotmail_use_headless { - if is_outlook(&host) { - return outlook::hotmail::check_password_recovery(to_email, webdriver) - .await - .map_err(|err| err.into()); - } - } check_smtp_with_retry(to_email, &host, port, domain, input, input.retries).await } diff --git a/core/src/smtp/outlook/hotmail.rs b/core/src/smtp/outlook/headless.rs similarity index 59% rename from core/src/smtp/outlook/hotmail.rs rename to core/src/smtp/outlook/headless.rs index e6bf5b126..156cdb10c 100644 --- a/core/src/smtp/outlook/hotmail.rs +++ b/core/src/smtp/outlook/headless.rs @@ -16,46 +16,26 @@ use std::{thread::sleep, time::Duration}; -use async_smtp::EmailAddress; use async_std::prelude::FutureExt; -use fantoccini::{ - error::{CmdError, NewSessionError}, - ClientBuilder, Locator, -}; +use fantoccini::Locator; use futures::TryFutureExt; -use serde::Serialize; -use serde_json::Map; - -use crate::{smtp::SmtpDetails, util::ser_with_display::ser_with_display, LOG_TARGET}; - -#[derive(Debug, Serialize)] -pub enum HotmailError { - #[serde(serialize_with = "ser_with_display")] - Cmd(CmdError), - #[serde(serialize_with = "ser_with_display")] - NewSession(NewSessionError), -} -impl From for HotmailError { - fn from(e: CmdError) -> Self { - Self::Cmd(e) - } -} - -impl From for HotmailError { - fn from(e: NewSessionError) -> Self { - Self::NewSession(e) - } -} +use crate::{ + smtp::{ + headless::{create_headless_client, HeadlessError}, + SmtpDetails, + }, + LOG_TARGET, +}; /// Check if a Hotmail/Outlook email exists by connecting to the password /// recovery page https://account.live.com/password/reset using a headless /// browser. Make sure you have a WebDriver server running locally before /// running this, or this will error. 
pub async fn check_password_recovery( - to_email: &EmailAddress, + to_email: &str, webdriver: &str, -) -> Result { +) -> Result { let to_email = to_email.to_string(); log::debug!( target: LOG_TARGET, @@ -63,26 +43,7 @@ pub async fn check_password_recovery( to_email, ); - // Running in a Docker container, I run into the following error: - // Failed to move to new namespace: PID namespaces supported, Network namespace supported, but failed: errno = Operation not permitted - // In searching around I found a few different workarounds: - // - Enable namespaces: https://github.com/jessfraz/dockerfiles/issues/65#issuecomment-266532289 - // - Run it with a custom seccomp: https://github.com/jessfraz/dockerfiles/issues/65#issuecomment-217214671 - // - Run with --no-sandbox: https://github.com/karma-runner/karma-chrome-launcher/issues/125#issuecomment-312668593 - // For now I went with the --no-sandbox. - // - // TODO Look into security implications... - let mut caps = Map::new(); - let opts = serde_json::json!({ - "args": ["--headless", "--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage"], - }); - caps.insert("goog:chromeOptions".to_string(), opts); - - // Connect to WebDriver instance that is listening on `webdriver` - let c = ClientBuilder::native() - .capabilities(caps) - .connect(webdriver) - .await?; + let c = create_headless_client(webdriver).await?; // Navigate to Microsoft password recovery page. c.goto("https://account.live.com/password/reset").await?; @@ -147,9 +108,7 @@ pub async fn check_password_recovery( #[cfg(test)] mod tests { use super::check_password_recovery; - use async_smtp::EmailAddress; use async_std::prelude::FutureExt; - use std::str::FromStr; // Ignoring this test as it requires a local process of WebDriver running on // "http://localhost:4444". To debug the headless password recovery page, @@ -158,26 +117,20 @@ mod tests { #[tokio::test] #[ignore] async fn test_hotmail_address() { - // This email does not exist. 
- let email = EmailAddress::from_str("test42134@hotmail.com").unwrap(); - // Run 10 headless sessions with the above fake email (not deliverable). + // Run 10 headless sessions with dummy emails. // It should not error. for _ in 0..10 { - let res = check_password_recovery(&email, "http://localhost:4444") + // This email does not exist. + let res = check_password_recovery(&"test42134@hotmail.com", "http://localhost:4444") .await .unwrap(); - assert!(!res.is_deliverable) - } + assert!(!res.is_deliverable); - // This email does exist. - let email = EmailAddress::from_str("test@hotmail.com").unwrap(); - // Run 10 headless sessions with the above fake email (not deliverable). - // It should not error. - for _ in 0..10 { - let res = check_password_recovery(&email, "http://localhost:4444") + // This email does exist. + let res = check_password_recovery("test@hotmail.com", "http://localhost:4444") .await .unwrap(); - assert!(res.is_deliverable) + assert!(res.is_deliverable); } } @@ -189,10 +142,8 @@ mod tests { #[ignore] async fn test_parallel() { // This email does not exist. 
- let email = EmailAddress::from_str("foo@bar.baz").unwrap(); - - let f1 = check_password_recovery(&email, "http://localhost:4444"); - let f2 = check_password_recovery(&email, "http://localhost:4444"); + let f1 = check_password_recovery("foo@bar.baz", "http://localhost:4444"); + let f2 = check_password_recovery("foo@bar.baz", "http://localhost:4444"); let f = f1.try_join(f2).await; assert!(f.is_ok(), "{:?}", f); diff --git a/core/src/smtp/outlook/mod.rs b/core/src/smtp/outlook/mod.rs index 6a881b019..1b7fe215c 100644 --- a/core/src/smtp/outlook/mod.rs +++ b/core/src/smtp/outlook/mod.rs @@ -1,5 +1,5 @@ #[cfg(feature = "headless")] -pub mod hotmail; +pub mod headless; pub mod microsoft365; /// Check if a MX host is from outlook (includes @hotmail.*, @outlook.* and diff --git a/core/src/smtp/yahoo.rs b/core/src/smtp/yahoo/api.rs similarity index 69% rename from core/src/smtp/yahoo.rs rename to core/src/smtp/yahoo/api.rs index 08eb8f6c9..2f7e60d89 100644 --- a/core/src/smtp/yahoo.rs +++ b/core/src/smtp/yahoo/api.rs @@ -14,19 +14,14 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . 
-use super::SmtpDetails; +use super::YahooError; use crate::{ - smtp::http_api::create_client, - util::{ - constants::LOG_TARGET, input_output::CheckEmailInput, ser_with_display::ser_with_display, - }, + smtp::{http_api::create_client, SmtpDetails}, + util::{constants::LOG_TARGET, input_output::CheckEmailInput}, }; use async_smtp::EmailAddress; use regex::Regex; -use reqwest::Error as ReqwestError; use serde::{Deserialize, Serialize}; -use serde_json::error::Error as SerdeError; -use std::fmt; const SIGNUP_PAGE: &str = "https://login.yahoo.com/account/create?specId=yidReg&lang=en-US&src=&done=https%3A%2F%2Fwww.yahoo.com&display=login"; const SIGNUP_API: &str = "https://login.yahoo.com/account/module/create?validateField=yid"; @@ -79,56 +74,20 @@ struct FormResponse { errors: Vec, } -/// Possible errors when checking Yahoo email addresses. -#[derive(Debug, Serialize)] -pub enum YahooError { - /// Cannot find "acrumb" field in cookie. - NoAcrumb, - /// Cannot find "sessionIndex" hidden input in body - NoSessionIndex, - /// Cannot find cookie in Yahoo response. - NoCookie, - /// Error when serializing or deserializing HTTP requests and responses. - #[serde(serialize_with = "ser_with_display")] - ReqwestError(ReqwestError), - /// Error when serializing or deserializing HTTP requests and responses. - #[serde(serialize_with = "ser_with_display")] - SerdeError(SerdeError), -} - -impl fmt::Display for YahooError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // Customize so only `x` and `y` are denoted. - write!(f, "{self:?}") - } -} - -impl From for YahooError { - fn from(error: ReqwestError) -> Self { - YahooError::ReqwestError(error) - } -} - -impl From for YahooError { - fn from(error: SerdeError) -> Self { - YahooError::SerdeError(error) - } -} - /// Use well-crafted HTTP requests to verify if a Yahoo email address exists. /// Inspired by https://github.com/hbattat/verifyEmail. 
-pub async fn check_yahoo( +pub async fn check_api( to_email: &EmailAddress, input: &CheckEmailInput, ) -> Result { - let response = create_client(input, "yahoo")? + let res = create_client(input, "yahoo")? .get(SIGNUP_PAGE) .header("User-Agent", USER_AGENT) .send() .await?; // Get the cookies from the response. - let cookies = match response.headers().get("Set-Cookie") { + let cookies = match res.headers().get("Set-Cookie") { Some(x) => x.to_owned(), _ => { return Err(YahooError::NoCookie); @@ -138,18 +97,11 @@ pub async fn check_yahoo( let to_email = to_email.to_string(); log::debug!( target: LOG_TARGET, - "[email={}] Yahoo 1st response: {:?}", - to_email, - response - ); - log::debug!( - target: LOG_TARGET, - "[email={}] Yahoo cookies: {:?}", + "[email={}] Yahoo succesfully got cookies after response", to_email, - cookies ); - let body = response.text().await?; + let body = res.text().await?; let username = to_email .split('@') @@ -182,7 +134,7 @@ pub async fn check_yahoo( }; // Mimic a real HTTP request. - let response = create_client(input, "yahoo")? + let res = create_client(input, "yahoo")? .post(SIGNUP_API) .header("Origin", "https://login.yahoo.com") .header("X-Requested-With", "XMLHttpRequest") @@ -210,10 +162,10 @@ pub async fn check_yahoo( target: LOG_TARGET, "[email={}] Yahoo 2nd response: {:?}", to_email, - response + res ); - let username_exists = response.errors.iter().any(|item| { + let username_exists = res.errors.iter().any(|item| { item.name == "userId" && (item.error == "IDENTIFIER_NOT_AVAILABLE" || item.error == "IDENTIFIER_EXISTS") }); @@ -224,11 +176,3 @@ pub async fn check_yahoo( ..Default::default() }) } - -/// Check if the MX host is from Yahoo. -/// Examples: -/// - mta7.am0.yahoodns.net. -/// - mx-eu.mail.am0.yahoodns.net. 
-pub fn is_yahoo(host: &str) -> bool { - host.to_lowercase().ends_with(".yahoodns.net.") -} diff --git a/core/src/smtp/yahoo/headless.rs b/core/src/smtp/yahoo/headless.rs new file mode 100644 index 000000000..a3f13dc96 --- /dev/null +++ b/core/src/smtp/yahoo/headless.rs @@ -0,0 +1,140 @@ +// check-if-email-exists +// Copyright (C) 2018-2023 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::{thread::sleep, time::Duration}; + +use async_std::prelude::FutureExt; +use fantoccini::Locator; +use futures::TryFutureExt; + +use crate::smtp::headless::{create_headless_client, HeadlessError}; +use crate::{smtp::SmtpDetails, LOG_TARGET}; + +/// Check if a Yahoo email exists by connecting to the password +/// recovery page https://login.yahoo.com/forgot using a headless +/// browser. Make sure you have a WebDriver server running locally before +/// running this, or this will error. +pub async fn check_headless(to_email: &str, webdriver: &str) -> Result { + log::debug!( + target: LOG_TARGET, + "[email={}] Using Yahoo password recovery in headless navigator", + to_email, + ); + + let c = create_headless_client(webdriver).await?; + + // Navigate to Yahoo password recovery page. + c.goto("https://login.yahoo.com/forgot").await?; + + // Wait for network/javascript/dom to make the input-box available + // and click it. 
+ let input = c.wait().for_element(Locator::Id("username")).await?; + // Sometimes I get "input ... is not reachable by keyboard". Adding this + // small sleep helps. + sleep(Duration::from_millis(200)); + input.send_keys(to_email).await?; + + // Click on "Continue" + c.find(Locator::Css("button[name=\"verifyYid\"]")) + .await? + .click() + .await?; + + // Depending on what answers we have on the account recovery page, return + // the relevant (is_deliverable, is_disabled) tuple. + + // "Sorry, we don't recognise that email address or phone number". + let f1 = c + .wait() + .for_element(Locator::Css(".error-msg")) + .and_then(|_| async { Ok((false, false)) }); + // "This account has been deactivated due to inactivity, but we would love to welcome you back!" + let f2 = c + .wait() + .for_element(Locator::Css(".ctx-account_is_locked")) + .and_then(|_| async { Ok((false, true)) }); + // Recaptcha + let f3 = c + .wait() + .for_element(Locator::Css(".recaptcha-challenge")) + .and_then(|_| async { Ok((true, false)) }); + // "Enter verification code sent to your email address" + let f4 = c + .wait() + .for_element(Locator::Id("email-verify-challenge")) + .and_then(|_| async { Ok((true, false)) }); + + let (is_deliverable, is_disabled) = f1.try_race(f2).try_race(f3).try_race(f4).await?; + + if is_deliverable { + log::debug!( + target: LOG_TARGET, + "[email={}] Did not find error message in password recovery, email exists", + to_email, + ); + } else { + log::debug!( + target: LOG_TARGET, + "[email={}] Found error message in password recovery, email does not exist", + to_email, + ); + } + + c.close().await?; + + Ok(SmtpDetails { + can_connect_smtp: true, + has_full_inbox: false, + is_catch_all: false, + is_deliverable, + is_disabled, + }) +} + +#[cfg(test)] +mod tests { + use super::check_headless; + + // Ignoring this test as it requires a local process of WebDriver running on + // "http://localhost:9515". 
To debug the headless password recovery page, + // run chromedriver and remove the "#[ignore]". + // Also see: https://github.com/jonhoo/fantoccini + #[tokio::test] + async fn test_yahoo_address() { + // Run 5 headless sessions with the below dummy emails. + for _ in 0..5 { + // Email does not exist. + let res = check_headless("test42134@yahoo.com", "http://localhost:9515") + .await + .unwrap(); + assert!(!res.is_deliverable); + + // Disabled email. + let res = check_headless("amaury@yahoo.com", "http://localhost:9515") + .await + .unwrap(); + assert!(!res.is_deliverable); + assert!(res.is_disabled); + + // OK email. + let res = check_headless("test2@yahoo.com", "http://localhost:9515") + .await + .unwrap(); + assert!(res.is_deliverable); + assert!(!res.is_disabled); + } + } +} diff --git a/core/src/smtp/yahoo/mod.rs b/core/src/smtp/yahoo/mod.rs new file mode 100644 index 000000000..6ce620f13 --- /dev/null +++ b/core/src/smtp/yahoo/mod.rs @@ -0,0 +1,73 @@ +// check-if-email-exists +// Copyright (C) 2018-2023 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +mod api; +#[cfg(feature = "headless")] +mod headless; + +use crate::util::ser_with_display::ser_with_display; +use reqwest::Error as ReqwestError; +use serde::Serialize; +use serde_json::error::Error as SerdeError; +use std::fmt; + +pub use api::check_api; +#[cfg(feature = "headless")] +pub use headless::check_headless; + +/// Possible errors when checking Yahoo email addresses. +#[derive(Debug, Serialize)] +pub enum YahooError { + /// Cannot find "acrumb" field in cookie. + NoAcrumb, + /// Cannot find "sessionIndex" hidden input in body + NoSessionIndex, + /// Cannot find cookie in Yahoo response. + NoCookie, + /// Error when serializing or deserializing HTTP requests and responses. + #[serde(serialize_with = "ser_with_display")] + ReqwestError(ReqwestError), + /// Error when serializing or deserializing HTTP requests and responses. + #[serde(serialize_with = "ser_with_display")] + SerdeError(SerdeError), +} + +impl fmt::Display for YahooError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // Customize so only `x` and `y` are denoted. + write!(f, "{self:?}") + } +} + +impl From for YahooError { + fn from(error: ReqwestError) -> Self { + YahooError::ReqwestError(error) + } +} + +impl From for YahooError { + fn from(error: SerdeError) -> Self { + YahooError::SerdeError(error) + } +} + +/// Check if the MX host is from Yahoo. +/// Examples: +/// - mta7.am0.yahoodns.net. +/// - mx-eu.mail.am0.yahoodns.net. +pub fn is_yahoo(host: &str) -> bool { + host.to_lowercase().ends_with(".yahoodns.net.") +} diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index ae0dd9201..05cad843d 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -101,8 +101,19 @@ pub struct CheckEmailInput { /// For Yahoo email addresses, use Yahoo's API instead of connecting /// directly to their SMTP servers. /// - /// Defaults to true. + /// Defaults to false. 
pub yahoo_use_api: bool, + /// For Yahoo email addresses, use Yahoo's account recovery page instead + /// of connecting directly to their SMTP servers. + /// + /// This assumes you have a WebDriver compatible process running, then pass + /// its endpoint, usually http://localhost:4444, into the environment + /// variable RCH_WEBDRIVER_ADDR. We recommend running chromedriver (and not + /// geckodriver) as it allows parallel requests. + /// + /// Defaults to true. + #[cfg(feature = "headless")] + pub yahoo_use_headless: bool, /// For Gmail email addresses, use Gmail's API instead of connecting /// directly to their SMTP servers. /// @@ -122,13 +133,15 @@ pub struct CheckEmailInput { pub haveibeenpwned_api_key: Option, /// For Hotmail/Outlook email addresses, use a headless navigator /// connecting to the password recovery page instead of the SMTP server. + /// /// This assumes you have a WebDriver compatible process running, then pass - /// its endpoint, usually http://localhost:4444. We recommend running - /// chromedriver (and not geckodriver) as it allows parallel requests. + /// its endpoint, usually http://localhost:4444, into the environment + /// variable RCH_WEBDRIVER_ADDR. We recommend running chromedriver (and not + /// geckodriver) as it allows parallel requests. /// - /// Defaults to None. + /// Defaults to true. #[cfg(feature = "headless")] - pub hotmail_use_headless: Option, + pub hotmail_use_headless: bool, /// Number of retries of SMTP connections to do. /// /// Defaults to 2 to avoid greylisting. 
@@ -165,12 +178,14 @@ impl Default for CheckEmailInput { from_email: "reacher.email@gmail.com".into(), // Unused, owned by Reacher hello_name: "gmail.com".into(), #[cfg(feature = "headless")] - hotmail_use_headless: None, + hotmail_use_headless: true, proxy: None, smtp_port: 25, smtp_security: SmtpSecurity::default(), smtp_timeout: Some(Duration::from_secs(12)), - yahoo_use_api: true, + yahoo_use_api: false, + #[cfg(feature = "headless")] + yahoo_use_headless: true, gmail_use_api: false, microsoft365_use_api: false, check_gravatar: false, @@ -285,6 +300,14 @@ impl CheckEmailInput { self } + /// Set whether or not to use a headless navigator to navigate to Yahoo's + /// password recovery page to check if an email exists. + #[cfg(feature = "headless")] + pub fn set_yahoo_use_headless(&mut self, use_headless: bool) -> &mut CheckEmailInput { + self.yahoo_use_headless = use_headless; + self + } + /// Set whether to use Gmail's API or connecting directly to their SMTP /// servers. Defaults to false. pub fn set_gmail_use_api(&mut self, use_api: bool) -> &mut CheckEmailInput { @@ -307,21 +330,16 @@ impl CheckEmailInput { } /// Whether to haveibeenpwned' API for the given email - /// check only if the api_key is set + /// check only if the api_key is set. pub fn set_haveibeenpwned_api_key(&mut self, api_key: Option<String>) -> &mut CheckEmailInput { self.haveibeenpwned_api_key = api_key; self } /// Set whether or not to use a headless navigator to navigate to Hotmail's - /// password recovery page to check if an email exists. If set to - /// `Some()`, this endpoint must point to a WebDriver process, - /// usually listening on http://localhost:4444. Defaults to None. + /// password recovery page to check if an email exists.
#[cfg(feature = "headless")] - pub fn set_hotmail_use_headless( - &mut self, - use_headless: Option<String>, - ) -> &mut CheckEmailInput { + pub fn set_hotmail_use_headless(&mut self, use_headless: bool) -> &mut CheckEmailInput { self.hotmail_use_headless = use_headless; self } From f5cbf6491a273af497a7b00189b328463c73dc6e Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Wed, 25 Oct 2023 14:53:00 +0200 Subject: [PATCH 2/3] Fix docker --- .env.example | 2 +- Dockerfile | 2 +- core/src/smtp/outlook/headless.rs | 10 +++++----- core/src/util/input_output.rs | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 1b028ff88..a2b2c8903 100644 --- a/.env.example +++ b/.env.example @@ -7,7 +7,7 @@ RUST_LOG=reacher=info SQLX_OFFLINE=1 -RCH_HOTMAIL_USE_HEADLESS=http://localhost:4444 +RCH_WEBDRIVER_ADDR=http://localhost:9515 # To enable bulk email verification, set the value below to 1, and fill out all # other env variables below. diff --git a/Dockerfile b/Dockerfile index 121ac20dc..17dfc6084 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,7 @@ USER chrome ENV RUST_LOG=reacher=info ENV RCH_HTTP_HOST=0.0.0.0 ENV PORT=8080 -ENV RCH_HOTMAIL_USE_HEADLESS=http://localhost:9515 +ENV RCH_WEBDRIVER_ADDR=http://localhost:9515 # Bulk verification is disabled by default. Set to 1 to enable it. ENV RCH_ENABLE_BULK=0 diff --git a/core/src/smtp/outlook/headless.rs b/core/src/smtp/outlook/headless.rs index 156cdb10c..1eb1dd8fc 100644 --- a/core/src/smtp/outlook/headless.rs +++ b/core/src/smtp/outlook/headless.rs @@ -111,7 +111,7 @@ mod tests { use async_std::prelude::FutureExt; // Ignoring this test as it requires a local process of WebDriver running on - // "http://localhost:4444". To debug the headless password recovery page, + // "http://localhost:9515". To debug the headless password recovery page, // run chromedriver and remove the "#[ignore]".
// Also see: https://github.com/jonhoo/fantoccini #[tokio::test] @@ -121,13 +121,13 @@ mod tests { // It should not error. for _ in 0..10 { // This email does not exist. - let res = check_password_recovery(&"test42134@hotmail.com", "http://localhost:4444") + let res = check_password_recovery(&"test42134@hotmail.com", "http://localhost:9515") .await .unwrap(); assert!(!res.is_deliverable); // This email does exist. - let res = check_password_recovery("test@hotmail.com", "http://localhost:4444") + let res = check_password_recovery("test@hotmail.com", "http://localhost:9515") .await .unwrap(); assert!(res.is_deliverable); @@ -142,8 +142,8 @@ mod tests { #[ignore] async fn test_parallel() { // This email does not exist. - let f1 = check_password_recovery("foo@bar.baz", "http://localhost:4444"); - let f2 = check_password_recovery("foo@bar.baz", "http://localhost:4444"); + let f1 = check_password_recovery("foo@bar.baz", "http://localhost:9515"); + let f2 = check_password_recovery("foo@bar.baz", "http://localhost:9515"); let f = f1.try_join(f2).await; assert!(f.is_ok(), "{:?}", f); diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index 05cad843d..9246178b2 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -107,7 +107,7 @@ pub struct CheckEmailInput { /// of connecting directly to their SMTP servers. /// /// This assumes you have a WebDriver compatible process running, then pass - /// its endpoint, usually http://localhost:4444, into the environment + /// its endpoint, usually http://localhost:9515, into the environment /// variable RCH_WEBDRIVER_ADDR. We recommend running chromedriver (and not /// geckodriver) as it allows parallel requests. /// @@ -135,7 +135,7 @@ pub struct CheckEmailInput { /// connecting to the password recovery page instead of the SMTP server. 
/// /// This assumes you have a WebDriver compatible process running, then pass - /// its endpoint, usually http://localhost:4444, into the environment + /// its endpoint, usually http://localhost:9515, into the environment /// variable RCH_WEBDRIVER_ADDR. We recommend running chromedriver (and not /// geckodriver) as it allows parallel requests. /// From 50c8b6241fe8363eaf6f62180405b09019161aff Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Wed, 25 Oct 2023 14:56:29 +0200 Subject: [PATCH 3/3] Ignore tests --- core/src/smtp/outlook/headless.rs | 4 ++-- core/src/smtp/yahoo/headless.rs | 1 + core/src/util/input_output.rs | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/smtp/outlook/headless.rs b/core/src/smtp/outlook/headless.rs index 1eb1dd8fc..46612cdb4 100644 --- a/core/src/smtp/outlook/headless.rs +++ b/core/src/smtp/outlook/headless.rs @@ -115,7 +115,7 @@ mod tests { // run chromedriver and remove the "#[ignore]". // Also see: https://github.com/jonhoo/fantoccini #[tokio::test] - #[ignore] + #[ignore = "Run a webdriver server locally to test this"] async fn test_hotmail_address() { // Run 10 headless sessions with dummy emails. // It should not error. @@ -139,7 +139,7 @@ mod tests { // but will fail with geckodriver. // ref: https://github.com/jonhoo/fantoccini/issues/111#issuecomment-727650629 #[tokio::test] - #[ignore] + #[ignore = "Run a **chromedriver** server locally to test this"] async fn test_parallel() { // This email does not exist. let f1 = check_password_recovery("foo@bar.baz", "http://localhost:9515"); diff --git a/core/src/smtp/yahoo/headless.rs b/core/src/smtp/yahoo/headless.rs index a3f13dc96..ca0be87a9 100644 --- a/core/src/smtp/yahoo/headless.rs +++ b/core/src/smtp/yahoo/headless.rs @@ -113,6 +113,7 @@ mod tests { // run chromedriver and remove the "#[ignore]". 
// Also see: https://github.com/jonhoo/fantoccini #[tokio::test] + #[ignore = "Run a webdriver server locally to test this"] async fn test_yahoo_address() { // Run 5 headless sessions with the below dummy emails. for _ in 0..5 { diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index 9246178b2..3c6dbfd10 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -183,6 +183,9 @@ impl Default for CheckEmailInput { smtp_port: 25, smtp_security: SmtpSecurity::default(), smtp_timeout: Some(Duration::from_secs(12)), + #[cfg(not(feature = "headless"))] + yahoo_use_api: true, + #[cfg(feature = "headless")] yahoo_use_api: false, #[cfg(feature = "headless")] yahoo_use_headless: true,