Skip to content

Commit

Permalink
feat: Yahoo account recovery via headless (#1364)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: `input.hotmail_use_headless` is now a bool instead of a string. Pass the webdriver address as an environment variable `RCH_WEBDRIVER_ADDR` now.
  • Loading branch information
amaury1093 authored Oct 25, 2023
1 parent 8f152b8 commit 6f0f12b
Show file tree
Hide file tree
Showing 16 changed files with 430 additions and 197 deletions.
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

RUST_LOG=reacher=info
SQLX_OFFLINE=1
RCH_HOTMAIL_USE_HEADLESS=http://localhost:4444
RCH_WEBDRIVER_ADDR=http://localhost:9515

# To enable bulk email verification, set the value below to 1, and fill out all
# other env variables below.
Expand Down
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"rust-analyzer.cargo.features": ["headless"]
}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ USER chrome
ENV RUST_LOG=reacher=info
ENV RCH_HTTP_HOST=0.0.0.0
ENV PORT=8080
ENV RCH_HOTMAIL_USE_HEADLESS=http://localhost:9515
ENV RCH_WEBDRIVER_ADDR=http://localhost:9515
# Bulk verification is disabled by default. Set to 1 to enable it.
ENV RCH_ENABLE_BULK=0

Expand Down
30 changes: 15 additions & 15 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,21 @@ Then send a `POST http://localhost:8080/v0/check_email` request with the followi

These are the environment variables used to configure the HTTP server. To pass them to the Docker container, use the `-e {ENV_VAR}={VALUE}` flag.

| Env Var | Required? | Description | Default |
| ----------------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------- |
| `RUST_LOG` | No | One of `trace,debug,warn,error,info`. 💡 PRO TIP: `RUST_LOG=debug` is very handy for debugging purposes. | not defined |
| `RCH_ENABLE_BULK` | No | If set to `1`, then bulk verification endpoints will be added to the backend. | 0 |
| `DATABASE_URL` | Yes if `RCH_ENABLE_BULK==1` | [Bulk] Database connection string for storing results and task queue | not defined |
| `RCH_DATABASE_MAX_CONNECTIONS` | No | [Bulk] Connections created for the database pool | 5 |
| `RCH_MINIMUM_TASK_CONCURRENCY` | No | [Bulk] Minimum number of concurrent running tasks below which more tasks are fetched | 10 |
| `RCH_MAXIMUM_CONCURRENT_TASK_FETCH` | No | [Bulk] Maximum number of tasks fetched at once | 20 |
| `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `127.0.0.1` |
| `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` |
| `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined |
| `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined |
| `RCH_FROM_EMAIL` | No | Email to use in the `<MAIL FROM:>` SMTP step. Can be overwritten by each API request's `from_email` field. | [email protected] |
| `RCH_HELLO_NAME` | No | Name to use in the `<EHLO>` SMTP step. Can be overwritten by each API request's `hello_name` field. | gmail.com |
| `RCH_HOTMAIL_USE_HEADLESS` | No | Set to a running WebDriver process endpoint (e.g. `http://localhost:4444`) to use a headless navigator to Hotmail's password recovery page to check Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | not defined |
| Env Var | Required? | Description | Default |
| ----------------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------- |
| `RUST_LOG` | No | One of `trace,debug,warn,error,info`. 💡 PRO TIP: `RUST_LOG=debug` is very handy for debugging purposes. | not defined |
| `RCH_ENABLE_BULK` | No | If set to `1`, then bulk verification endpoints will be added to the backend. | 0 |
| `DATABASE_URL` | Yes if `RCH_ENABLE_BULK==1` | [Bulk] Database connection string for storing results and task queue | not defined |
| `RCH_DATABASE_MAX_CONNECTIONS` | No | [Bulk] Connections created for the database pool | 5 |
| `RCH_MINIMUM_TASK_CONCURRENCY` | No | [Bulk] Minimum number of concurrent running tasks below which more tasks are fetched | 10 |
| `RCH_MAXIMUM_CONCURRENT_TASK_FETCH` | No | [Bulk] Maximum number of tasks fetched at once | 20 |
| `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `127.0.0.1` |
| `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` |
| `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined |
| `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined |
| `RCH_FROM_EMAIL` | No | Email to use in the `<MAIL FROM:>` SMTP step. Can be overwritten by each API request's `from_email` field. | [email protected] |
| `RCH_HELLO_NAME` | No | Name to use in the `<EHLO>` SMTP step. Can be overwritten by each API request's `hello_name` field. | gmail.com |
| `RCH_WEBDRIVER_ADDR` | No | Set to a running WebDriver process endpoint (e.g. `http://localhost:9515`) to use a headless browser that navigates to the providers' password recovery pages in order to check Yahoo and Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | not defined |

## REST API Documentation

Expand Down
2 changes: 0 additions & 2 deletions backend/src/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ use super::sentry_util;
/// Same as `check-if-email-exists`'s check email, but adds some additional
/// inputs and error handling.
pub async fn check_email(input: CheckEmailInput) -> CheckEmailOutput {
let hotmail_use_headless = env::var("RCH_HOTMAIL_USE_HEADLESS").ok();
let from_email =
env::var("RCH_FROM_EMAIL").unwrap_or_else(|_| CheckEmailInput::default().from_email);
let hello_name =
Expand All @@ -35,7 +34,6 @@ pub async fn check_email(input: CheckEmailInput) -> CheckEmailOutput {
let input = CheckEmailInput {
// If we want to override core check-if-email-exists's default values
// for CheckEmailInput for the backend, we do it here.
hotmail_use_headless,
from_email,
hello_name,
..input
Expand Down
14 changes: 11 additions & 3 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,22 @@ pub struct Cli {
#[clap(long, env, default_value = "true", parse(try_from_str))]
pub yahoo_use_api: bool,

/// For Yahoo addresses, use a headless browser to connect to the
/// Yahoo account recovery page. Requires a webdriver instance
/// listening on RCH_WEBDRIVER_ADDR.
#[clap(long, env)]
pub yahoo_use_headless: bool,

/// For Gmail email addresses, use Gmail's API instead of connecting
/// directly to their SMTP servers.
#[clap(long, env, default_value = "false", parse(try_from_str))]
pub gmail_use_api: bool,

/// For Hotmail addresses, use a headless browser to connect to the
/// Microsoft account recovery page.
/// Microsoft account recovery page. Requires a webdriver instance
/// listening on RCH_WEBDRIVER_ADDR.
#[clap(long, env)]
pub hotmail_use_headless: Option<String>,
pub hotmail_use_headless: bool,

/// For Microsoft 365 email addresses, use OneDrive's API instead of
/// connecting directly to their SMTP servers.
Expand Down Expand Up @@ -100,10 +107,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
.set_hello_name(CONF.hello_name.clone())
.set_smtp_port(CONF.smtp_port)
.set_yahoo_use_api(CONF.yahoo_use_api)
.set_yahoo_use_headless(CONF.yahoo_use_headless)
.set_gmail_use_api(CONF.gmail_use_api)
.set_microsoft365_use_api(CONF.microsoft365_use_api)
.set_check_gravatar(CONF.check_gravatar)
.set_hotmail_use_headless(CONF.hotmail_use_headless.clone())
.set_hotmail_use_headless(CONF.hotmail_use_headless)
.set_haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone());

if let Some(proxy_host) = &CONF.proxy_host {
Expand Down
2 changes: 1 addition & 1 deletion core/src/smtp/connect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ pub async fn check_smtp_with_retry(
// be non-callable, as this function only deals with actual SMTP
// connection errors.
#[cfg(feature = "headless")]
Err(SmtpError::HotmailError(_)) => result,
Err(SmtpError::HeadlessError(_)) => result,
Err(SmtpError::YahooError(_)) => result,
Err(SmtpError::GmailError(_)) => result,
// Only retry if the SMTP error was unknown.
Expand Down
12 changes: 7 additions & 5 deletions core/src/smtp/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

use super::gmail::GmailError;
#[cfg(feature = "headless")]
use super::outlook::hotmail::HotmailError;
use super::headless::HeadlessError;
use super::outlook::microsoft365::Microsoft365Error;
use super::parser;
use super::yahoo::YahooError;
Expand All @@ -41,9 +41,11 @@ pub enum SmtpError {
GmailError(GmailError),
/// Error when verifying an email (e.g. Hotmail/Outlook or Yahoo) via headless browser.
#[cfg(feature = "headless")]
HotmailError(HotmailError),
HeadlessError(HeadlessError),
/// Error when verifying a Microsoft 365 email via HTTP request.
Microsoft365Error(Microsoft365Error),
/// Headless Navigator not running.
NoHeadlessNavigator,
/// Email is in the `skipped_domains` parameter.
SkippedDomain(String),
}
Expand All @@ -67,9 +69,9 @@ impl From<GmailError> for SmtpError {
}

#[cfg(feature = "headless")]
impl From<HotmailError> for SmtpError {
fn from(e: HotmailError) -> Self {
SmtpError::HotmailError(e)
impl From<HeadlessError> for SmtpError {
fn from(e: HeadlessError) -> Self {
SmtpError::HeadlessError(e)
}
}

Expand Down
69 changes: 69 additions & 0 deletions core/src/smtp/headless.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// check-if-email-exists
// Copyright (C) 2018-2023 Reacher

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.

// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use fantoccini::{
error::{CmdError, NewSessionError},
Client, ClientBuilder,
};
use serde::Serialize;
use serde_json::Map;

use crate::util::ser_with_display::ser_with_display;

/// Errors that can occur while driving a headless browser through WebDriver.
///
/// Both variants are serialized through the `ser_with_display` helper
/// (presumably emitting the wrapped error's `Display` text; confirm in
/// `crate::util::ser_with_display`).
#[derive(Debug, Serialize)]
pub enum HeadlessError {
/// Wraps a `fantoccini::error::CmdError`.
#[serde(serialize_with = "ser_with_display")]
Cmd(CmdError),
/// Wraps a `fantoccini::error::NewSessionError`, e.g. as returned when
/// connecting to the WebDriver endpoint in `create_headless_client`.
#[serde(serialize_with = "ser_with_display")]
NewSession(NewSessionError),
}

impl From<CmdError> for HeadlessError {
fn from(e: CmdError) -> Self {
Self::Cmd(e)
}
}

impl From<NewSessionError> for HeadlessError {
fn from(e: NewSessionError) -> Self {
Self::NewSession(e)
}
}

pub async fn create_headless_client(webdriver: &str) -> Result<Client, HeadlessError> {
// Running in a Docker container, I run into the following error:
// Failed to move to new namespace: PID namespaces supported, Network namespace supported, but failed: errno = Operation not permitted
// In searching around I found a few different workarounds:
// - Enable namespaces: https://github.com/jessfraz/dockerfiles/issues/65#issuecomment-266532289
// - Run it with a custom seccomp: https://github.com/jessfraz/dockerfiles/issues/65#issuecomment-217214671
// - Run with --no-sandbox: https://github.com/karma-runner/karma-chrome-launcher/issues/125#issuecomment-312668593
// For now I went with the --no-sandbox.
//
// TODO Look into security implications...
let mut caps = Map::new();
let opts = serde_json::json!({
"args": ["--headless", "--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage"],
});
caps.insert("goog:chromeOptions".to_string(), opts);

// Connect to WebDriver instance that is listening on `webdriver`
let c = ClientBuilder::native()
.capabilities(caps)
.connect(webdriver)
.await?;

Ok(c)
}
53 changes: 38 additions & 15 deletions core/src/smtp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@
mod connect;
mod error;
mod gmail;
#[cfg(feature = "headless")]
mod headless;
mod http_api;
mod outlook;
mod parser;
mod yahoo;

use std::default::Default;
use std::env;

use async_smtp::EmailAddress;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -62,25 +65,53 @@ pub async fn check_smtp(
domain: &str,
input: &CheckEmailInput,
) -> Result<SmtpDetails, SmtpError> {
let host = host.to_string();
let host: String = host.to_string();

if input.skipped_domains.iter().any(|d| host.contains(d)) {
return Err(SmtpError::SkippedDomain(format!(
"Reacher currently cannot verify emails from @{domain}"
)));
}

if input.yahoo_use_api && is_yahoo(&host) {
return yahoo::check_yahoo(to_email, input)
.await
.map_err(|err| err.into());
// Headless checks. Please note that they take precedence over API checks.
#[cfg(feature = "headless")]
{
let webdriver_addr = env::var("RCH_WEBDRIVER_ADDR");

if is_outlook(&host) {
match &webdriver_addr {
Ok(a) => {
return outlook::headless::check_password_recovery(
to_email.to_string().as_str(),
a,
)
.await
.map_err(|err| err.into());
}
_ => return Err(SmtpError::NoHeadlessNavigator),
}
} else if is_yahoo(&host) {
match &webdriver_addr {
Ok(a) => {
return yahoo::check_headless(to_email.to_string().as_str(), a)
.await
.map_err(|err| err.into());
}
_ => return Err(SmtpError::NoHeadlessNavigator),
}
}
}

// API checks
if input.gmail_use_api && is_gmail(&host) {
return gmail::check_gmail(to_email, input)
.await
.map_err(|err| err.into());
}
if input.microsoft365_use_api && is_microsoft365(&host) {
} else if input.yahoo_use_api && is_yahoo(&host) {
return yahoo::check_api(to_email, input)
.await
.map_err(|err| err.into());
} else if input.microsoft365_use_api && is_microsoft365(&host) {
match outlook::microsoft365::check_microsoft365_api(to_email, input).await {
Ok(Some(smtp_details)) => return Ok(smtp_details),
// Continue in the event of an error/ambiguous result.
Expand All @@ -95,14 +126,6 @@ pub async fn check_smtp(
_ => {}
}
}
#[cfg(feature = "headless")]
if let Some(webdriver) = &input.hotmail_use_headless {
if is_outlook(&host) {
return outlook::hotmail::check_password_recovery(to_email, webdriver)
.await
.map_err(|err| err.into());
}
}

check_smtp_with_retry(to_email, &host, port, domain, input, input.retries).await
}
Expand Down
Loading

0 comments on commit 6f0f12b

Please sign in to comment.