diff --git a/.github/ISSUE_TEMPLATE/issue_form.yml b/.github/ISSUE_TEMPLATE/issue_form.yml index bd7997053..3f52a6d8f 100644 --- a/.github/ISSUE_TEMPLATE/issue_form.yml +++ b/.github/ISSUE_TEMPLATE/issue_form.yml @@ -9,11 +9,19 @@ body: - type: input id: email attributes: - label: Email Provider + label: Email to check description: To preserve privacy, don't use a full email. But if relevant, please tell us which provider is used (e.g. gmail, hotmail...) placeholder: ex. example.com validations: required: false + - type: input + id: server + attributes: + label: From where did you run check-if-email-exists? + description: Was it from Reacher dashboard? Or give the provider where you installed check-if-email-exists + placeholder: ex. Reacher Dashboard, or OVH, Heroku, Digital Ocean... + validations: + required: false - type: input id: version attributes: diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c629dbd6..dcdf4b4b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,56 @@ All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 
+### [0.9.1](https://github.com/reacherhq/check-if-email-exists/compare/v0.9.0...v0.9.1) (2023-10-08) + + +### Features + +* **#289:** add haveibeenpwned check ([#1253](https://github.com/reacherhq/check-if-email-exists/issues/1253)) ([166dbd2](https://github.com/reacherhq/check-if-email-exists/commit/166dbd2cc878e30c51538b919abc1aaea4465c45)), closes [#289](https://github.com/reacherhq/check-if-email-exists/issues/289) +* add email address normalisation ([#1206](https://github.com/reacherhq/check-if-email-exists/issues/1206)) ([f8ec348](https://github.com/reacherhq/check-if-email-exists/commit/f8ec348883cd4f4a20a8acbb38d54b69e798222b)), closes [#952](https://github.com/reacherhq/check-if-email-exists/issues/952) +* add Microsoft 365 HTTP API validation ([#1194](https://github.com/reacherhq/check-if-email-exists/issues/1194)) ([5d3c49f](https://github.com/reacherhq/check-if-email-exists/commit/5d3c49f41ef1369efe2a9e63b24543e281ae0776)), closes [#937](https://github.com/reacherhq/check-if-email-exists/issues/937) +* Add skipped domains ([#1293](https://github.com/reacherhq/check-if-email-exists/issues/1293)) ([29119fa](https://github.com/reacherhq/check-if-email-exists/commit/29119fa72027c9830396bbdf3e90f08c0c89d7a7)) +* Add suggestions for syntax errors ([#1192](https://github.com/reacherhq/check-if-email-exists/issues/1192)) ([2d385f3](https://github.com/reacherhq/check-if-email-exists/commit/2d385f30f7a62ab2706599fbb89fb50275cffb5f)) +* additional Gmail validation ([#1193](https://github.com/reacherhq/check-if-email-exists/issues/1193)) ([49c8f5c](https://github.com/reacherhq/check-if-email-exists/commit/49c8f5c3b4a3db04533d06d7267b0f15ebda3285)), closes [#937](https://github.com/reacherhq/check-if-email-exists/issues/937) +* **backend:** Add header secret to protect against public requests ([#1158](https://github.com/reacherhq/check-if-email-exists/issues/1158)) 
([fa6a56b](https://github.com/reacherhq/check-if-email-exists/commit/fa6a56b62f4b3aeeec704cfe4882755998d40833)) +* **core:** Add check for antispam MX records ([#1257](https://github.com/reacherhq/check-if-email-exists/issues/1257)) ([c9771da](https://github.com/reacherhq/check-if-email-exists/commit/c9771da66c7869a4d0a255e2e2536f2863e8958c)) +* **core:** Add check gravatar image ([#1188](https://github.com/reacherhq/check-if-email-exists/issues/1188)) ([6a26035](https://github.com/reacherhq/check-if-email-exists/commit/6a26035327ab681a65a4f4ba284e155f00680e89)) +* **core:** Add Hotmail checks via headless password recovery ([#1165](https://github.com/reacherhq/check-if-email-exists/issues/1165)) ([7517ed9](https://github.com/reacherhq/check-if-email-exists/commit/7517ed98ba966158deebba6a1a4745c931bfed18)) +* **core:** Fix disabled accts on hanmail.net ([#1339](https://github.com/reacherhq/check-if-email-exists/issues/1339)) ([90393c8](https://github.com/reacherhq/check-if-email-exists/commit/90393c8dda39267da7eb5efe6f112c8f25a593f4)) +* **core:** Skip catch-all for known domains ([#1336](https://github.com/reacherhq/check-if-email-exists/issues/1336)) ([c40a46c](https://github.com/reacherhq/check-if-email-exists/commit/c40a46c4555129346bd9efa444a483bf25b679fe)) +* **core:** Update default MAIL-FROM and HELO ([743a811](https://github.com/reacherhq/check-if-email-exists/commit/743a8111b4831ee19e7ac887c39a8da2775acd4c)) +* Set default timeout to 10s ([#1251](https://github.com/reacherhq/check-if-email-exists/issues/1251)) ([d04f84c](https://github.com/reacherhq/check-if-email-exists/commit/d04f84cc1e7b30e02d3717ab1af9f680cdb2c27f)) + + +### Bug Fixes + +* **backend:** Fix CI printing ([748940c](https://github.com/reacherhq/check-if-email-exists/commit/748940ca2fa7fb59aac8e07a408a22d1ab688527)) +* **backend:** Fix deploy to docker ([20fcfa6](https://github.com/reacherhq/check-if-email-exists/commit/20fcfa6032e4614dc459a34183958fde63199acf)) +* **backend:** Fix 
dockerfile ([f0ed49f](https://github.com/reacherhq/check-if-email-exists/commit/f0ed49f50238c1c71a130f3db19ec047af00b8df)) +* **backend:** Improve sentry error messages ([#1155](https://github.com/reacherhq/check-if-email-exists/issues/1155)) ([d90d998](https://github.com/reacherhq/check-if-email-exists/commit/d90d998d1cb189fed3f888659aa08fd4fabf6e93)) +* **backend:** Redact email in sentry bug tracking ([2c2d1d8](https://github.com/reacherhq/check-if-email-exists/commit/2c2d1d88c0086196bc09359e32c96638124d9539)) +* **cli:** Update flags default values ([a4fe57e](https://github.com/reacherhq/check-if-email-exists/commit/a4fe57e9ab89659e12182719ccb12fb2cdcb5f2e)) +* **core:** Add more invalid parsing and improve logging ([#1156](https://github.com/reacherhq/check-if-email-exists/issues/1156)) ([b5ae9f8](https://github.com/reacherhq/check-if-email-exists/commit/b5ae9f8ad910b77ad6a179ecb5d4b633011ed2f4)) +* **core:** Default SMTP timeout to 15 ([0d4fa4d](https://github.com/reacherhq/check-if-email-exists/commit/0d4fa4d8f662ecfd3fa2e0359322f324a8ef86db)) +* **core:** Don't use headless on Microsoft 365 addresses ([#1196](https://github.com/reacherhq/check-if-email-exists/issues/1196)) ([0c3c21d](https://github.com/reacherhq/check-if-email-exists/commit/0c3c21daf6ea79875835121fb86ab7c0c86d55eb)) +* **core:** Fix default CheckEmailInput ([09215a1](https://github.com/reacherhq/check-if-email-exists/commit/09215a13ac3525861e6cd1dea3fc71c13dfffe52)) +* **core:** Fix hotmail headless option parsing ([6ddc3b9](https://github.com/reacherhq/check-if-email-exists/commit/6ddc3b96da0d01b02711d62873ad0d0df6bf1b33)) +* **core:** Fix hotmail headless with authenticator ([51cdb2e](https://github.com/reacherhq/check-if-email-exists/commit/51cdb2e3c13a433fff92f1d3dcf1bfcb90f6ce7b)) +* **core:** Fix MX random record selection ([#1263](https://github.com/reacherhq/check-if-email-exists/issues/1263)) 
([9fae593](https://github.com/reacherhq/check-if-email-exists/commit/9fae593b8590ad5efb3e7d16bbd25cc05c228cb9)) +* **core:** Improve invalid parser ([#1166](https://github.com/reacherhq/check-if-email-exists/issues/1166)) ([bb46004](https://github.com/reacherhq/check-if-email-exists/commit/bb460046bf1cb031fee706d836c8a737157f803c)) +* **core:** Improve parser and headless hotmail runner ([#1167](https://github.com/reacherhq/check-if-email-exists/issues/1167)) ([0de33a5](https://github.com/reacherhq/check-if-email-exists/commit/0de33a5f265105a769c7ca6125df0fd4f88b89e2)) +* **core:** Improve parser from Sentry errors ([fbaf588](https://github.com/reacherhq/check-if-email-exists/commit/fbaf58824a339e546d50c2125a459161769dda6e)) +* **core:** Improve parser's `is_invalid` ([#1159](https://github.com/reacherhq/check-if-email-exists/issues/1159)) ([ec1c4d5](https://github.com/reacherhq/check-if-email-exists/commit/ec1c4d5e5d4c94d75d255a0699402f75eb29f7ab)) +* **core:** No sandbox in headless Hotmail check ([0590438](https://github.com/reacherhq/check-if-email-exists/commit/0590438310f3c052b2748a8c408e0d8dbfb777b7)) +* **core:** Remove antispam check ([#1337](https://github.com/reacherhq/check-if-email-exists/issues/1337)) ([06f18ed](https://github.com/reacherhq/check-if-email-exists/commit/06f18edf7aee5640b3725feedfa7b7f213da83a8)) +* **core:** Yahoo add back IDENTIFIER_EXISTS ([2b63556](https://github.com/reacherhq/check-if-email-exists/commit/2b635564efb37b0aa891bbba77244e6cf2d611bb)) +* **core:** yahoo api changes: yid is userId now, sessionIndex is required and fo… ([#1314](https://github.com/reacherhq/check-if-email-exists/issues/1314)) ([0209111](https://github.com/reacherhq/check-if-email-exists/commit/02091115026520596fc5b4b2a6757169e91cba15)) +* Don't auto-fetch Chrome, install in Docker ([84fcc0d](https://github.com/reacherhq/check-if-email-exists/commit/84fcc0de40567126ce3a385934086450c3a89ccf)) +* split Microsoft 365/Hotmail functionality 
([#1204](https://github.com/reacherhq/check-if-email-exists/issues/1204)) ([e987b13](https://github.com/reacherhq/check-if-email-exists/commit/e987b13a5ccd98d28fb756f1bf41427c337750c4)) +* Switch back to upstream fast-socks ([#1164](https://github.com/reacherhq/check-if-email-exists/issues/1164)) ([db356f1](https://github.com/reacherhq/check-if-email-exists/commit/db356f19374843ca135de8ebd8a6c34bfeb017a8)) +* TLS accept unsafe ([778692b](https://github.com/reacherhq/check-if-email-exists/commit/778692bce760c0a1e1201dd3e11b41e7ccb7e2e8)) +* Use chromedriver instead of gecko for parallel requests ([e282e28](https://github.com/reacherhq/check-if-email-exists/commit/e282e28aeb7259d800f7faad97173c3a216095a4)) + ## [0.9.0](https://github.com/reacherhq/check-if-email-exists/compare/v0.8.32...v0.9.0) (2022-08-15) diff --git a/Cargo.lock b/Cargo.lock index 772172184..51e35205f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,7 +170,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.2", + "syn 2.0.12", ] [[package]] @@ -183,7 +183,7 @@ dependencies = [ "async-trait", "base64 0.13.0", "bufstream", - "fast-socks5", + "fast-socks5 0.8.2", "futures", "hostname", "log", @@ -396,12 +396,13 @@ dependencies = [ "async-std", "async-std-resolver", "fantoccini", - "fast-socks5", + "fast-socks5 0.9.1", "futures", "levenshtein", "log", "mailchecker", "md5", + "once_cell", "pwned", "rand", "regex", @@ -808,9 +809,9 @@ dependencies = [ [[package]] name = "fast-socks5" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2687b5a6108f18ba8621e0e618a3be1dcc2768632dad24b7cea1f87975375a9" +checksum = "961ce1761191c157145a8c9f0c3ceabecd3a729d65c9a8d443674eaee3420f7e" dependencies = [ "anyhow", "log", @@ -819,6 +820,20 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "fast-socks5" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa6a4a9ab1f87e2d8e9345ce944de7c45d065ee89f5942512f32698e48a8429a" +dependencies = [ + "anyhow", + "async-trait", + "log", + "thiserror", + "tokio", + "tokio-stream", +] + [[package]] name = "fast_chemail" version = "0.9.6" @@ -2169,22 +2184,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.157" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707de5fcf5df2b5788fca98dd7eab490bc2fd9b7ef1404defc462833b83f25ca" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.157" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78997f4555c22a7971214540c4a661291970619afd56de19f77e0de86296e1e5" +checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.2", + "syn 2.0.12", ] [[package]] @@ -2485,9 +2500,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.2" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59d3276aee1fa0c33612917969b5172b5be2db051232a6e4826f1a1a9191b045" +checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" dependencies = [ "proc-macro2", "quote", @@ -2614,7 +2629,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.2", + "syn 2.0.12", ] [[package]] diff --git a/README.md b/README.md index e3646fcc4..ac73dfe83 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![Docs](https://docs.rs/check-if-email-exists/badge.svg)](https://docs.rs/check-if-email-exists) [![Docker](https://img.shields.io/docker/v/reacherhq/backend?color=0db7ed&label=docker&sort=date)](https://hub.docker.com/r/reacherhq/backend) [![Actions 
Status](https://github.com/reacherhq/check-if-email-exists/workflows/pr/badge.svg)](https://github.com/reacherhq/check-if-email-exists/actions) -[![Github Sponsor](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&link=https://github.com/sponsors/amaurym)](https://github.com/sponsors/amaurym)

diff --git a/backend/README.md b/backend/README.md index 72c6db051..6621118c1 100644 --- a/backend/README.md +++ b/backend/README.md @@ -55,7 +55,6 @@ These are the environment variables used to configure the HTTP server. To pass t | `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `127.0.0.1` | | `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` | | `RCH_HOTMAIL_USE_HEADLESS` | No | Set to a running WebDriver process endpoint (e.g. `http://localhost:4444`) to use a headless navigator to Hotmail's password recovery page to check Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | not defined | -| `RCH_SMTP_TIMEOUT` | No | The default timeout of each SMTP connection, in seconds. Can be overwritten in each request using the `smtp_timeout` field. | 10s | | `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined | | `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined | | `RCH_DATABASE_MAX_CONNECTIONS` | No | (Bulk) Connections created for the database pool | 5 | diff --git a/backend/src/check.rs b/backend/src/check.rs index aace8f1e9..a3de306f7 100644 --- a/backend/src/check.rs +++ b/backend/src/check.rs @@ -26,6 +26,31 @@ use super::sentry_util; /// Same as `check-if-email-exists`'s check email, but adds some additional /// inputs and error handling. pub async fn check_email(input: CheckEmailInput) -> CheckEmailOutput { + let hotmail_use_headless = env::var("RCH_HOTMAIL_USE_HEADLESS").ok(); + let skipped_domains = vec![ + // on @bluewin.ch + // - mx-v02.bluewin.ch. + ".bluewin.ch.".into(), + // on @bluewin.ch + // - mxbw-bluewin-ch.hdb-cs04.ellb.ch. 
+ "bluewin-ch.".into(), + // on @gmx.de, @gmx.ch, @gmx.net + ".gmx.net.".into(), + // on @icloud.com + ".mail.icloud.com.".into(), + // on @web.de + ".web.de.".into(), + ".zoho.com.".into(), + ]; + + let input = CheckEmailInput { + // If we want to override core check-if-email-exists's default values + // for CheckEmailInput for the backend, we do it here. + hotmail_use_headless, + skipped_domains, + ..input + }; + let res = ciee_check_email(&input).await; sentry_util::log_unknown_errors(&res); diff --git a/backend/src/main.rs b/backend/src/main.rs index 43fa8fb9e..a1858fceb 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -36,7 +36,6 @@ async fn main() -> Result<(), Box> { let is_bulk_enabled = env::var("RCH_ENABLE_BULK").unwrap_or_else(|_| "0".into()) == "1"; if is_bulk_enabled { - log::info!(target: LOG_TARGET, "Bulk endpoints enabled."); let pool = create_db().await?; let _registry = create_job_registry(&pool).await?; let routes = create_routes(Some(pool)); @@ -104,6 +103,11 @@ async fn create_job_registry(pool: &Pool) -> Result take 30s to respond (sometimes only on 2nd attempt, not deterministic), so we skip the catch-all one, and bump the timeout to well over 30s." + } + }, + "rules": { + "SkipCatchAll": { "_comment": "Don't perform catch-all check" }, + "SmtpTimeout45s": { + "_comment": "Set SMTP connection timeout to at least 45s. If the user request set an even higher timeout, take that one. Please note that this timeout is **per SMTP connection**. By default, we try 2 connections per email: if the 1st one failed, then we connect again to avoid potential greylisting, in which case the whole verification takes 1min30s." 
+ } + } +} diff --git a/core/src/rules.rs b/core/src/rules.rs new file mode 100644 index 000000000..50542b312 --- /dev/null +++ b/core/src/rules.rs @@ -0,0 +1,73 @@ +// check-if-email-exists +// Copyright (C) 2018-2022 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! Read provider- and domain-specific rules from a JSON, then match each +//! email verification to the domain/provider, and translate those rules into +//! code. +//! +//! IMPORTANT: This is still a beta feature, and probably needs refining. + +use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] +pub enum Rule { + /// Don't perform catch-all check. + SkipCatchAll, + /// Set the SMTP timeout to 45s. + SmtpTimeout45s, +} + +#[derive(Debug, Deserialize, Serialize)] +struct RulesByDomain { + rules: Vec, +} + +#[derive(Debug, Deserialize, Serialize)] +struct AllRules { + /// Apply rules by domain name, i.e. after the @ symbol. + by_domain: HashMap, + /// Apply rules by the MX host. Since each domain potentially has multiple + /// MX records, we match by their suffix. 
+ by_mx_suffix: HashMap, +} + +static ALL_RULES: Lazy = + Lazy::new(|| serde_json::from_str::(include_str!("rules.json")).unwrap()); + +fn does_domain_have_rule(domain: &str, rule: &Rule) -> bool { + if let Some(v) = ALL_RULES.by_domain.get(domain) { + return v.rules.contains(rule); + } + + false +} + +fn does_mx_have_rule(host: &str, rule: &Rule) -> bool { + for (k, v) in ALL_RULES.by_mx_suffix.iter() { + if host.ends_with(k) { + return v.rules.contains(rule); + } + } + + false +} + +/// Check if either the domain or the MX host has any given rule. +pub fn has_rule(domain: &str, host: &str, rule: &Rule) -> bool { + does_domain_have_rule(domain, rule) || does_mx_have_rule(host, rule) +} diff --git a/core/src/smtp/connect.rs b/core/src/smtp/connect.rs index 97a579dcd..3df31d921 100644 --- a/core/src/smtp/connect.rs +++ b/core/src/smtp/connect.rs @@ -20,18 +20,18 @@ use async_smtp::{ smtp::{commands::*, extension::ClientId, ServerAddress, Socks5Config}, ClientTlsParameters, EmailAddress, SmtpClient, SmtpTransport, }; -use async_std::future; use rand::rngs::SmallRng; use rand::{distributions::Alphanumeric, Rng, SeedableRng}; use std::iter; use std::str::FromStr; use std::time::Duration; -use trust_dns_proto::rr::Name; - -use super::{gmail::is_gmail, outlook::is_hotmail, parser, yahoo::is_yahoo}; +use super::parser; use super::{SmtpDetails, SmtpError}; -use crate::util::{constants::LOG_TARGET, input_output::CheckEmailInput}; +use crate::{ + rules::{has_rule, Rule}, + util::{constants::LOG_TARGET, input_output::CheckEmailInput}, +}; /// Try to send an smtp command, close and return Err if fails. macro_rules! try_smtp ( @@ -48,17 +48,32 @@ macro_rules! try_smtp ( /// Attempt to connect to host via SMTP, and return SMTP client on success. 
async fn connect_to_host( - host: &Name, + domain: &str, + host: &str, port: u16, input: &CheckEmailInput, ) -> Result { + let smtp_timeout = if let Some(t) = input.smtp_timeout { + if has_rule(domain, host, &Rule::SmtpTimeout45s) { + log::debug!( + target: LOG_TARGET, + "[email={}] Bumping SMTP timeout to at least 45s", + input.to_email, + ); + Some(t.max(Duration::from_secs(45))) + } else { + input.smtp_timeout + } + } else { + None + }; + // hostname verification fails if it ends with '.', for example, using // SOCKS5 proxies we can `io: incomplete` error. - let host = host.to_string(); let host = host.trim_end_matches('.').to_string(); let security = { - let tls_params = ClientTlsParameters::new( + let tls_params: ClientTlsParameters = ClientTlsParameters::new( host.clone(), TlsConnector::new() .use_sni(true) @@ -77,7 +92,7 @@ async fn connect_to_host( security, ) .hello_name(ClientId::Domain(input.hello_name.clone())) - .timeout(Some(Duration::new(30, 0))); // Set timeout to 30s + .timeout(smtp_timeout); if let Some(proxy) = &input.proxy { let socks5_config = match (&proxy.username, &proxy.password) { @@ -220,11 +235,16 @@ async fn email_deliverable( async fn smtp_is_catch_all( smtp_transport: &mut SmtpTransport, domain: &str, - host: &Name, + host: &str, + input: &CheckEmailInput, ) -> Result { // Skip catch-all check for known providers. 
- let host = host.to_string(); - if is_gmail(&host) || is_hotmail(&host) || is_yahoo(&host) { + if has_rule(domain, host, &Rule::SkipCatchAll) { + log::debug!( + target: LOG_TARGET, + "[email={}] Skipping catch-all check for [domain={domain}]", + input.to_email + ); return Ok(false); } @@ -247,16 +267,16 @@ async fn smtp_is_catch_all( async fn create_smtp_future( to_email: &EmailAddress, - host: &Name, + host: &str, port: u16, domain: &str, input: &CheckEmailInput, ) -> Result<(bool, Deliverability), SmtpError> { // FIXME If the SMTP is not connectable, we should actually return an // Ok(SmtpDetails { can_connect_smtp: false, ... }). - let mut smtp_transport = connect_to_host(host, port, input).await?; + let mut smtp_transport = connect_to_host(domain, host, port, input).await?; - let is_catch_all = smtp_is_catch_all(&mut smtp_transport, domain, host) + let is_catch_all = smtp_is_catch_all(&mut smtp_transport, domain, host, input) .await .unwrap_or(false); let deliverability = if is_catch_all { @@ -278,11 +298,12 @@ async fn create_smtp_future( if parser::is_err_io_errors(e) { log::debug!( target: LOG_TARGET, - "Got `io: incomplete` error, reconnecting." + "[email={}] Got `io: incomplete` error, reconnecting.", + input.to_email ); let _ = smtp_transport.close().await; - smtp_transport = connect_to_host(host, port, input).await?; + smtp_transport = connect_to_host(domain, host, port, input).await?; result = email_deliverable(&mut smtp_transport, to_email).await; } } @@ -299,17 +320,13 @@ async fn create_smtp_future( /// retries. async fn check_smtp_without_retry( to_email: &EmailAddress, - host: &Name, + host: &str, port: u16, domain: &str, input: &CheckEmailInput, ) -> Result { let fut = create_smtp_future(to_email, host, port, domain, input); - let (is_catch_all, deliverability) = if let Some(smtp_timeout) = input.smtp_timeout { - future::timeout(smtp_timeout, fut).await?? - } else { - fut.await? 
- }; + let (is_catch_all, deliverability) = fut.await?; Ok(SmtpDetails { can_connect_smtp: true, @@ -325,7 +342,7 @@ async fn check_smtp_without_retry( #[async_recursion] pub async fn check_smtp_with_retry( to_email: &EmailAddress, - host: &Name, + host: &str, port: u16, domain: &str, input: &CheckEmailInput, @@ -376,3 +393,26 @@ pub async fn check_smtp_with_retry( _ => result, } } + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn should_skip_catch_all() { + let smtp_client = SmtpClient::new("gmail.com".into()); + let mut smtp_transport = smtp_client.into_transport(); + + let r = smtp_is_catch_all( + &mut smtp_transport, + "gmail.com", + "alt4.aspmx.l.google.com.", + &CheckEmailInput::default(), + ) + .await; + + assert!(!smtp_transport.is_connected()); // We shouldn't connect to google servers. + assert!(r.is_ok()); + assert_eq!(false, r.unwrap()) + } +} diff --git a/core/src/smtp/error.rs b/core/src/smtp/error.rs index d042135e9..405b83eae 100644 --- a/core/src/smtp/error.rs +++ b/core/src/smtp/error.rs @@ -22,7 +22,6 @@ use super::parser; use super::yahoo::YahooError; use crate::util::ser_with_display::ser_with_display; use async_smtp::smtp::error::Error as AsyncSmtpError; -use async_std::future; use fast_socks5::SocksError; use serde::Serialize; @@ -36,9 +35,6 @@ pub enum SmtpError { /// Error when communicating with SMTP server. #[serde(serialize_with = "ser_with_display")] SmtpError(AsyncSmtpError), - /// Time-out error. - #[serde(serialize_with = "ser_with_display")] - TimeoutError(future::TimeoutError), /// Error when verifying a Yahoo email via HTTP requests. YahooError(YahooError), /// Error when verifying a Gmail email via a HTTP request. 
@@ -58,12 +54,6 @@ impl From for SmtpError { } } -impl From for SmtpError { - fn from(e: future::TimeoutError) -> Self { - SmtpError::TimeoutError(e) - } -} - impl From for SmtpError { fn from(e: YahooError) -> Self { SmtpError::YahooError(e) diff --git a/core/src/smtp/gmail.rs b/core/src/smtp/gmail.rs index 8ec9c8bca..5e15d4d95 100644 --- a/core/src/smtp/gmail.rs +++ b/core/src/smtp/gmail.rs @@ -76,7 +76,8 @@ pub async fn check_gmail( }) } -/// Check if the MX host is from Gmail. +/// Check if the MX host is from Google, i.e. either a @gmail.com address, or +/// a Google Suite email. pub fn is_gmail(host: &str) -> bool { host.to_lowercase().ends_with(".google.com.") } diff --git a/core/src/smtp/mod.rs b/core/src/smtp/mod.rs index 0b56d4c4e..29abee6ff 100644 --- a/core/src/smtp/mod.rs +++ b/core/src/smtp/mod.rs @@ -34,7 +34,7 @@ pub use error::*; use self::{ gmail::is_gmail, - outlook::{is_hotmail, is_outlook}, + outlook::{is_microsoft365, is_outlook}, yahoo::is_yahoo, }; @@ -62,29 +62,25 @@ pub async fn check_smtp( domain: &str, input: &CheckEmailInput, ) -> Result { - let host_lowercase = host.to_lowercase().to_string(); + let host = host.to_string(); - if input - .skipped_domains - .iter() - .any(|d| host_lowercase.contains(d)) - { + if input.skipped_domains.iter().any(|d| host.contains(d)) { return Err(SmtpError::SkippedDomain(format!( "Reacher currently cannot verify emails from @{domain}" ))); } - if input.yahoo_use_api && is_yahoo(&host_lowercase) { + if input.yahoo_use_api && is_yahoo(&host) { return yahoo::check_yahoo(to_email, input) .await .map_err(|err| err.into()); } - if input.gmail_use_api && is_gmail(&host_lowercase) { + if input.gmail_use_api && is_gmail(&host) { return gmail::check_gmail(to_email, input) .await .map_err(|err| err.into()); } - if input.microsoft365_use_api && is_outlook(&host_lowercase) { + if input.microsoft365_use_api && is_microsoft365(&host) { match outlook::microsoft365::check_microsoft365_api(to_email, input).await { 
Ok(Some(smtp_details)) => return Ok(smtp_details), // Continue in the event of an error/ambiguous result. @@ -101,23 +97,20 @@ pub async fn check_smtp( } #[cfg(feature = "headless")] if let Some(webdriver) = &input.hotmail_use_headless { - // The password recovery page do not always work with Microsoft 365 - // addresses. So we only test with @hotmail and @outlook addresses. - // ref: https://github.com/reacherhq/check-if-email-exists/issues/1185 - if is_hotmail(&host_lowercase) { + if is_outlook(&host) { return outlook::hotmail::check_password_recovery(to_email, webdriver) .await .map_err(|err| err.into()); } } - check_smtp_with_retry(to_email, host, port, domain, input, input.retries).await + check_smtp_with_retry(to_email, &host, port, domain, input, input.retries).await } #[cfg(test)] mod tests { use super::{check_smtp, CheckEmailInput, SmtpError}; - use async_smtp::EmailAddress; + use async_smtp::{smtp::error::Error, EmailAddress}; use std::{str::FromStr, time::Duration}; use tokio::runtime::Runtime; use trust_dns_proto::rr::Name; @@ -127,13 +120,13 @@ mod tests { let runtime = Runtime::new().unwrap(); let to_email = EmailAddress::from_str("foo@gmail.com").unwrap(); - let host = Name::from_str("gmail.com").unwrap(); + let host = Name::from_str("alt4.aspmx.l.google.com.").unwrap(); let mut input = CheckEmailInput::default(); input.set_smtp_timeout(Some(Duration::from_millis(1))); let res = runtime.block_on(check_smtp(&to_email, &host, 25, "gmail.com", &input)); match res { - Err(SmtpError::TimeoutError(_)) => (), + Err(SmtpError::SmtpError(Error::Io(_))) => (), // ErrorKind == Timeout _ => panic!("check_smtp did not time out"), } } @@ -144,7 +137,8 @@ mod tests { let to_email = EmailAddress::from_str("foo@icloud.com").unwrap(); let host = Name::from_str("mx01.mail.icloud.com.").unwrap(); - let input = CheckEmailInput::default(); + let mut input = CheckEmailInput::default(); + input.set_skipped_domains(vec![".mail.icloud.com.".into()]); let res = 
runtime.block_on(check_smtp(&to_email, &host, 25, "icloud.com", &input)); match res { diff --git a/core/src/smtp/outlook/mod.rs b/core/src/smtp/outlook/mod.rs index 3c8c9c077..6a881b019 100644 --- a/core/src/smtp/outlook/mod.rs +++ b/core/src/smtp/outlook/mod.rs @@ -4,9 +4,12 @@ pub mod microsoft365; /// Check if a MX host is from outlook (includes @hotmail.*, @outlook.* and /// all Microsoft 365 addresses). +/// +/// After some testing I got: +/// - @outlook.* and @hotmail.* -> end with ".olc.protection.outlook.com." +/// - Microsoft 365 emails -> end with ".mail.protection.outlook.com." pub fn is_outlook(host: &str) -> bool { - host.to_lowercase() - .ends_with(".mail.protection.outlook.com.") + host.to_lowercase().ends_with(".protection.outlook.com.") } /// Check if a MX host is an @hotmail.* or @outlook.* email. @@ -18,8 +21,15 @@ pub fn is_outlook(host: &str) -> bool { /// - *@hotmail.fr -> `eur.olc.protection.outlook.com.` /// - *@hotmail.nl -> `eur.olc.protection.outlook.com.` /// +/// But Microsoft 365 addresses end with "mail.protection.outlook.com." +/// /// So it seems that outlook/hotmail addresses end with `olc.protection.outlook.com.` pub fn is_hotmail(host: &str) -> bool { host.to_lowercase() - .ends_with(".mail.protection.outlook.com.") + .ends_with(".olc.protection.outlook.com.") +} + +/// Check if a MX host belongs to Microsoft 365 (an outlook host that is not @hotmail.* or @outlook.*). +pub fn is_microsoft365(host: &str) -> bool { + is_outlook(host) && !is_hotmail(host) } diff --git a/core/src/smtp/parser.rs b/core/src/smtp/parser.rs index febbb2dbe..9b9549f1c 100644 --- a/core/src/smtp/parser.rs +++ b/core/src/smtp/parser.rs @@ -95,6 +95,10 @@ pub fn is_invalid(e: &str, email: &EmailAddress) -> bool { || e.contains("address error") // E-mail address is not handled by this system (on @kaimayfair.co.uk) || e.contains("address is not handled") + // permanent: 5.1.1 recipient is not exist (on @sim.com) + || e.contains("recipient is not exist") + // permanent: 5.1.1 Recipient not found. 
(on @4polymer.com) + || e.contains("recipient not found") } /// Check that the mailbox has a full inbox. @@ -116,6 +120,8 @@ pub fn is_disabled_account(e: &str) -> bool { e.contains("disabled") // 554 delivery error: Sorry your message to cannot be delivered. This account has been disabled or discontinued || e.contains("discontinued") + //550 5.2.1 RACT MY.IP: Mailbox is inactive: (on hanmail.net) + || e.contains("inactive") } /// Check if the error is an IO "incomplete" error. diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index 71b883d7d..06b7ef42a 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -176,21 +176,7 @@ impl Default for CheckEmailInput { check_gravatar: false, haveibeenpwned_api_key: None, retries: 2, - skipped_domains: vec![ - // on @bluewin.ch - // - mx-v02.bluewin.ch. - ".bluewin.ch.".into(), - // on @bluewin.ch - // - mxbw-bluewin-ch.hdb-cs04.ellb.ch. - "bluewin-ch.".into(), - // on @gmx.de, @gmx.ch, @gmx.net - ".gmx.net.".into(), - // on @icloud.com - ".mail.icloud.com.".into(), - // on @web.de - ".web.de.".into(), - ".zoho.com.".into(), - ], + skipped_domains: vec![], } } }