class PlaceholderPageContent:
    # Settings for the placeholder page detector. Both values are read through
    # get_config, keyed by the same name as the attribute they populate.

    # Feature switch; defaults to disabled.
    ENABLE_PLACEHOLDER_PAGE_DETECTOR: Annotated[
        bool,
        "Enable or disable placeholder pages detector. Using this feature you may skip vulnerability scanning "
        "for websites that aren't built yet, but e.g. contain a hosting provider placeholder page. "
        "If the page exists and the specified string is found within it, the page will not be scanned for "
        "vulnerabilities. If the page is not marked as a placeholder, a full scan will be performed.",
    ] = get_config("ENABLE_PLACEHOLDER_PAGE_DETECTOR", default=False, cast=bool)

    # Location of the keyword file (one snippet per line).
    PLACEHOLDER_PAGE_CONTENT_FILENAME: Annotated[
        str,
        "Path to placeholder page content file. The file is divided into lines – each line is a string "
        "containing a different HTML code element to check.",
    ] = get_config(
        "PLACEHOLDER_PAGE_CONTENT_FILENAME",
        cast=str,
        default="/opt/artemis/modules/data/placeholder_page_content.txt",
    )
check_domain_exists(self, domain: str) -> bool: bool: True if the domain exists, False otherwise. """ try: + if Config.Modules.PlaceholderPageContent.ENABLE_PLACEHOLDER_PAGE_DETECTOR: + placeholder_page = PlaceholderPageDetector() + if placeholder_page.is_placeholder(domain): + return False + # Check for NS records try: ns_records = lookup(domain, "NS") diff --git a/artemis/modules/data/placeholder_page_content.txt b/artemis/modules/data/placeholder_page_content.txt new file mode 100644 index 000000000..449efd01f --- /dev/null +++ b/artemis/modules/data/placeholder_page_content.txt @@ -0,0 +1,12 @@ + +Domena newkf.nazwa.pl pozostaje na serwerze nazwa.pl +Tanie domeny, Tani hosting, Helpdesk, Pomoc zdalna - NetStrefa.pl +Strona w budowie +Miejsce w budowie + +LOGONET Sp. z o.o. [C] +HostedWindows.pl +
class PlaceholderPageDetector:
    """Decide whether a domain serves a placeholder page.

    The page is fetched over HTTP (falling back to HTTPS) and its body is
    searched for any of the configured placeholder snippets.
    """

    def __init__(self) -> None:
        # Snippets come from the module-level list loaded from the keyword file.
        self.placeholder_content = PLACEHOLDER_PAGE_CONTENT

    @staticmethod
    def check_response(domain: str) -> Any:
        """Fetch the page for ``domain``.

        Tries ``http://`` first and ``https://`` second.

        Returns:
            Whatever ``http_requests.get`` returns for the first scheme that
            does not raise ``requests.RequestException``, or ``False`` when
            both attempts raise it.
        """
        for scheme in ("http://", "https://"):
            try:
                return http_requests.get(scheme + domain)
            except requests.RequestException:
                continue
        return False

    def is_placeholder(self, domain: str) -> bool:
        """Return True if the page at ``domain`` contains a placeholder snippet.

        Returns False when the page cannot be fetched or no snippet matches.
        """
        response = self.check_response(domain)
        if not response:
            return False

        html_content = response.content
        # NOTE(review): the body is presumably already text; decode defensively
        # so a bytes body does not raise TypeError on the substring test.
        if isinstance(html_content, (bytes, bytearray)):
            html_content = bytes(html_content).decode("utf-8", errors="replace")

        for keyword in self.placeholder_content:
            needle = keyword.strip()
            # Blank lines in the keyword file strip to "", and "" is a substring
            # of every page - skip them so they cannot flag every site.
            if needle and needle in html_content:
                return True
        return False
b/test/data/test_placeholder_page_content.txt @@ -0,0 +1,12 @@ + +Domena newkf.nazwa.pl pozostaje na serwerze nazwa.pl +Tanie domeny, Tani hosting, Helpdesk, Pomoc zdalna - NetStrefa.pl +Strona w budowie +Miejsce w budowie + +LOGONET Sp. z o.o. [C] +HostedWindows.pl +
Witaj w serwisie
+Cyber_Folks Lepsza obsługa i wsparcie bez porównania + +Tanie domeny, Tani hosting, Helpdesk, Pomoc zdalna - NetStrefa.pl