diff --git a/artemis/config.py b/artemis/config.py
index b81030276..17aa749f0 100644
--- a/artemis/config.py
+++ b/artemis/config.py
@@ -698,6 +698,29 @@ class Nuclei:
                 "NUCLEI_TEMPLATE_CHUNK_SIZE is 200, three calls will be made with 200 templates each.",
             ] = get_config("NUCLEI_TEMPLATE_CHUNK_SIZE", default=200, cast=int)
 
+        class PlaceholderPageContent:
+            # Settings that let scans skip hosting-provider placeholder pages.
+
+            # Opt-in switch: the detector is disabled unless this is set.
+            ENABLE_PLACEHOLDER_PAGE_DETECTOR: Annotated[
+                bool,
+                "Enable or disable placeholder pages detector. Using this feature you may skip vulnerability scanning "
+                "for websites that aren't built yet, but e.g. contain a hosting provider placeholder page. "
+                "If the page exists and the specified string is found within it, the page will not be scanned for "
+                "vulnerabilities. If the page is not marked as a placeholder, a full scan will be performed.",
+            ] = get_config("ENABLE_PLACEHOLDER_PAGE_DETECTOR", default=False, cast=bool)
+
+            # Location of the marker list; per the help text, one HTML snippet per line.
+            PLACEHOLDER_PAGE_CONTENT_FILENAME: Annotated[
+                str,
+                "Path to placeholder page content file. The file is divided into lines – each line is a string "
+                "containing a different HTML code element to check.",
+            ] = get_config(
+                "PLACEHOLDER_PAGE_CONTENT_FILENAME",
+                default="/opt/artemis/modules/data/placeholder_page_content.txt",
+                cast=str,
+            )
+
         class PortScanner:
             PORT_SCANNER_PORT_LIST: Annotated[str, "Chosen list of ports to scan (can be 'short' or 'long')"] = (
                 get_config("PORT_SCANNER_PORT_LIST", default="short")
diff --git a/artemis/module_base.py b/artemis/module_base.py
index 43296d026..c8145561e 100644
--- a/artemis/module_base.py
+++ b/artemis/module_base.py
@@ -19,6 +19,7 @@
 from artemis.config import Config
 from artemis.db import DB
 from artemis.domains import is_domain
+from artemis.placeholder_page_detector import PlaceholderPageDetector
 from artemis.redis_cache import RedisCache
 from artemis.resolvers import NoAnswer, ResolutionException, lookup
 from artemis.resource_lock import FailedToAcquireLockException, ResourceLock
@@ -153,6 +154,11 @@ def
check_domain_exists(self, domain: str) -> bool: bool: True if the domain exists, False otherwise. """ try: + if Config.Modules.PlaceholderPageContent.ENABLE_PLACEHOLDER_PAGE_DETECTOR: + placeholder_page = PlaceholderPageDetector() + if placeholder_page.is_placeholder(domain): + return False + # Check for NS records try: ns_records = lookup(domain, "NS") diff --git a/artemis/modules/data/placeholder_page_content.txt b/artemis/modules/data/placeholder_page_content.txt new file mode 100644 index 000000000..449efd01f --- /dev/null +++ b/artemis/modules/data/placeholder_page_content.txt @@ -0,0 +1,12 @@ + +