diff --git a/CHANGELOG.md b/CHANGELOG.md index c2b921c4..38997441 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Major speedups in report viewing and domain list editing - All unfinished scans are shown in scan monitor, not just the last 30 -- www subdomain discovery has been replaced with crt.sh subdomain discovery - Support for Django 4.2, with psql12 - Minimum python version is now 3.10 - Reports are now stored on disk instead of the database for compression reasons diff --git a/dashboard/diskreports/original/UrlListReport/1.gson b/dashboard/diskreports/original/UrlListReport/1.gson deleted file mode 100644 index 5034031b..00000000 Binary files a/dashboard/diskreports/original/UrlListReport/1.gson and /dev/null differ diff --git a/dashboard/internet_nl_dashboard/scanners/subdomains.py b/dashboard/internet_nl_dashboard/scanners/subdomains.py index 0002ec27..2e0ab492 100644 --- a/dashboard/internet_nl_dashboard/scanners/subdomains.py +++ b/dashboard/internet_nl_dashboard/scanners/subdomains.py @@ -57,9 +57,11 @@ def scan_status(account: Account, urllist_id: int): @app.task(queue="storage") def progress_subdomain_discovery_scans(): scans = SubdomainDiscoveryScan.objects.all().filter(state="requested") + log.error("yolo") tasks = [] for scan in scans: + log.error("swag") update_state(scan.id, "scanning") tasks.append(group(perform_subdomain_scan.si(scan.id) | update_state.si(scan.id, "finished"))) @@ -98,16 +100,9 @@ def perform_subdomain_scan(scan_id: int) -> None: try: toplevel_domains = urllist.urls.filter(computed_subdomain="") - # domains_to_check = [f"www.{url.url}" for url in toplevel_domains] - # add_discovered_subdomains(scan, urllist, domains_to_check) - domains_to_check = [] - for tld in toplevel_domains: - if subdomains := certificate_transparency_scan(tld.url): - domains_to_check.extend([f"{sub}.{tld}" for sub in subdomains]) - - log.debug("Found subdomains:") - log.debug(domains_to_check) + # In the future when ct is stable we can easily support other methods of finding subdomains. + domains_to_check = discover_subdomains_www(toplevel_domains) add_discovered_subdomains(scan, urllist, domains_to_check) @@ -118,6 +113,25 @@ def perform_subdomain_scan(scan_id: int) -> None: raise Exception from my_exception # pylint: disable=broad-exception-raised +def discover_subdomains_www(toplevel_domains) -> List[str]: + return [f"www.{url.url}" for url in toplevel_domains] + + +def discover_subdomains_ctlogs_sectigo(toplevel_domains) -> List[str]: + # Not used for now given the service of sectigo has two issues: + # 1: it's highly unreliable: it might or might not give a result + # 2: a preview is needed of what will be imported, to prevent adding thousands of subdomains + + domains_to_check = [] + for tld in toplevel_domains: + if subdomains := certificate_transparency_scan(tld.url): + domains_to_check.extend([f"{sub}.{tld}" for sub in subdomains]) + + log.debug("Found subdomains:") + log.debug(domains_to_check) + return list(set(domains_to_check)) + + def add_discovered_subdomains(scan, urllist, domains_to_check: List[str]): urls_to_add = [] # This can take a while because it's synchronous. Done that way to see what urls have been added.