Skip to content

Commit

Permalink
rollback crt.sh to www subdomain finder
Browse files Browse the repository at this point in the history
  • Loading branch information
stitch committed Jun 27, 2023
1 parent 1101eb3 commit 73a64d4
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Major speedups in report viewing and domain list editing
- All unfinished scans are shown in scan monitor, not just the last 30
- www subdomain discovery has been replaced with crt.sh subdomain discovery
- Support for Django 4.2, with psql12
- Minimum python version is now 3.10
- Reports are now stored on disk instead of the database for compression reasons
Expand Down
Binary file removed dashboard/diskreports/original/UrlListReport/1.gson
Binary file not shown.
32 changes: 23 additions & 9 deletions dashboard/internet_nl_dashboard/scanners/subdomains.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ def scan_status(account: Account, urllist_id: int):
@app.task(queue="storage")
def progress_subdomain_discovery_scans():
scans = SubdomainDiscoveryScan.objects.all().filter(state="requested")
log.error("yolo")

tasks = []
for scan in scans:
log.error("swag")
update_state(scan.id, "scanning")
tasks.append(group(perform_subdomain_scan.si(scan.id) | update_state.si(scan.id, "finished")))

Expand Down Expand Up @@ -98,16 +100,9 @@ def perform_subdomain_scan(scan_id: int) -> None:

try:
toplevel_domains = urllist.urls.filter(computed_subdomain="")
# domains_to_check = [f"www.{url.url}" for url in toplevel_domains]
# add_discovered_subdomains(scan, urllist, domains_to_check)

domains_to_check = []
for tld in toplevel_domains:
if subdomains := certificate_transparency_scan(tld.url):
domains_to_check.extend([f"{sub}.{tld}" for sub in subdomains])

log.debug("Found subdomains:")
log.debug(domains_to_check)
# In the future when ct is stable we can easily support other methods of finding subdomains.
domains_to_check = discover_subdomains_www(toplevel_domains)

add_discovered_subdomains(scan, urllist, domains_to_check)

Expand All @@ -118,6 +113,25 @@ def perform_subdomain_scan(scan_id: int) -> None:
raise Exception from my_exception # pylint: disable=broad-exception-raised


def discover_subdomains_www(toplevel_domains) -> List[str]:
    """Return the "www." candidate hostname for every given toplevel domain.

    :param toplevel_domains: iterable of objects exposing the domain name as ``.url``.
    :return: one "www.<domain>" string per input, in input order.
    """
    www_candidates: List[str] = []
    for toplevel in toplevel_domains:
        www_candidates.append(f"www.{toplevel.url}")
    return www_candidates


def discover_subdomains_ctlogs_sectigo(toplevel_domains) -> List[str]:
    """Collect subdomains for the given toplevel domains using certificate_transparency_scan.

    Not used for now given the service of sectigo has two issues:
    1: it's highly unreliable: it might or might not give a result
    2: a preview is needed of what will be imported, to prevent adding thousands of subdomains

    :param toplevel_domains: iterable of objects exposing the domain name as ``.url``.
    :return: de-duplicated "<subdomain>.<domain>" strings, in no particular order.
    """
    domains_to_check = []
    for tld in toplevel_domains:
        # A falsy scan result (nothing found / no answer) contributes nothing.
        if subdomains := certificate_transparency_scan(tld.url):
            # Build the hostname from tld.url, the same value that was scanned, instead of
            # relying on the string representation of the url object itself.
            domains_to_check.extend([f"{sub}.{tld.url}" for sub in subdomains])

    log.debug("Found subdomains:")
    log.debug(domains_to_check)
    # The same subdomain can be reported more than once; only return unique names.
    return list(set(domains_to_check))


def add_discovered_subdomains(scan, urllist, domains_to_check: List[str]):
urls_to_add = []
# This can take a while because it's synchronous. Done that way to see what urls have been added.
Expand Down

0 comments on commit 73a64d4

Please sign in to comment.