From e52deb3bdc62a89197fde40f227dce4cf72dd839 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 20 Mar 2023 13:50:27 -0700 Subject: [PATCH 1/3] EC2 Instance sync => new data model --- cartography/data/indexes.cypher | 18 - .../aws_import_ec2_instances_cleanup.json | 68 -- cartography/graph/cleanupbuilder.py | 7 +- cartography/graph/statement.py | 2 +- cartography/intel/aws/ec2/instances.py | 623 ++++++++---------- cartography/models/aws/ec2/instances.py | 83 +++ cartography/models/aws/ec2/keypairs.py | 64 ++ .../models/aws/ec2/networkinterfaces.py | 102 +++ cartography/models/aws/ec2/reservations.py | 43 ++ cartography/models/aws/ec2/securitygroups.py | 64 ++ cartography/models/aws/ec2/subnets.py | 64 ++ cartography/models/aws/ec2/volumes.py | 63 ++ .../intel/aws/ec2/test_ec2_instances.py | 369 +++++++---- .../intel/aws/ec2/test_ec2_volumes.py | 20 +- .../aws/test_resourcegroupstaggingapi.py | 31 +- .../cartography/intel/aws/test_ssm.py | 44 +- 16 files changed, 1091 insertions(+), 574 deletions(-) delete mode 100644 cartography/data/jobs/cleanup/aws_import_ec2_instances_cleanup.json create mode 100644 cartography/models/aws/ec2/instances.py create mode 100644 cartography/models/aws/ec2/keypairs.py create mode 100644 cartography/models/aws/ec2/networkinterfaces.py create mode 100644 cartography/models/aws/ec2/reservations.py create mode 100644 cartography/models/aws/ec2/securitygroups.py create mode 100644 cartography/models/aws/ec2/subnets.py create mode 100644 cartography/models/aws/ec2/volumes.py diff --git a/cartography/data/indexes.cypher b/cartography/data/indexes.cypher index 6029d2460..0227ce9fc 100644 --- a/cartography/data/indexes.cypher +++ b/cartography/data/indexes.cypher @@ -97,27 +97,11 @@ CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBTable) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBTable) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:EBSSnapshot) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:EBSSnapshot) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EBSVolume) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:EBSVolume) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Instance) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Instance) ON (n.instanceid); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Instance) ON (n.publicdnsname); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Instance) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EC2KeyPair) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:EC2KeyPair) ON (n.keyfingerprint); -CREATE INDEX IF NOT EXISTS FOR (n:EC2KeyPair) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:EC2PrivateIp) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:EC2PrivateIp) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Reservation) ON (n.reservationid); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Reservation) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:EC2ReservedInstance) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:EC2ReservedInstance) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EC2SecurityGroup) ON (n.groupid); -CREATE INDEX IF NOT EXISTS FOR (n:EC2SecurityGroup) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:EC2SecurityGroup) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Subnet) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Subnet) ON (n.subnetid); -CREATE INDEX IF NOT EXISTS FOR (n:EC2Subnet) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:ECRImage) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:ECRImage) ON (n.digest); CREATE INDEX IF NOT EXISTS FOR (n:ECRImage) ON (n.lastupdated); @@ -241,8 +225,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancer) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancerV2) ON (n.dnsname); CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancerV2) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancerV2) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:NetworkInterface) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:NetworkInterface) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:NameServer) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:NameServer) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:OktaOrganization) ON (n.id); diff --git a/cartography/data/jobs/cleanup/aws_import_ec2_instances_cleanup.json b/cartography/data/jobs/cleanup/aws_import_ec2_instances_cleanup.json deleted file mode 100644 index 2a744b642..000000000 --- a/cartography/data/jobs/cleanup/aws_import_ec2_instances_cleanup.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "statements": [{ - "query": "MATCH (n:EC2Reservation)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (n:EC2Instance)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (n:EC2Subnet)<-[:PART_OF_SUBNET]-(:EC2Instance)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:EC2Subnet)<-[r:PART_OF_SUBNET]-(:EC2Instance)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[r:MEMBER_OF_EC2_RESERVATION]->(:EC2Reservation) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[r:MEMBER_OF_EC2_SECURITY_GROUP]->(:EC2SecurityGroup) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (n:NetworkInterface)<-[:NETWORK_INTERFACE]-(:EC2Instance)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[r:NETWORK_INTERFACE]->(:NetworkInterface) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:NetworkInterface)-[r:MEMBER_OF_EC2_SECURITY_GROUP]->(:EC2SecurityGroup)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:EC2Instance)-[r:STS_ASSUMEROLE_ALLOW]->(:AWSRole)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(n:EBSVolume)-[:ATTACHED_TO]->(:EC2Instance) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EBSVolume)-[r:ATTACHED_TO]->(:EC2Instance) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[r:RESOURCE]->(:EBSVolume)-[:ATTACHED_TO]->(:EC2Instance) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }], - "name": "cleanup EC2Instance|EC2Subnet|NetworkInterface|IAMInstanceProfile" -} diff --git a/cartography/graph/cleanupbuilder.py b/cartography/graph/cleanupbuilder.py index 5033e8eef..bee0fa651 100644 --- a/cartography/graph/cleanupbuilder.py +++ b/cartography/graph/cleanupbuilder.py @@ -1,9 +1,11 @@ from dataclasses import asdict from string import Template +from typing import Dict from typing import List from cartography.graph.querybuilder import _build_match_clause from cartography.graph.querybuilder import rel_present_on_node_schema +from cartography.models.core.common import PropertyRef from cartography.models.core.nodes import CartographyNodeSchema from cartography.models.core.relationships import CartographyRelSchema from cartography.models.core.relationships import LinkDirection @@ -150,11 +152,12 @@ def _validate_target_node_matcher_for_cleanup_job(tgm: TargetNodeMatcher): class injects the sub resource id via a query kwarg parameter. See GraphJob and GraphStatement classes. This is a private function meant only to be called when we clean up the sub resource relationship. """ - tgm_asdict = asdict(tgm) + tgm_asdict: Dict[str, PropertyRef] = asdict(tgm) for key, prop_ref in tgm_asdict.items(): if not prop_ref.set_in_kwargs: raise ValueError( f"TargetNodeMatcher PropertyRefs in the sub_resource_relationship must have set_in_kwargs=True. " - f"{key} has set_in_kwargs=False, please check.", + f"{key} has set_in_kwargs=False, please check by reviewing the full stack trace to know which object" + f"this message was raised from. Debug information: PropertyRef name = {prop_ref.name}.", ) diff --git a/cartography/graph/statement.py b/cartography/graph/statement.py index 2b0f1c1a6..4c2bf619f 100644 --- a/cartography/graph/statement.py +++ b/cartography/graph/statement.py @@ -117,7 +117,7 @@ def _run_iterative(self, session: neo4j.Session) -> None: """ self.parameters["LIMIT_SIZE"] = self.iterationsize - while True: + while True: # TODO if there are 0 things to clean up, then we will hang forever result: neo4j.Result = session.write_transaction(self._run_noniterative) # Exit if we have finished processing all items diff --git a/cartography/intel/aws/ec2/instances.py b/cartography/intel/aws/ec2/instances.py index c468ef142..320372b89 100644 --- a/cartography/intel/aws/ec2/instances.py +++ b/cartography/intel/aws/ec2/instances.py @@ -1,16 +1,24 @@ import logging import time -from datetime import datetime from typing import Any from typing import Dict from typing import List +from typing import Tuple import boto3 import neo4j -from .util import get_botocore_config +from cartography.client.core.tx import load +from cartography.graph.job import GraphJob +from cartography.intel.aws.ec2.util import get_botocore_config +from cartography.models.aws.ec2.instances import EC2InstanceSchema +from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema +from cartography.models.aws.ec2.networkinterfaces import EC2NetworkInterfaceSchema +from cartography.models.aws.ec2.reservations import EC2ReservationSchema +from cartography.models.aws.ec2.securitygroups import EC2SecurityGroupSchema +from cartography.models.aws.ec2.subnets import EC2SubnetSchema +from cartography.models.aws.ec2.volumes import EBSVolumeSchema from cartography.util import aws_handle_regions -from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) @@ -21,410 +29,343 @@ def get_ec2_instances(boto3_session: boto3.session.Session, region: str) -> List[Dict]: client = boto3_session.client('ec2', region_name=region, config=get_botocore_config()) paginator = client.get_paginator('describe_instances') - reservations: List[Dict] = [] + reservations: List[Dict[str, Any]] = [] for page in paginator.paginate(): reservations.extend(page['Reservations']) return reservations -def _load_ec2_instance_net_if_tx( - tx: neo4j.Transaction, - instance_data: Dict[str, Any], - update_tag: int, -) -> None: - query = """ - MATCH (instance:EC2Instance{instanceid: $InstanceId}) - UNWIND $Interfaces as interface - MERGE (nic:NetworkInterface{id: interface.NetworkInterfaceId}) - ON CREATE SET nic.firstseen = timestamp() - SET nic.status = interface.Status, - nic.mac_address = interface.MacAddress, - nic.description = interface.Description, - nic.private_dns_name = interface.PrivateDnsName, - nic.private_ip_address = interface.PrivateIpAddress, - nic.lastupdated = $update_tag - - MERGE (instance)-[r:NETWORK_INTERFACE]->(nic) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - - WITH nic, interface - WHERE interface.SubnetId IS NOT NULL - MERGE (subnet:EC2Subnet{subnetid: interface.SubnetId}) - ON CREATE SET subnet.firstseen = timestamp() - SET subnet.lastupdated = $update_tag - - MERGE (nic)-[r:PART_OF_SUBNET]->(subnet) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - - WITH nic, interface - UNWIND interface.Groups as group - MATCH (ec2group:EC2SecurityGroup{groupid: group.GroupId}) - MERGE (nic)-[r:MEMBER_OF_EC2_SECURITY_GROUP]->(ec2group) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - Interfaces=instance_data['NetworkInterfaces'], - InstanceId=instance_data['InstanceId'], - update_tag=update_tag, - ) +def transform_ec2_instances( + reservations: List[Dict[str, Any]], + region: str, + current_aws_account_id: str, +) -> Tuple[ + List[Dict[str, Any]], + List[Dict[str, Any]], + List[Dict[str, Any]], + List[Dict[str, Any]], + List[Dict[str, Any]], + List[Dict[str, Any]], + List[Dict[str, Any]], +]: + reservation_list = [] + instance_list = [] + subnet_list = [] + keypair_list = [] + sg_list = [] + network_interface_list = [] + instance_ebs_volumes_list = [] + + for reservation in reservations: + reservation_id = reservation['ReservationId'] + reservation_list.append({ + 'RequesterId': reservation.get('RequesterId'), + 'ReservationId': reservation['ReservationId'], + 'OwnerId': reservation['OwnerId'], + }) + for instance in reservation['Instances']: + instance_id = instance['InstanceId'] + launch_time = instance.get("LaunchTime") + launch_time_unix = str(time.mktime(launch_time.timetuple())) if launch_time else None + instance_list.append( + { + 'InstanceId': instance_id, + 'ReservationId': reservation_id, + 'PublicDnsName': instance.get("PublicDnsName"), + 'PublicIpAddress': instance.get("PublicIpAddress"), + 'PrivateIpAddress': instance.get("PrivateIpAddress"), + 'ImageId': instance.get("ImageId"), + 'InstanceType': instance.get("InstanceType"), + 'IamInstanceProfile': instance.get("IamInstanceProfile", {}).get("Arn"), + 'MonitoringState': instance.get("Monitoring", {}).get("State"), + 'LaunchTime': instance.get("LaunchTime"), + 'LaunchTimeUnix': launch_time_unix, + 'State': instance.get("State", {}).get("Name"), + 'AvailabilityZone': instance.get("Placement", {}).get("AvailabilityZone"), + 'Tenancy': instance.get("Placement", {}).get("Tenancy"), + 'HostResourceGroupArn': instance.get("Placement", {}).get("HostResourceGroupArn"), + 'Platform': instance.get("Platform"), + 'Architecture': instance.get("Architecture"), + 'EbsOptimized': instance.get("EbsOptimized"), + 'BootMode': instance.get("BootMode"), + 'InstanceLifecycle': instance.get("InstanceLifecycle"), + 'HibernationOptions': instance.get("HibernationOptions", {}).get("Configured"), + }, + ) + subnet_id = instance.get('SubnetId') + if subnet_id: + subnet_list.append( + { + 'SubnetId': subnet_id, + 'InstanceId': instance_id, + }, + ) -@timeit -def load_ec2_instance_network_interfaces(neo4j_session: neo4j.Session, instance_data: Dict, update_tag: int) -> None: - neo4j_session.write_transaction(_load_ec2_instance_net_if_tx, instance_data, update_tag) + if instance.get("KeyName"): + key_name = instance["KeyName"] + key_pair_arn = f'arn:aws:ec2:{region}:{current_aws_account_id}:key-pair/{key_name}' + keypair_list.append({ + 'KeyPairArn': key_pair_arn, + 'KeyName': key_name, + 'InstanceId': instance_id, + }) + + if instance.get("SecurityGroups"): + for group in instance["SecurityGroups"]: + sg_list.append( + { + 'GroupId': group['GroupId'], + 'InstanceId': instance_id, + }, + ) + for network_interface in instance['NetworkInterfaces']: + for security_group in network_interface['Groups']: + network_interface_list.append({ + 'NetworkInterfaceId': network_interface['NetworkInterfaceId'], + 'Status': network_interface['Status'], + 'MacAddress': network_interface['MacAddress'], + 'Description': network_interface['Description'], + 'PrivateDnsName': network_interface['PrivateDnsName'], + 'PrivateIpAddress': network_interface['PrivateIpAddress'], + 'InstanceId': instance_id, + 'SubnetId': subnet_id, + 'GroupId': security_group['GroupId'], + }) + + if 'BlockDeviceMappings' in instance and len(instance['BlockDeviceMappings']) > 0: + for mapping in instance['BlockDeviceMappings']: + if 'VolumeId' in mapping['Ebs']: + instance_ebs_volumes_list.append({ + 'InstanceId': instance_id, + 'VolumeId': mapping['Ebs']['VolumeId'], + 'DeleteOnTermination': mapping['Ebs']['DeleteOnTermination'], + # 'SnapshotId': mapping['Ebs']['SnapshotId'], # TODO check on this + }) + + return ( + reservation_list, + instance_list, + subnet_list, + sg_list, + keypair_list, + network_interface_list, + instance_ebs_volumes_list, + ) -def _load_ec2_reservation_tx( - tx: neo4j.Transaction, - reservation_id: str, - reservation: Dict[str, Any], - current_aws_account_id: str, + +@timeit +def load_instances( + neo4j_session: neo4j.Session, + instance_list: List[Dict[str, Any]], region: str, + current_aws_account_id: str, update_tag: int, ) -> None: - query = """ - MERGE (reservation:EC2Reservation{reservationid: $ReservationId}) - ON CREATE SET reservation.firstseen = timestamp() - SET reservation.ownerid = $OwnerId, - reservation.requesterid = $RequesterId, - reservation.region = $Region, - reservation.lastupdated = $update_tag - WITH reservation - MATCH (awsAccount:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (awsAccount)-[r:RESOURCE]->(reservation) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - ReservationId=reservation_id, - OwnerId=reservation.get("OwnerId"), - RequesterId=reservation.get("RequesterId"), - AWS_ACCOUNT_ID=current_aws_account_id, + logger.info(f"Loading {len(instance_list)} EC2 instances for region '{region}' into graph.") + load( + neo4j_session, + EC2InstanceSchema(), + instance_list, + lastupdated=update_tag, Region=region, - update_tag=update_tag, + AWS_ID=current_aws_account_id, ) -def _load_ec2_instance_tx( - tx: neo4j.Transaction, - instanceid: str, - instance: Dict[str, Any], - reservation_id: str, - monitoring_state: str, - launch_time: datetime, - launch_time_unix: str, - instance_state: str, - current_aws_account_id: str, +@timeit +def load_ec2_reservations( + neo4j_session: neo4j.Session, + reservation_list: List[Dict[str, Any]], region: str, + current_aws_account_id: str, update_tag: int, ) -> None: - query = """ - MERGE (instance:Instance:EC2Instance{id: $InstanceId}) - ON CREATE SET instance.firstseen = timestamp() - SET instance.instanceid = $InstanceId, - instance.publicdnsname = $PublicDnsName, - instance.privateipaddress = $PrivateIpAddress, - instance.publicipaddress = $PublicIpAddress, - instance.imageid = $ImageId, - instance.instancetype = $InstanceType, - instance.monitoringstate = $MonitoringState, - instance.state = $State, - instance.launchtime = $LaunchTime, - instance.launchtimeunix = $LaunchTimeUnix, - instance.region = $Region, - instance.lastupdated = $update_tag, - instance.iaminstanceprofile = $IamInstanceProfile, - instance.availabilityzone = $AvailabilityZone, - instance.tenancy = $Tenancy, - instance.hostresourcegrouparn = $HostResourceGroupArn, - instance.platform = $Platform, - instance.architecture = $Architecture, - instance.ebsoptimized = $EbsOptimized, - instance.bootmode = $BootMode, - instance.instancelifecycle = $InstanceLifecycle, - instance.hibernationoptions = $HibernationOptions - WITH instance - MATCH (rez:EC2Reservation{reservationid: $ReservationId}) - MERGE (instance)-[r:MEMBER_OF_EC2_RESERVATION]->(rez) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - WITH instance - MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (aa)-[r:RESOURCE]->(instance) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - InstanceId=instanceid, - PublicDnsName=instance.get("PublicDnsName"), - PublicIpAddress=instance.get("PublicIpAddress"), - PrivateIpAddress=instance.get("PrivateIpAddress"), - ImageId=instance.get("ImageId"), - InstanceType=instance.get("InstanceType"), - IamInstanceProfile=instance.get("IamInstanceProfile", {}).get("Arn"), - ReservationId=reservation_id, - MonitoringState=monitoring_state, - LaunchTime=str(launch_time), - LaunchTimeUnix=launch_time_unix, - State=instance_state, - AvailabilityZone=instance.get("Placement", {}).get("AvailabilityZone"), - Tenancy=instance.get("Placement", {}).get("Tenancy"), - HostResourceGroupArn=instance.get("Placement", {}).get("HostResourceGroupArn"), - Platform=instance.get("Platform"), - Architecture=instance.get("Architecture"), - EbsOptimized=instance.get("EbsOptimized"), - BootMode=instance.get("BootMode"), - InstanceLifecycle=instance.get("InstanceLifecycle"), - HibernationOptions=instance.get("HibernationOptions", {}).get("Configured"), - AWS_ACCOUNT_ID=current_aws_account_id, + load( + neo4j_session, + EC2ReservationSchema(), + reservation_list, Region=region, - update_tag=update_tag, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) -def _load_ec2_subnet_tx(tx: neo4j.Transaction, instanceid: str, subnet_id: str, region: str, update_tag: int) -> None: - query = """ - MATCH (instance:EC2Instance{id: $InstanceId}) - MERGE (subnet:EC2Subnet{subnetid: $SubnetId}) - ON CREATE SET subnet.firstseen = timestamp() - SET subnet.region = $Region, - subnet.lastupdated = $update_tag - MERGE (instance)-[r:PART_OF_SUBNET]->(subnet) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - InstanceId=instanceid, - SubnetId=subnet_id, +@timeit +def load_ec2_subnets( + neo4j_session: neo4j.Session, + subnet_list: List[Dict[str, Any]], + region: str, + current_aws_account_id: str, + update_tag: int, +) -> None: + load( + neo4j_session, + EC2SubnetSchema(), + subnet_list, Region=region, - update_tag=update_tag, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) -def _load_ec2_keypairs_tx( - tx: neo4j.Transaction, - key_pair_arn: str, - key_name: str, +@timeit +def load_ec2_key_pairs( + neo4j_session: neo4j.Session, + key_pair_list: List[Dict[str, Any]], region: str, - instanceid: str, current_aws_account_id: str, update_tag: int, ) -> None: - query = """ - MERGE (keypair:KeyPair:EC2KeyPair{arn: $KeyPairARN, id: $KeyPairARN}) - ON CREATE SET keypair.firstseen = timestamp() - SET keypair.keyname = $KeyName, keypair.region = $Region, keypair.lastupdated = $update_tag - WITH keypair - MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (aa)-[r:RESOURCE]->(keypair) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - with keypair - MATCH (instance:EC2Instance{instanceid: $InstanceId}) - MERGE (instance)<-[r:SSH_LOGIN_TO]-(keypair) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - KeyPairARN=key_pair_arn, - KeyName=key_name, + load( + neo4j_session, + EC2KeyPairSchema(), + key_pair_list, Region=region, - InstanceId=instanceid, - AWS_ACCOUNT_ID=current_aws_account_id, - update_tag=update_tag, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) -def _load_ec2_security_groups_tx( - tx: neo4j.Transaction, - group_id: str, - group: Dict[str, Any], - instanceid: str, +@timeit +def load_ec2_security_groups( + neo4j_session: neo4j.Session, + sg_list: List[Dict[str, Any]], region: str, current_aws_account_id: str, update_tag: int, ) -> None: - query = """ - MERGE (group:EC2SecurityGroup{id: $GroupId}) - ON CREATE SET group.firstseen = timestamp(), group.groupid = $GroupId - SET group.name = $GroupName, group.region = $Region, group.lastupdated = $update_tag - WITH group - MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (aa)-[r:RESOURCE]->(group) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - WITH group - MATCH (instance:EC2Instance{instanceid: $InstanceId}) - MERGE (instance)-[r:MEMBER_OF_EC2_SECURITY_GROUP]->(group) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - GroupId=group_id, - GroupName=group.get("GroupName"), - InstanceId=instanceid, + load( + neo4j_session, + EC2SecurityGroupSchema(), + sg_list, Region=region, - AWS_ACCOUNT_ID=current_aws_account_id, - update_tag=update_tag, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) @timeit -def load_ec2_instances( - neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, +def load_ec2_network_interfaces( + neo4j_session: neo4j.Session, + network_interface_list: List[Dict[str, Any]], + region: str, + current_aws_account_id: str, update_tag: int, ) -> None: - for reservation in data: - reservation_id = reservation["ReservationId"] - neo4j_session.write_transaction( - _load_ec2_reservation_tx, - reservation_id, - reservation, - current_aws_account_id, - region, - update_tag, - ) - - for instance in reservation["Instances"]: - instanceid = instance["InstanceId"] - - monitoring_state = instance.get("Monitoring", {}).get("State") - - instance_state = instance.get("State", {}).get("Name") - - # NOTE this is a hack because we're using a version of Neo4j that doesn't support temporal data types - launch_time = instance.get("LaunchTime") - if launch_time: - launch_time_unix = str(time.mktime(launch_time.timetuple())) - else: - launch_time_unix = "" - - neo4j_session.write_transaction( - _load_ec2_instance_tx, - instanceid, - instance, - reservation_id, - monitoring_state, - launch_time, - launch_time_unix, - instance_state, - current_aws_account_id, - region, - update_tag, - ) - - # SubnetId can return None intermittently so attach only if non-None. - subnet_id = instance.get('SubnetId') - if subnet_id: - neo4j_session.write_transaction(_load_ec2_subnet_tx, instanceid, subnet_id, region, update_tag) - - if instance.get("KeyName"): - key_name = instance["KeyName"] - key_pair_arn = f'arn:aws:ec2:{region}:{current_aws_account_id}:key-pair/{key_name}' - neo4j_session.write_transaction( - _load_ec2_keypairs_tx, - key_pair_arn, - key_name, - region, - instanceid, - current_aws_account_id, - update_tag, - ) - - if instance.get("SecurityGroups"): - for group in instance["SecurityGroups"]: - group_id = group["GroupId"] - neo4j_session.write_transaction( - _load_ec2_security_groups_tx, - group_id, - group, - instanceid, - region, - current_aws_account_id, - update_tag, - ) - - load_ec2_instance_network_interfaces(neo4j_session, instance, update_tag) - instance_ebs_volumes_list = get_ec2_instance_ebs_volumes(instance) - load_ec2_instance_ebs_volumes(neo4j_session, instance_ebs_volumes_list, current_aws_account_id, update_tag) + load( + neo4j_session, + EC2NetworkInterfaceSchema(), + network_interface_list, + Region=region, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, + ) @timeit -def get_ec2_instance_ebs_volumes(instance: Dict) -> List[Dict]: - instance_ebs_volumes_list: List[Dict] = [] - if 'BlockDeviceMappings' in instance and len(instance['BlockDeviceMappings']) > 0: - for mapping in instance['BlockDeviceMappings']: - if 'VolumeId' in mapping['Ebs']: - mapping['InstanceId'] = instance["InstanceId"] - instance_ebs_volumes_list.append(mapping) - return instance_ebs_volumes_list - - -def _load_ec2_instance_ebs_tx( - tx: neo4j.Transaction, - ebs_data: List[Dict[str, Any]], - update_tag: int, +def load_ec2_instance_nodes( + neo4j_session: neo4j.Session, + data: List[Dict], + region: str, current_aws_account_id: str, + update_tag: int, ) -> None: - query = """ - UNWIND $ebs_mappings_list as em - MERGE (vol:EBSVolume{id: em.Ebs.VolumeId}) - ON CREATE SET vol.firstseen = timestamp() - SET vol.lastupdated = $update_tag, - vol.deleteontermination = em.Ebs.DeleteOnTermination, - vol.snapshotid = vol.SnapshotId - WITH vol, em - MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (aa)-[r:RESOURCE]->(vol) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - WITH vol, em - MATCH (instance:EC2Instance{instanceid: em.InstanceId}) - MERGE (vol)-[r:ATTACHED_TO]->(instance) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $update_tag - """ - tx.run( - query, - ebs_mappings_list=ebs_data, - update_tag=update_tag, - AWS_ACCOUNT_ID=current_aws_account_id, + load( + neo4j_session, + EC2InstanceSchema(), + data, + Region=region, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) @timeit def load_ec2_instance_ebs_volumes( - neo4j_session: neo4j.Session, ebs_data: List[Dict[str, Any]], current_aws_account_id: str, update_tag: int, + neo4j_session: neo4j.Session, + ebs_data: List[Dict[str, Any]], + region: str, + current_aws_account_id: str, + update_tag: int, ) -> None: - neo4j_session.write_transaction( - _load_ec2_instance_ebs_tx, + load( + neo4j_session, + EBSVolumeSchema(), ebs_data, - update_tag, - current_aws_account_id, + Region=region, + AWS_ID=current_aws_account_id, + lastupdated=update_tag, ) +def load_ec2_instance_data( + neo4j_session: neo4j.Session, + region: str, + current_aws_account_id: str, + update_tag: int, + reservation_list: List[Dict[str, Any]], + instance_list: List[Dict[str, Any]], + subnet_list: List[Dict[str, Any]], + sg_list: List[Dict[str, Any]], + key_pair_list: List[Dict[str, Any]], + nic_list: List[Dict[str, Any]], + ebs_volumes_list: List[Dict[str, Any]], +) -> None: + load_ec2_reservations(neo4j_session, reservation_list, region, current_aws_account_id, update_tag) + load_ec2_instance_nodes(neo4j_session, instance_list, region, current_aws_account_id, update_tag) + load_ec2_subnets(neo4j_session, subnet_list, region, current_aws_account_id, update_tag) + load_ec2_security_groups(neo4j_session, sg_list, region, current_aws_account_id, update_tag) + load_ec2_key_pairs(neo4j_session, key_pair_list, region, current_aws_account_id, update_tag) + load_ec2_network_interfaces(neo4j_session, nic_list, region, current_aws_account_id, update_tag) + load_ec2_instance_ebs_volumes(neo4j_session, ebs_volumes_list, region, current_aws_account_id, update_tag) + + @timeit -def cleanup_ec2_instances(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: - run_cleanup_job('aws_import_ec2_instances_cleanup.json', neo4j_session, common_job_parameters) +def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None: + logger.debug("Running EC2 instance cleanup") + GraphJob.from_node_schema(EC2ReservationSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(EC2InstanceSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(EC2SubnetSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(EC2SecurityGroupSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(EC2KeyPairSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(EC2NetworkInterfaceSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session) @timeit def sync_ec2_instances( - neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], - current_aws_account_id: str, update_tag: int, common_job_parameters: Dict, + neo4j_session: neo4j.Session, + boto3_session: boto3.session.Session, + regions: List[str], + current_aws_account_id: str, + update_tag: int, + common_job_parameters: Dict[str, Any], ) -> None: for region in regions: logger.info("Syncing EC2 instances for region '%s' in account '%s'.", region, current_aws_account_id) - data = get_ec2_instances(boto3_session, region) - load_ec2_instances(neo4j_session, data, region, current_aws_account_id, update_tag) - cleanup_ec2_instances(neo4j_session, common_job_parameters) + reservations = get_ec2_instances(boto3_session, region) + ( + reservation_list, + instance_list, + subnet_list, + sg_list, + key_pair_list, + nic_list, + ebs_volumes_list, + ) = transform_ec2_instances(reservations, region, current_aws_account_id) + load_ec2_instance_data( + neo4j_session, + region, + current_aws_account_id, + update_tag, + reservation_list, + instance_list, + subnet_list, + sg_list, + key_pair_list, + nic_list, + ebs_volumes_list, + ) + cleanup(neo4j_session, common_job_parameters) diff --git a/cartography/models/aws/ec2/instances.py b/cartography/models/aws/ec2/instances.py new file mode 100644 index 000000000..86f1ecbb6 --- /dev/null +++ b/cartography/models/aws/ec2/instances.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EC2InstanceNodeProperties(CartographyNodeProperties): + # TODO arn: PropertyRef = PropertyRef('Arn', extra_index=True) + id: PropertyRef = PropertyRef('InstanceId') + instanceid: PropertyRef = PropertyRef('InstanceId', extra_index=True) + publicdnsname: PropertyRef = PropertyRef('PublicDnsName', extra_index=True) + privateipaddress: PropertyRef = PropertyRef('PrivateIpAddress') + publicipaddress: PropertyRef = PropertyRef('PublicIpAddress') + imageid: PropertyRef = PropertyRef('ImageId') + instancetype: PropertyRef = PropertyRef('InstanceType') + monitoringstate: PropertyRef = PropertyRef('MonitoringState') + state: PropertyRef = PropertyRef('State') + launchtime: PropertyRef = PropertyRef('LaunchTime') + launchtimeunix: PropertyRef = PropertyRef('LaunchTimeUnix') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + iaminstanceprofile: PropertyRef = PropertyRef('IamInstanceProfile') + availabilityzone: PropertyRef = PropertyRef('AvailabilityZone') + tenancy: PropertyRef = PropertyRef('Tenancy') + hostresourcegrouparn: PropertyRef = PropertyRef('HostResourceGroupArn') + platform: PropertyRef = PropertyRef('Platform') + architecture: PropertyRef = PropertyRef('Architecture') + ebsoptimized: PropertyRef = PropertyRef('EbsOptimized') + bootmode: PropertyRef = PropertyRef('BootMode') + instancelifecycle: PropertyRef = PropertyRef('InstanceLifecycle') + hibernationoptions: PropertyRef = PropertyRef('HibernationOption') + + +@dataclass(frozen=True) +class EC2InstanceToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2InstanceToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EC2InstanceToAwsAccountRelProperties = EC2InstanceToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EC2InstanceToEC2ReservationRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2InstanceToEC2Reservation(CartographyRelSchema): + target_node_label: str = 'EC2Reservation' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'reservationid': PropertyRef('ReservationId')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "MEMBER_OF_EC2_RESERVATION" + properties: EC2InstanceToEC2ReservationRelProperties = EC2InstanceToEC2ReservationRelProperties() + + +@dataclass(frozen=True) +class EC2InstanceSchema(CartographyNodeSchema): + label: str = 'EC2Instance' + properties: EC2InstanceNodeProperties = EC2InstanceNodeProperties() + sub_resource_relationship: EC2InstanceToAWSAccount = EC2InstanceToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EC2InstanceToEC2Reservation(), + ], + ) diff --git a/cartography/models/aws/ec2/keypairs.py b/cartography/models/aws/ec2/keypairs.py new file mode 100644 index 000000000..84b325345 --- /dev/null +++ b/cartography/models/aws/ec2/keypairs.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EC2KeyPairNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('KeyPairArn') + arn: PropertyRef = PropertyRef('KeyPairArn', extra_index=True) + keyname: PropertyRef = PropertyRef('KeyName') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2KeyPairToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2KeyPairToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EC2KeyPairToAwsAccountRelProperties = EC2KeyPairToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EC2KeyPairToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2KeyPairToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('InstanceId')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "SSH_LOGIN_TO" + properties: EC2KeyPairToEC2InstanceRelProperties = EC2KeyPairToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class EC2KeyPairSchema(CartographyNodeSchema): + label: str = 'EC2KeyPair' + properties: EC2KeyPairNodeProperties = EC2KeyPairNodeProperties() + sub_resource_relationship: EC2KeyPairToAWSAccount = EC2KeyPairToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EC2KeyPairToEC2Instance(), + ], + ) diff --git a/cartography/models/aws/ec2/networkinterfaces.py b/cartography/models/aws/ec2/networkinterfaces.py new file mode 100644 index 000000000..3c536407c --- /dev/null +++ b/cartography/models/aws/ec2/networkinterfaces.py @@ -0,0 +1,102 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EC2NetworkInterfaceNodeProperties(CartographyNodeProperties): + # arn: PropertyRef = PropertyRef('Arn', extra_index=True) TODO decide this + id: PropertyRef = PropertyRef('NetworkInterfaceId') + status: PropertyRef = PropertyRef('Status') + mac_address: PropertyRef = PropertyRef('MacAddress') + description: PropertyRef = PropertyRef('Description') + private_dns_name: PropertyRef = PropertyRef('PrivateDnsName') + private_ip_address: PropertyRef = PropertyRef('PrivateIpAddress') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EC2NetworkInterfaceToAwsAccountRelProperties = EC2NetworkInterfaceToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('InstanceId')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "NETWORK_INTERFACE" + properties: EC2NetworkInterfaceToEC2InstanceRelProperties = EC2NetworkInterfaceToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToEC2SubnetRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToEC2Subnet(CartographyRelSchema): + target_node_label: str = 'EC2Subnet' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('SubnetId')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "PART_OF_SUBNET" + properties: EC2NetworkInterfaceToEC2SubnetRelProperties = EC2NetworkInterfaceToEC2SubnetRelProperties() + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToEC2SecurityGroupRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2NetworkInterfaceToEC2SecurityGroup(CartographyRelSchema): + target_node_label: str = 'EC2SecurityGroup' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('GroupId')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "MEMBER_OF_EC2_SECURITY_GROUP" + properties: EC2NetworkInterfaceToEC2SubnetRelProperties = EC2NetworkInterfaceToEC2SubnetRelProperties() + + +@dataclass(frozen=True) +class EC2NetworkInterfaceSchema(CartographyNodeSchema): + label: str = 'NetworkInterface' + properties: EC2NetworkInterfaceNodeProperties = EC2NetworkInterfaceNodeProperties() + sub_resource_relationship: EC2NetworkInterfaceToAWSAccount = EC2NetworkInterfaceToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EC2NetworkInterfaceToEC2Instance(), + EC2NetworkInterfaceToEC2Subnet(), + EC2NetworkInterfaceToEC2SecurityGroup(), + ], + ) diff --git a/cartography/models/aws/ec2/reservations.py b/cartography/models/aws/ec2/reservations.py new file mode 100644 index 000000000..48a3178ba --- /dev/null +++ b/cartography/models/aws/ec2/reservations.py @@ -0,0 +1,43 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EC2ReservationNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('ReservationId') + reservationid: PropertyRef = PropertyRef('ReservationId') + ownerid: PropertyRef = PropertyRef('OwnerId') + requesterid: PropertyRef = PropertyRef('RequesterId') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2ReservationToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2ReservationToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EC2ReservationToAwsAccountRelProperties = EC2ReservationToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EC2ReservationSchema(CartographyNodeSchema): + label: str = 'EC2Reservation' + properties: EC2ReservationNodeProperties = EC2ReservationNodeProperties() + sub_resource_relationship: EC2ReservationToAWSAccount = EC2ReservationToAWSAccount() diff --git a/cartography/models/aws/ec2/securitygroups.py b/cartography/models/aws/ec2/securitygroups.py new file mode 100644 index 000000000..34813340c --- /dev/null +++ b/cartography/models/aws/ec2/securitygroups.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EC2SecurityGroupNodeProperties(CartographyNodeProperties): + # arn: PropertyRef = PropertyRef('Arn', extra_index=True) # TODO decide on this + id: PropertyRef = PropertyRef('GroupId') + groupid: PropertyRef = PropertyRef('GroupId', extra_index=True) + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2SecurityGroupToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2SecurityGroupToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EC2SecurityGroupToAwsAccountRelProperties = EC2SecurityGroupToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EC2SubnetToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2SecurityGroupToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('InstanceId')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "MEMBER_OF_EC2_SECURITY_GROUP" + properties: EC2SubnetToEC2InstanceRelProperties = EC2SubnetToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class EC2SecurityGroupSchema(CartographyNodeSchema): + label: str = 'EC2SecurityGroup' + properties: EC2SecurityGroupNodeProperties = EC2SecurityGroupNodeProperties() + sub_resource_relationship: EC2SecurityGroupToAWSAccount = EC2SecurityGroupToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EC2SecurityGroupToEC2Instance(), + ], + ) diff --git a/cartography/models/aws/ec2/subnets.py b/cartography/models/aws/ec2/subnets.py new file mode 100644 index 000000000..62db69909 --- /dev/null +++ b/cartography/models/aws/ec2/subnets.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EC2SubnetNodeProperties(CartographyNodeProperties): + # arn: PropertyRef = PropertyRef('Arn', extra_index=True) TODO decide this + id: PropertyRef = PropertyRef('SubnetId') + subnet_id: PropertyRef = PropertyRef('SubnetId', extra_index=True) + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2SubnetToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2SubnetToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EC2SubnetToAwsAccountRelProperties = EC2SubnetToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EC2SubnetToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EC2SubnetToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('InstanceId')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "PART_OF_SUBNET" + properties: EC2SubnetToEC2InstanceRelProperties = EC2SubnetToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class EC2SubnetSchema(CartographyNodeSchema): + label: str = 'EC2Subnet' + properties: EC2SubnetNodeProperties = EC2SubnetNodeProperties() + sub_resource_relationship: EC2SubnetToAWSAccount = EC2SubnetToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EC2SubnetToEC2Instance(), + ], + ) diff --git a/cartography/models/aws/ec2/volumes.py b/cartography/models/aws/ec2/volumes.py new file mode 100644 index 000000000..2140f4fcd --- /dev/null +++ b/cartography/models/aws/ec2/volumes.py @@ -0,0 +1,63 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EBSVolumeNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('VolumeId') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination') + + +@dataclass(frozen=True) +class EBSVolumeToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EBSVolumeToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EBSVolumeToAwsAccountRelProperties = EBSVolumeToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EBSVolumeToEC2InstanceRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EBSVolumeToEC2Instance(CartographyRelSchema): + target_node_label: str = 'EC2Instance' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('InstanceId')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "ATTACHED_TO" + properties: EBSVolumeToEC2InstanceRelProperties = EBSVolumeToEC2InstanceRelProperties() + + +@dataclass(frozen=True) +class EBSVolumeSchema(CartographyNodeSchema): + label: str = 'EBSVolume' + properties: EBSVolumeNodeProperties = EBSVolumeNodeProperties() + sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + EBSVolumeToEC2Instance(), + ], + ) diff --git a/tests/integration/cartography/intel/aws/ec2/test_ec2_instances.py b/tests/integration/cartography/intel/aws/ec2/test_ec2_instances.py index 77f422d1d..e9daf6bce 100644 --- a/tests/integration/cartography/intel/aws/ec2/test_ec2_instances.py +++ b/tests/integration/cartography/intel/aws/ec2/test_ec2_instances.py @@ -1,151 +1,300 @@ -import cartography.intel.aws.ec2 +from unittest.mock import MagicMock +from unittest.mock import patch + +import cartography.intel.aws.ec2.instances import cartography.intel.aws.iam import tests.data.aws.ec2.instances import tests.data.aws.iam +from cartography.intel.aws.ec2.instances import sync_ec2_instances from cartography.util import run_analysis_job +from tests.data.aws.ec2.instances import DESCRIBE_INSTANCES +from tests.integration.cartography.intel.aws.common import create_test_account +from tests.integration.util import check_nodes +from tests.integration.util import check_rels TEST_ACCOUNT_ID = '000000000000' TEST_REGION = 'us-east-1' TEST_UPDATE_TAG = 123456789 -def test_load_ec2_instances(neo4j_session, *args): +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +def test_sync_ec2_instances(mock_get_instances, neo4j_session): """ Ensure that instances actually get loaded and have their key fields """ - data = tests.data.aws.ec2.instances.DESCRIBE_INSTANCES['Reservations'] - cartography.intel.aws.ec2.instances.load_ec2_instances( - neo4j_session, data, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, - ) + # Arrange + boto3_session = MagicMock() + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) - expected_nodes = { - ( - "i-01", - "i-01", - ), - ( - "i-02", - "i-02", - ), - ( - "i-03", - "i-03", - ), - ( - "i-04", - "i-04", - ), - } - - nodes = neo4j_session.run( - """ - MATCH (i:EC2Instance) return i.id, i.instanceid - """, + # Act + sync_ec2_instances( + neo4j_session, + boto3_session, + [TEST_REGION], + TEST_ACCOUNT_ID, + TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) - actual_nodes = { - ( - n['i.id'], - n['i.instanceid'], - ) - for n in nodes + + # Assert EC2 instances exist + assert check_nodes(neo4j_session, 'EC2Instance', ['id', 'instanceid']) == { + ("i-01", "i-01"), + ("i-02", "i-02"), + ("i-03", "i-03"), + ("i-04", "i-04"), } - assert actual_nodes == expected_nodes + # Assert that instances are connected to their expected reservations + assert check_rels( + neo4j_session, + 'EC2Reservation', + 'reservationid', + 'EC2Instance', + 'id', + 'MEMBER_OF_EC2_RESERVATION', + rel_direction_right=False, + ) == { + ("r-01", "i-01"), + ("r-02", "i-02"), + ("r-03", "i-03"), + ("r-03", "i-04"), + } -def test_ec2_reservations_to_instances(neo4j_session, *args): - """ - Ensure that instances are connected to their expected reservations - """ - data = tests.data.aws.ec2.instances.DESCRIBE_INSTANCES['Reservations'] - cartography.intel.aws.ec2.instances.load_ec2_instances( - neo4j_session, data, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, - ) + # Assert network interface to instances + assert check_rels( + neo4j_session, + 'NetworkInterface', + 'id', + 'EC2Instance', + 'id', + 'NETWORK_INTERFACE', + rel_direction_right=False, + ) == { + ('eni-de', 'i-01'), + ('eni-87', 'i-02'), + ('eni-75', 'i-03'), + ('eni-76', 'i-04'), + } - expected_nodes = { - ( - "r-01", - "i-01", - ), - ( - "r-02", - "i-02", - ), - ( - "r-03", - "i-03", - ), - ( - "r-03", - "i-04", - ), - } - - nodes = neo4j_session.run( - """ - MATCH (r:EC2Reservation)<-[:MEMBER_OF_EC2_RESERVATION]-(i:EC2Instance) RETURN r.reservationid, i.id - """, - ) - actual_nodes = { - ( - n['r.reservationid'], - n['i.id'], - ) - for n in nodes + # Assert network interface to subnet + assert check_rels( + neo4j_session, + 'NetworkInterface', + 'id', + 'EC2Subnet', + 'id', + 'PART_OF_SUBNET', + rel_direction_right=True, + ) == { + ('eni-75', 'SOME_SUBNET_1'), + ('eni-76', 'SOME_SUBNET_1'), + ('eni-87', 'SOME_SUBNET_1'), + } + + # Assert network interface to security group + assert check_rels( + neo4j_session, + 'NetworkInterface', + 'id', + 'EC2SecurityGroup', + 'id', + 'MEMBER_OF_EC2_SECURITY_GROUP', + rel_direction_right=True, + ) == { + ('eni-75', 'SOME_GROUP_ID_2'), + ('eni-75', 'THIS_IS_A_SG_ID'), + ('eni-76', 'SOME_GROUP_ID_2'), + ('eni-76', 'THIS_IS_A_SG_ID'), + ('eni-87', 'SOME_GROUP_ID_2'), + ('eni-87', 'SOME_GROUP_ID_3'), + ('eni-de', 'SOME_GROUP_ID_2'), + ('eni-de', 'sg-GROUP-ID'), + } + + # Assert network interface to AWS account + assert check_rels( + neo4j_session, + 'NetworkInterface', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('eni-75', '000000000000'), + ('eni-76', '000000000000'), + ('eni-87', '000000000000'), + ('eni-de', '000000000000'), + } + + # Assert EC2 Key Pair to AWS account + assert check_rels( + neo4j_session, + 'EC2KeyPair', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('arn:aws:ec2:us-east-1:000000000000:key-pair/boot', '000000000000'), + } + + # Assert EC2 Key Pair to EC2 instance + assert check_rels( + neo4j_session, + 'EC2KeyPair', + 'id', + 'EC2Instance', + 'id', + 'SSH_LOGIN_TO', + rel_direction_right=True, + ) == { + ('arn:aws:ec2:us-east-1:000000000000:key-pair/boot', 'i-01'), + ('arn:aws:ec2:us-east-1:000000000000:key-pair/boot', 'i-02'), + ('arn:aws:ec2:us-east-1:000000000000:key-pair/boot', 'i-03'), + ('arn:aws:ec2:us-east-1:000000000000:key-pair/boot', 'i-04'), + } + + # Assert EC2 Security Group to EC2 Instance + assert check_rels( + neo4j_session, + 'EC2SecurityGroup', + 'id', + 'EC2Instance', + 'id', + 'MEMBER_OF_EC2_SECURITY_GROUP', + rel_direction_right=False, + ) == { + ('sg-GROUP-ID', 'i-01'), + ('SOME_GROUP_ID_2', 'i-01'), + ('SOME_GROUP_ID_2', 'i-02'), + ('SOME_GROUP_ID_2', 'i-03'), + ('SOME_GROUP_ID_2', 'i-04'), + ('SOME_GROUP_ID_3', 'i-02'), + ('THIS_IS_A_SG_ID', 'i-03'), + ('THIS_IS_A_SG_ID', 'i-04'), + } + + # Assert EC2 Security Group to AWS account + assert check_rels( + neo4j_session, + 'EC2SecurityGroup', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('SOME_GROUP_ID_2', '000000000000'), + ('SOME_GROUP_ID_3', '000000000000'), + ('THIS_IS_A_SG_ID', '000000000000'), + ('sg-GROUP-ID', '000000000000'), + } + + # Assert EC2 Subnet to EC2 Instance + assert check_rels( + neo4j_session, + 'EC2Subnet', + 'id', + 'EC2Instance', + 'id', + 'PART_OF_SUBNET', + rel_direction_right=False, + ) == { + ('SOME_SUBNET_1', 'i-02'), + ('SOME_SUBNET_1', 'i-03'), + ('SOME_SUBNET_1', 'i-04'), + } + + # Assert EC2 Subnet to AWS account + assert check_rels( + neo4j_session, + 'EC2Subnet', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('SOME_SUBNET_1', '000000000000'), + } + + # Assert EBS Volume to EC2 Instance + assert check_rels( + neo4j_session, + 'EBSVolume', + 'id', + 'EC2Instance', + 'id', + 'ATTACHED_TO', + rel_direction_right=True, + ) == { + ('vol-0df', 'i-01'), + ('vol-03', 'i-02'), + ('vol-09', 'i-03'), + ('vol-04', 'i-04'), + } + + # Assert EBS Volume to AWS account + assert check_rels( + neo4j_session, + 'EBSVolume', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('vol-03', '000000000000'), + ('vol-04', '000000000000'), + ('vol-09', '000000000000'), + ('vol-0df', '000000000000'), } - assert actual_nodes == expected_nodes -def test_ec2_iaminstanceprofiles(neo4j_session): +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +def test_ec2_iaminstanceprofiles(mock_get_instances, neo4j_session): """ Ensure that EC2Instances are attached to the IAM Roles that they can assume due to their IAM instance profiles """ - neo4j_session.run( - """ - MERGE (aws:AWSAccount{id: $aws_account_id}) - ON CREATE SET aws.firstseen = timestamp() - SET aws.lastupdated = $aws_update_tag - """, - aws_account_id=TEST_ACCOUNT_ID, - aws_update_tag=TEST_UPDATE_TAG, - ) - - data_instances = tests.data.aws.ec2.instances.DESCRIBE_INSTANCES['Reservations'] + # Arrange + boto3_session = MagicMock() + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) data_iam = tests.data.aws.iam.INSTACE['Roles'] - - cartography.intel.aws.ec2.instances.load_ec2_instances( - neo4j_session, data_instances, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + sync_ec2_instances( + neo4j_session, + boto3_session, + [TEST_REGION], + TEST_ACCOUNT_ID, + TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) - cartography.intel.aws.iam.load_roles( neo4j_session, data_iam, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, ) - common_job_parameters = { "UPDATE_TAG": TEST_UPDATE_TAG, } + # Act run_analysis_job( 'aws_ec2_iaminstanceprofile.json', neo4j_session, common_job_parameters, ) - expected_nodes = { - ('arn:aws:iam::000000000000:role/SERVICE_NAME_2', 'i-02'), - ('arn:aws:iam::000000000000:role/ANOTHER_SERVICE_NAME', 'i-03'), - ('arn:aws:iam::000000000000:role/ANOTHER_SERVICE_NAME', 'i-04'), + # Assert + assert check_rels( + neo4j_session, + 'EC2Instance', + 'id', + 'AWSRole', + 'arn', + 'STS_ASSUMEROLE_ALLOW', + rel_direction_right=True, + ) == { + ('i-02', 'arn:aws:iam::000000000000:role/SERVICE_NAME_2'), + ('i-03', 'arn:aws:iam::000000000000:role/ANOTHER_SERVICE_NAME'), + ('i-04', 'arn:aws:iam::000000000000:role/ANOTHER_SERVICE_NAME'), } - - nodes = neo4j_session.run( - """ - MATCH (i:EC2Instance)-[:STS_ASSUMEROLE_ALLOW]->(r:AWSRole) return r.arn, i.id - """, - ) - actual_nodes = { - ( - n['r.arn'], - n['i.id'], - ) - for n in nodes - } - assert actual_nodes == expected_nodes diff --git a/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py b/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py index 858b8de90..705d2db3e 100644 --- a/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py +++ b/tests/integration/cartography/intel/aws/ec2/test_ec2_volumes.py @@ -1,7 +1,13 @@ +from unittest.mock import MagicMock +from unittest.mock import patch + import cartography.intel.aws.ec2.instances import cartography.intel.aws.ec2.volumes import tests.data.aws.ec2.instances import tests.data.aws.ec2.volumes +from cartography.intel.aws.ec2.instances import sync_ec2_instances +from tests.data.aws.ec2.instances import DESCRIBE_INSTANCES + TEST_ACCOUNT_ID = '000000000000' TEST_REGION = 'eu-west-1' @@ -80,11 +86,17 @@ def test_load_volume_to_account_rels(neo4j_session): assert actual == expected -def test_load_volume_to_instance_rels(neo4j_session): +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +def test_load_volume_to_instance_rels(mock_get_instances, neo4j_session): # Arrange: Load in ec2 instances first - instance_data = tests.data.aws.ec2.instances.DESCRIBE_INSTANCES['Reservations'] - cartography.intel.aws.ec2.instances.load_ec2_instances( - neo4j_session, instance_data, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + boto3_session = MagicMock() + sync_ec2_instances( + neo4j_session, + boto3_session, + [TEST_REGION], + TEST_ACCOUNT_ID, + TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) # Prep the volume data raw_volumes = tests.data.aws.ec2.volumes.DESCRIBE_VOLUMES diff --git a/tests/integration/cartography/intel/aws/test_resourcegroupstaggingapi.py b/tests/integration/cartography/intel/aws/test_resourcegroupstaggingapi.py index 420b6909c..aa11115b3 100644 --- a/tests/integration/cartography/intel/aws/test_resourcegroupstaggingapi.py +++ b/tests/integration/cartography/intel/aws/test_resourcegroupstaggingapi.py @@ -1,9 +1,12 @@ import copy +from unittest.mock import MagicMock +from unittest.mock import patch import cartography.intel.aws.ec2 import cartography.intel.aws.resourcegroupstaggingapi as rgta -import tests.data.aws.ec2.instances import tests.data.aws.resourcegroupstaggingapi +from cartography.intel.aws.ec2.instances import sync_ec2_instances +from tests.data.aws.ec2.instances import DESCRIBE_INSTANCES from tests.integration.cartography.intel.aws.common import create_test_account @@ -14,19 +17,28 @@ def _ensure_local_neo4j_has_test_ec2_instance_data(neo4j_session): create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) - data = tests.data.aws.ec2.instances.DESCRIBE_INSTANCES['Reservations'] - cartography.intel.aws.ec2.instances.load_ec2_instances( - neo4j_session, data, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + boto3_session = MagicMock() + sync_ec2_instances( + neo4j_session, + boto3_session, + [TEST_REGION], + TEST_ACCOUNT_ID, + TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) -def test_transform_and_load_ec2_tags(neo4j_session): +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +def test_transform_and_load_ec2_tags(mock_get_instances, neo4j_session): """ Verify that (:EC2Instance)-[:TAGGED]->(:AWSTag) relationships work as expected. """ + # Arrange _ensure_local_neo4j_has_test_ec2_instance_data(neo4j_session) resource_type = 'ec2:instance' get_resources_response = copy.deepcopy(tests.data.aws.resourcegroupstaggingapi.GET_RESOURCES_RESPONSE) + + # Act rgta.transform_tags(get_resources_response, resource_type) rgta.load_tags( neo4j_session, @@ -36,10 +48,11 @@ def test_transform_and_load_ec2_tags(neo4j_session): TEST_ACCOUNT_ID, TEST_UPDATE_TAG, ) + + # Assert expected = { ('i-01', 'TestKey:TestValue'), } - # Fetch relationships result = neo4j_session.run( """ @@ -49,10 +62,9 @@ def test_transform_and_load_ec2_tags(neo4j_session): actual = { (r['n1.id'], r['n2.id']) for r in result } - assert actual == expected - # Test the cleanup removes old tags that are not attached to any resource + # Act: Test the cleanup removes old tags that are not attached to any resource new_update_tag = TEST_UPDATE_TAG + 1 new_response = copy.deepcopy(tests.data.aws.resourcegroupstaggingapi.GET_RESOURCES_RESPONSE_UPDATED) rgta.transform_tags(new_response, resource_type) @@ -66,11 +78,12 @@ def test_transform_and_load_ec2_tags(neo4j_session): ) neo4j_session.run('MATCH (i:EC2Instance) DETACH DELETE (i) RETURN COUNT(*) as TotalCompleted') rgta.cleanup(neo4j_session, {'AWS_ID': TEST_ACCOUNT_ID, 'UPDATE_TAG': new_update_tag}) + + # Assert expected = { ('TestKeyUpdated:TestValueUpdated'), } result = neo4j_session.run('MATCH (t:AWSTag) RETURN t.id') - print(result) actual = { (r['t.id']) for r in result } diff --git a/tests/integration/cartography/intel/aws/test_ssm.py b/tests/integration/cartography/intel/aws/test_ssm.py index 84dcf1e24..a27e1a3f4 100644 --- a/tests/integration/cartography/intel/aws/test_ssm.py +++ b/tests/integration/cartography/intel/aws/test_ssm.py @@ -1,7 +1,13 @@ +from unittest.mock import MagicMock +from unittest.mock import patch + import cartography.intel.aws.ec2.instances import cartography.intel.aws.ssm import tests.data.aws.ec2.instances import tests.data.aws.ssm +from cartography.intel.aws.ec2.instances import sync_ec2_instances +from tests.data.aws.ec2.instances import DESCRIBE_INSTANCES +from tests.integration.cartography.intel.aws.common import create_test_account TEST_ACCOUNT_ID = '000000000000' TEST_REGION = 'us-east-1' @@ -9,28 +15,21 @@ def _ensure_load_instances(neo4j_session): - data = tests.data.aws.ec2.instances.DESCRIBE_INSTANCES['Reservations'] - cartography.intel.aws.ec2.instances.load_ec2_instances( - neo4j_session, data, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, - ) - - -def _ensure_load_account(neo4j_session): - # Create Test AWSAccount - neo4j_session.run( - """ - MERGE (aws:AWSAccount{id: $aws_account_id}) - ON CREATE SET aws.firstseen = timestamp() - SET aws.lastupdated = $aws_update_tag - """, - aws_account_id=TEST_ACCOUNT_ID, - aws_update_tag=TEST_UPDATE_TAG, + boto3_session = MagicMock() + sync_ec2_instances( + neo4j_session, + boto3_session, + [TEST_REGION], + TEST_ACCOUNT_ID, + TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) -def test_load_instance_information(neo4j_session): +@patch.object(cartography.intel.aws.ec2.instances, 'get_ec2_instances', return_value=DESCRIBE_INSTANCES['Reservations']) +def test_load_instance_information(mock_get_instances, neo4j_session): # load account and instances, to be able to test relationships - _ensure_load_account(neo4j_session) + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) _ensure_load_instances(neo4j_session) cartography.intel.aws.ssm.load_instance_information( @@ -86,10 +85,11 @@ def test_load_instance_information(neo4j_session): def test_load_instance_patches(neo4j_session): - # load account and instances, to be able to test relationships - _ensure_load_account(neo4j_session) + # Arrange: load account and instances, to be able to test relationships + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) _ensure_load_instances(neo4j_session) + # Act cartography.intel.aws.ssm.load_instance_patches( neo4j_session, tests.data.aws.ssm.INSTANCE_PATCHES, @@ -98,11 +98,11 @@ def test_load_instance_patches(neo4j_session): TEST_UPDATE_TAG, ) + # Assert expected_nodes = { ("i-01-test.x86_64:0:4.2.46-34.amzn2", 1636404678, ("CVE-2022-0000", "CVE-2022-0001")), ("i-02-test.x86_64:0:4.2.46-34.amzn2", 1636404678, ("CVE-2022-0000", "CVE-2022-0001")), } - nodes = neo4j_session.run( """ MATCH (:AWSAccount{id: "000000000000"})-[:RESOURCE]->(n:SSMInstancePatch) @@ -121,6 +121,7 @@ def test_load_instance_patches(neo4j_session): } assert actual_nodes == expected_nodes + # Assert nodes = neo4j_session.run( """ MATCH (:EC2Instance{id: "i-01"})-[:HAS_PATCH]->(n:SSMInstancePatch) @@ -130,6 +131,7 @@ def test_load_instance_patches(neo4j_session): actual_nodes = {n["n.id"] for n in nodes} assert actual_nodes == {"i-01-test.x86_64:0:4.2.46-34.amzn2"} + # Assert nodes = neo4j_session.run( """ MATCH (:EC2Instance{id: "i-02"})-[:HAS_PATCH]->(n:SSMInstancePatch) From 940149c195bd6ee685e564fbcb033243e9e411d0 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 20 Mar 2023 15:55:11 -0700 Subject: [PATCH 2/3] Remove incorrect comment --- cartography/graph/statement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cartography/graph/statement.py b/cartography/graph/statement.py index 4c2bf619f..2b0f1c1a6 100644 --- a/cartography/graph/statement.py +++ b/cartography/graph/statement.py @@ -117,7 +117,7 @@ def _run_iterative(self, session: neo4j.Session) -> None: """ self.parameters["LIMIT_SIZE"] = self.iterationsize - while True: # TODO if there are 0 things to clean up, then we will hang forever + while True: result: neo4j.Result = session.write_transaction(self._run_noniterative) # Exit if we have finished processing all items From 9487d3861e0010a1635f0224f361cc53f2c1b9be Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Thu, 30 Mar 2023 23:06:47 -0700 Subject: [PATCH 3/3] Use namedtuples, delete dead code --- cartography/intel/aws/ec2/instances.py | 87 +++++++++----------------- 1 file changed, 30 insertions(+), 57 deletions(-) diff --git a/cartography/intel/aws/ec2/instances.py b/cartography/intel/aws/ec2/instances.py index 320372b89..869585677 100644 --- a/cartography/intel/aws/ec2/instances.py +++ b/cartography/intel/aws/ec2/instances.py @@ -1,9 +1,9 @@ import logging import time +from collections import namedtuple from typing import Any from typing import Dict from typing import List -from typing import Tuple import boto3 import neo4j @@ -23,6 +23,18 @@ logger = logging.getLogger(__name__) +Ec2Data = namedtuple( + 'Ec2Data', [ + "reservation_list", + "instance_list", + "subnet_list", + "sg_list", + "keypair_list", + "network_interface_list", + "instance_ebs_volumes_list", + ], +) + @timeit @aws_handle_regions @@ -35,19 +47,7 @@ def get_ec2_instances(boto3_session: boto3.session.Session, region: str) -> List return reservations -def transform_ec2_instances( - reservations: List[Dict[str, Any]], - region: str, - current_aws_account_id: str, -) -> Tuple[ - List[Dict[str, Any]], - List[Dict[str, Any]], - List[Dict[str, Any]], - List[Dict[str, Any]], - List[Dict[str, Any]], - List[Dict[str, Any]], - List[Dict[str, Any]], -]: +def transform_ec2_instances(reservations: List[Dict[str, Any]], region: str, current_aws_account_id: str) -> Ec2Data: reservation_list = [] instance_list = [] subnet_list = [] @@ -144,33 +144,14 @@ def transform_ec2_instances( # 'SnapshotId': mapping['Ebs']['SnapshotId'], # TODO check on this }) - return ( - reservation_list, - instance_list, - subnet_list, - sg_list, - keypair_list, - network_interface_list, - instance_ebs_volumes_list, - ) - - -@timeit -def load_instances( - neo4j_session: neo4j.Session, - instance_list: List[Dict[str, Any]], - region: str, - current_aws_account_id: str, - update_tag: int, -) -> None: - logger.info(f"Loading {len(instance_list)} EC2 instances for region '{region}' into graph.") - load( - neo4j_session, - EC2InstanceSchema(), - instance_list, - lastupdated=update_tag, - Region=region, - AWS_ID=current_aws_account_id, + return Ec2Data( + reservation_list=reservation_list, + instance_list=instance_list, + subnet_list=subnet_list, + sg_list=sg_list, + keypair_list=keypair_list, + network_interface_list=network_interface_list, + instance_ebs_volumes_list=instance_ebs_volumes_list, ) @@ -346,26 +327,18 @@ def sync_ec2_instances( for region in regions: logger.info("Syncing EC2 instances for region '%s' in account '%s'.", region, current_aws_account_id) reservations = get_ec2_instances(boto3_session, region) - ( - reservation_list, - instance_list, - subnet_list, - sg_list, - key_pair_list, - nic_list, - ebs_volumes_list, - ) = transform_ec2_instances(reservations, region, current_aws_account_id) + ec2_data = transform_ec2_instances(reservations, region, current_aws_account_id) load_ec2_instance_data( neo4j_session, region, current_aws_account_id, update_tag, - reservation_list, - instance_list, - subnet_list, - sg_list, - key_pair_list, - nic_list, - ebs_volumes_list, + ec2_data.reservation_list, + ec2_data.instance_list, + ec2_data.subnet_list, + ec2_data.sg_list, + ec2_data.keypair_list, + ec2_data.network_interface_list, + ec2_data.instance_ebs_volumes_list, ) cleanup(neo4j_session, common_job_parameters)