Skip to content

Commit

Permalink
Initial commit for id standardization
Browse files Browse the repository at this point in the history
  • Loading branch information
achantavy committed Mar 21, 2023
1 parent e68b70f commit 0a444e9
Show file tree
Hide file tree
Showing 22 changed files with 366 additions and 334 deletions.
20 changes: 0 additions & 20 deletions cartography/data/jobs/cleanup/aws_import_volumes_cleanup.json

This file was deleted.

63 changes: 39 additions & 24 deletions cartography/intel/aws/ec2/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.aws.ec2.util import get_botocore_config
from cartography.intel.aws.util.arns import build_arn
from cartography.models.aws.ec2.instances import EC2InstanceSchema
from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema
from cartography.models.aws.ec2.networkinterfaces import EC2NetworkInterfaceSchema
Expand Down Expand Up @@ -65,10 +66,12 @@ def transform_ec2_instances(
})
for instance in reservation['Instances']:
instance_id = instance['InstanceId']
instance_arn = build_arn('ec2', current_aws_account_id, 'instance', instance_id, region)
launch_time = instance.get("LaunchTime")
launch_time_unix = str(time.mktime(launch_time.timetuple())) if launch_time else None
instance_list.append(
{
'Arn': instance_arn,
'InstanceId': instance_id,
'ReservationId': reservation_id,
'PublicDnsName': instance.get("PublicDnsName"),
Expand Down Expand Up @@ -97,51 +100,63 @@ def transform_ec2_instances(
if subnet_id:
subnet_list.append(
{
'Arn': build_arn('ec2', current_aws_account_id, 'subnet', subnet_id, region),
'SubnetId': subnet_id,
'InstanceId': instance_id,
'InstanceArn': instance_arn,
},
)

if instance.get("KeyName"):
key_name = instance["KeyName"]
key_pair_arn = f'arn:aws:ec2:{region}:{current_aws_account_id}:key-pair/{key_name}'
keypair_list.append({
'KeyPairArn': key_pair_arn,
'KeyName': key_name,
'InstanceId': instance_id,
})

if instance.get("SecurityGroups"):
for group in instance["SecurityGroups"]:
sg_list.append(
{
'GroupId': group['GroupId'],
'InstanceId': instance_id,
},
)

for network_interface in instance['NetworkInterfaces']:
for security_group in network_interface['Groups']:
network_interface_id = network_interface['NetworkInterfaceId']
network_interface_list.append({
'NetworkInterfaceId': network_interface['NetworkInterfaceId'],
'Arn': build_arn(
'ec2',
current_aws_account_id,
'network-interface',
network_interface_id,
region,
),
'NetworkInterfaceId': network_interface_id,
'Status': network_interface['Status'],
'MacAddress': network_interface['MacAddress'],
'Description': network_interface['Description'],
'PrivateDnsName': network_interface['PrivateDnsName'],
'PrivateIpAddress': network_interface['PrivateIpAddress'],
'InstanceId': instance_id,
'InstanceArn': instance_arn,
# Match using the subnet_id and not the arn so that we allow for subnet_id to be None.
'SubnetId': subnet_id,
'GroupId': security_group['GroupId'],
})

if instance.get("SecurityGroups"):
for group in instance["SecurityGroups"]:
group_id = group['GroupId']
sg_list.append(
{
'Arn': build_arn('ec2', current_aws_account_id, 'security-group', group_id, region),
'GroupId': group_id,
'InstanceArn': instance_arn,
},
)

if instance.get("KeyName"):
key_name = instance["KeyName"]
keypair_list.append({
'Arn': build_arn('ec2', current_aws_account_id, 'key-pair', key_name, region),
'KeyName': key_name,
'InstanceArn': instance_arn,
})

if 'BlockDeviceMappings' in instance and len(instance['BlockDeviceMappings']) > 0:
for mapping in instance['BlockDeviceMappings']:
if 'VolumeId' in mapping['Ebs']:
volume_id = mapping['Ebs']['VolumeId']
instance_ebs_volumes_list.append({
'InstanceId': instance_id,
'VolumeId': mapping['Ebs']['VolumeId'],
'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region),
'InstanceArn': instance_arn,
'VolumeId': volume_id,
'DeleteOnTermination': mapping['Ebs']['DeleteOnTermination'],
# 'SnapshotId': mapping['Ebs']['SnapshotId'], # TODO check on this
})

return (
Expand Down
127 changes: 56 additions & 71 deletions cartography/intel/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@
import boto3
import neo4j

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.aws.util.arns import build_arn
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.util import aws_handle_regions
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)


@timeit
@aws_handle_regions
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict[str, Any]]:
client = boto3_session.client('ec2', region_name=region)
paginator = client.get_paginator('describe_volumes')
volumes: List[Dict] = []
Expand All @@ -25,94 +28,76 @@ def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]


def transform_volumes(volumes: List[Dict[str, Any]], region: str, current_aws_account_id: str) -> List[Dict[str, Any]]:
result = []
for volume in volumes:
volume['VolumeArn'] = f"arn:aws:ec2:{region}:{current_aws_account_id}:volume/{volume['VolumeId']}"
volume['CreateTime'] = str(volume['CreateTime'])
return volumes
attachments = volume.get('Attachments', [])
active_attachments = [a for a in attachments if a['State'] == 'attached']

volume_id = volume['VolumeId']
raw_vol = ({
'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region),
'AvailabilityZone': volume['AvailabilityZone'],
'CreateTime': volume['CreateTime'],
'Encrypted': volume['Encrypted'],
'Size': volume['Size'],
'State': volume['State'],
'OutpostArn': volume['OutpostArn'],
'SnapshotId': volume['SnapshotId'],
'Iops': volume['Iops'],
'FastRestored': volume['FastRestored'],
'MultiAttachEnabled': volume['MultiAttachEnabled'],
'VolumeType': volume['VolumeType'],
'VolumeId': volume_id,
'KmsKeyId': volume['KmsKeyId'],
})

if not active_attachments:
result.append(raw_vol)
continue

for attachment in active_attachments:
vol_with_attachment = raw_vol.copy()
vol_with_attachment['InstanceId'] = attachment['InstanceId']
result.append(vol_with_attachment)

return result


@timeit
def load_volumes(
neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
neo4j_session: neo4j.Session,
ebs_data: List[Dict[str, Any]],
region: str,
current_aws_account_id: str,
update_tag: int,
) -> None:
ingest_volumes = """
UNWIND $volumes_list as volume
MERGE (vol:EBSVolume{id: volume.VolumeId})
ON CREATE SET vol.firstseen = timestamp()
SET vol.arn = volume.VolumeArn,
vol.lastupdated = $update_tag,
vol.availabilityzone = volume.AvailabilityZone,
vol.createtime = volume.CreateTime,
vol.encrypted = volume.Encrypted,
vol.size = volume.Size,
vol.state = volume.State,
vol.outpostarn = volume.OutpostArn,
vol.snapshotid = volume.SnapshotId,
vol.iops = volume.Iops,
vol.fastrestored = volume.FastRestored,
vol.multiattachenabled = volume.MultiAttachEnabled,
vol.type = volume.VolumeType,
vol.kmskeyid = volume.KmsKeyId,
vol.region=$Region
WITH vol
MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (aa)-[r:RESOURCE]->(vol)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $update_tag
"""

neo4j_session.run(
ingest_volumes,
volumes_list=data,
AWS_ACCOUNT_ID=current_aws_account_id,
load(
neo4j_session,
EBSVolumeSchema(),
ebs_data,
Region=region,
update_tag=update_tag,
AWS_ID=current_aws_account_id,
lastupdated=update_tag,
)


def load_volume_relationships(
neo4j_session: neo4j.Session,
volumes: List[Dict[str, Any]],
aws_update_tag: int,
) -> None:
add_relationship_query = """
MATCH (volume:EBSVolume{arn: $VolumeArn})
WITH volume
MATCH (instance:EC2Instance{instanceid: $InstanceId})
MERGE (volume)-[r:ATTACHED_TO_EC2_INSTANCE]->(instance)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
"""
for volume in volumes:
for attachment in volume.get('Attachments', []):
if attachment['State'] != 'attached':
continue
neo4j_session.run(
add_relationship_query,
VolumeArn=volume['VolumeArn'],
InstanceId=attachment['InstanceId'],
aws_update_tag=aws_update_tag,
)


@timeit
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
run_cleanup_job(
'aws_import_volumes_cleanup.json',
neo4j_session,
common_job_parameters,
)
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session)


@timeit
def sync_ebs_volumes(
neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
neo4j_session: neo4j.Session,
boto3_session: boto3.session.Session,
regions: List[str],
current_aws_account_id: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
) -> None:
for region in regions:
logger.debug("Syncing volumes for region '%s' in account '%s'.", region, current_aws_account_id)
data = get_volumes(boto3_session, region)
transformed_data = transform_volumes(data, region, current_aws_account_id)
load_volumes(neo4j_session, transformed_data, region, current_aws_account_id, update_tag)
load_volume_relationships(neo4j_session, transformed_data, update_tag)
cleanup_volumes(neo4j_session, common_job_parameters)
10 changes: 5 additions & 5 deletions cartography/intel/aws/resourcegroupstaggingapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,17 @@ def get_short_id_from_lb2_arn(alb_arn: str) -> str:
TAG_RESOURCE_TYPE_MAPPINGS: Dict = {
'autoscaling:autoScalingGroup': {'label': 'AutoScalingGroup', 'property': 'arn'},
'dynamodb:table': {'label': 'DynamoDBTable', 'property': 'id'},
'ec2:instance': {'label': 'EC2Instance', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:instance': {'label': 'EC2Instance', 'property': 'id'},
'ec2:internet-gateway': {'label': 'AWSInternetGateway', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:key-pair': {'label': 'EC2KeyPair', 'property': 'id'},
'ec2:network-interface': {'label': 'NetworkInterface', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:network-interface': {'label': 'NetworkInterface', 'property': 'id'},
'ecr:repository': {'label': 'ECRRepository', 'property': 'id'},
'ec2:security-group': {'label': 'EC2SecurityGroup', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:subnet': {'label': 'EC2Subnet', 'property': 'subnetid', 'id_func': get_short_id_from_ec2_arn},
'ec2:security-group': {'label': 'EC2SecurityGroup', 'property': 'id'},
'ec2:subnet': {'label': 'EC2Subnet', 'property': 'id'},
'ec2:transit-gateway': {'label': 'AWSTransitGateway', 'property': 'id'},
'ec2:transit-gateway-attachment': {'label': 'AWSTransitGatewayAttachment', 'property': 'id'},
'ec2:vpc': {'label': 'AWSVpc', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:volume': {'label': 'EBSVolume', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ec2:volume': {'label': 'EBSVolume', 'property': 'id'},
'ec2:elastic-ip-address': {'label': 'ElasticIPAddress', 'property': 'id', 'id_func': get_short_id_from_ec2_arn},
'ecs:cluster': {'label': 'ECSCluster', 'property': 'id'},
'ecs:container': {'label': 'ECSContainer', 'property': 'id'},
Expand Down
8 changes: 4 additions & 4 deletions cartography/intel/aws/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ def get_instance_ids(neo4j_session: neo4j.Session, region: str, current_aws_acco
get_instances_query = """
MATCH (:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(i:EC2Instance)
WHERE i.region = $Region
RETURN i.id
RETURN i.instanceid
"""
results = neo4j_session.run(get_instances_query, AWS_ACCOUNT_ID=current_aws_account_id, Region=region)
instance_ids = []
for r in results:
instance_ids.append(r['i.id'])
instance_ids.append(r['i.instanceid'])
return instance_ids


Expand Down Expand Up @@ -105,7 +105,7 @@ def load_instance_information(
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
WITH i
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: i.instance_id})
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{instanceid: i.instance_id})
MERGE (ec2_instance)-[r2:HAS_INFORMATION]->(i)
ON CREATE SET r2.firstseen = timestamp()
SET r2.lastupdated = $aws_update_tag
Expand Down Expand Up @@ -153,7 +153,7 @@ def load_instance_patches(
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
WITH p
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{id: p.instance_id})
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ec2_instance:EC2Instance{instanceid: p.instance_id})
MERGE (ec2_instance)-[r2:HAS_PATCH]->(p)
ON CREATE SET r2.firstseen = timestamp()
SET r2.lastupdated = $aws_update_tag
Expand Down
18 changes: 18 additions & 0 deletions cartography/intel/aws/util/arns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Optional


def build_arn(
resource: str,
account: str,
typename: str,
name: str,
region: Optional[str] = None,
partition: Optional[str] = None,
) -> str:
if not partition:
# TODO: support partitions from others. Please file an issue on this if needed, would love to hear from you
partition = 'aws'
if not region:
# Some resources are present in all regions, e.g. IAM policies
region = ""
return f"arn:{partition}:{resource}:{region}:{account}:{typename}/{name}"
6 changes: 3 additions & 3 deletions cartography/models/aws/ec2/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

@dataclass(frozen=True)
class EC2InstanceNodeProperties(CartographyNodeProperties):
# TODO arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('InstanceId')
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('Arn')
instanceid: PropertyRef = PropertyRef('InstanceId', extra_index=True)
publicdnsname: PropertyRef = PropertyRef('PublicDnsName', extra_index=True)
privateipaddress: PropertyRef = PropertyRef('PrivateIpAddress')
Expand Down Expand Up @@ -64,7 +64,7 @@ class EC2InstanceToEC2ReservationRelProperties(CartographyRelProperties):
class EC2InstanceToEC2Reservation(CartographyRelSchema):
target_node_label: str = 'EC2Reservation'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'reservationid': PropertyRef('ReservationId')},
{'id': PropertyRef('ReservationId')},
)
direction: LinkDirection = LinkDirection.OUTWARD
rel_label: str = "MEMBER_OF_EC2_RESERVATION"
Expand Down
Loading

0 comments on commit 0a444e9

Please sign in to comment.