From 97f64dbcfcfbe46737b834e0e7613d81d2c0fbd2 Mon Sep 17 00:00:00 2001 From: Phi Xu <66100237+phishoes@users.noreply.github.com> Date: Thu, 13 Apr 2023 14:15:29 -0700 Subject: [PATCH] Refactor AWS DynamoDB to use new data model (#1153) Refactors DynamoDB sync to use the cartography data model. --- cartography/data/indexes.cypher | 5 - .../aws_import_dynamodb_tables_cleanup.json | 20 -- cartography/intel/aws/dynamodb.py | 123 ++++++----- cartography/models/aws/dynamodb/gsi.py | 68 ++++++ cartography/models/aws/dynamodb/tables.py | 47 +++++ .../cartography/intel/aws/test_dynamodb.py | 196 ++++++++++++++++-- 6 files changed, 354 insertions(+), 105 deletions(-) delete mode 100644 cartography/data/jobs/cleanup/aws_import_dynamodb_tables_cleanup.json create mode 100644 cartography/models/aws/dynamodb/gsi.py create mode 100644 cartography/models/aws/dynamodb/tables.py diff --git a/cartography/data/indexes.cypher b/cartography/data/indexes.cypher index 0227ce9fc..072599f71 100644 --- a/cartography/data/indexes.cypher +++ b/cartography/data/indexes.cypher @@ -90,11 +90,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:DODroplet) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:DODroplet) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:DOProject) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:DOProject) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBGlobalSecondaryIndex) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBGlobalSecondaryIndex) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBTable) ON (n.arn); -CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBTable) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:DynamoDBTable) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:EBSSnapshot) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:EBSSnapshot) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:EC2KeyPair) ON (n.keyfingerprint); diff --git a/cartography/data/jobs/cleanup/aws_import_dynamodb_tables_cleanup.json b/cartography/data/jobs/cleanup/aws_import_dynamodb_tables_cleanup.json deleted file mode 100644 index d112a0cd8..000000000 --- a/cartography/data/jobs/cleanup/aws_import_dynamodb_tables_cleanup.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "statements": [ - { - "query": "MATCH (n:DynamoDBTable)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (g:DynamoDBGlobalSecondaryIndex)<-[:GLOBAL_SECONDARY_INDEX]-(:DynamoDBTable)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE g.lastupdated <> $UPDATE_TAG WITH g LIMIT $LIMIT_SIZE DETACH DELETE (g)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:DynamoDBGlobalSecondaryIndex)<-[r:GLOBAL_SECONDARY_INDEX]-(:DynamoDBTable)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - } - ], - "name": "cleanup DynamoDBTable" -} diff --git a/cartography/intel/aws/dynamodb.py b/cartography/intel/aws/dynamodb.py index 55cc65971..05f39caf9 100644 --- a/cartography/intel/aws/dynamodb.py +++ b/cartography/intel/aws/dynamodb.py @@ -1,14 +1,19 @@ import logging +from typing import Any from typing import Dict from typing import List import boto3 import neo4j +from cartography.client.core.tx import load +from cartography.graph.job import GraphJob +from cartography.intel.aws.ec2.util import get_botocore_config +from cartography.models.aws.dynamodb.gsi import DynamoDBGSISchema +from cartography.models.aws.dynamodb.tables import DynamoDBTableSchema from cartography.stats import get_stats_client from cartography.util import aws_handle_regions from cartography.util import merge_module_sync_metadata -from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) @@ -18,7 +23,7 @@ @timeit @aws_handle_regions def get_dynamodb_tables(boto3_session: boto3.session.Session, region: str) -> List[Dict]: - client = boto3_session.client('dynamodb', region_name=region) + client = boto3_session.client('dynamodb', region_name=region, config=get_botocore_config()) paginator = client.get_paginator('list_tables') dynamodb_tables = [] for page in paginator.paginate(): @@ -27,77 +32,69 @@ def get_dynamodb_tables(boto3_session: boto3.session.Session, region: str) -> Li return dynamodb_tables +@timeit +def transform_dynamodb_tables(dynamodb_tables: List, region: str) -> Any: + ddb_table_data: List[Dict[str, Any]] = [] + ddb_gsi_data: List[Dict[str, Any]] = [] + + for table in dynamodb_tables: + ddb_table_data.append({ + 'Arn': table['Table']['TableArn'], + 'TableName': table['Table']['TableName'], + 'Region': region, + 'Rows': table['Table']['ItemCount'], + 'Size': table['Table']['TableSizeBytes'], + 'ProvisionedThroughputReadCapacityUnits': table['Table']['ProvisionedThroughput']['ReadCapacityUnits'], + 'ProvisionedThroughputWriteCapacityUnits': table['Table']['ProvisionedThroughput']['WriteCapacityUnits'], + }) + for gsi in table['Table'].get('GlobalSecondaryIndexes', []): + ddb_gsi_data.append({ + 'Arn': gsi['IndexArn'], + 'TableArn': table['Table']['TableArn'], + 'Region': region, + 'ProvisionedThroughputReadCapacityUnits': gsi['ProvisionedThroughput']['ReadCapacityUnits'], + 'ProvisionedThroughputWriteCapacityUnits': gsi['ProvisionedThroughput']['WriteCapacityUnits'], + 'GSIName': gsi['IndexName'], + }) + return ddb_table_data, ddb_gsi_data + + @timeit def load_dynamodb_tables( - neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, + neo4j_session: neo4j.Session, tables_data: List[Dict[str, Any]], region: str, current_aws_account_id: str, aws_update_tag: int, ) -> None: - ingest_table = """ - MERGE (table:DynamoDBTable{id: $Arn}) - ON CREATE SET table.firstseen = timestamp(), table.arn = $Arn, table.name = $TableName, - table.region = $Region - SET table.lastupdated = $aws_update_tag, table.rows = $Rows, table.size = $Size, - table.provisioned_throughput_read_capacity_units = $ProvisionedThroughputReadCapacityUnits, - table.provisioned_throughput_write_capacity_units = $ProvisionedThroughputWriteCapacityUnits - WITH table - MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (owner)-[r:RESOURCE]->(table) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $aws_update_tag - """ - - for table in data: - neo4j_session.run( - ingest_table, - Arn=table['Table']['TableArn'], - Region=region, - ProvisionedThroughputReadCapacityUnits=table['Table']['ProvisionedThroughput']['ReadCapacityUnits'], - ProvisionedThroughputWriteCapacityUnits=table['Table']['ProvisionedThroughput']['WriteCapacityUnits'], - Size=table['Table']['TableSizeBytes'], - TableName=table['Table']['TableName'], - Rows=table['Table']['ItemCount'], - AWS_ACCOUNT_ID=current_aws_account_id, - aws_update_tag=aws_update_tag, - ) - load_gsi(neo4j_session, table, region, current_aws_account_id, aws_update_tag) + logger.info(f"Loading Dynamo DB tables {len(tables_data)} for region '{region}' into graph.") + load( + neo4j_session, + DynamoDBTableSchema(), + tables_data, + lastupdated=aws_update_tag, + Region=region, + AWS_ID=current_aws_account_id, + ) @timeit -def load_gsi( - neo4j_session: neo4j.Session, table: Dict, region: str, current_aws_account_id: str, +def load_dynamodb_gsi( + neo4j_session: neo4j.Session, gsi_data: List[Dict[str, Any]], region: str, current_aws_account_id: str, aws_update_tag: int, ) -> None: - ingest_gsi = """ - MERGE (gsi:DynamoDBGlobalSecondaryIndex{id: $Arn}) - ON CREATE SET gsi.firstseen = timestamp(), gsi.arn = $Arn, gsi.name = $GSIName, - gsi.region = $Region - SET gsi.lastupdated = $aws_update_tag, - gsi.provisioned_throughput_read_capacity_units = $ProvisionedThroughputReadCapacityUnits, - gsi.provisioned_throughput_write_capacity_units = $ProvisionedThroughputWriteCapacityUnits - WITH gsi - MATCH (table:DynamoDBTable{arn: $TableArn}) - MERGE (table)-[r:GLOBAL_SECONDARY_INDEX]->(gsi) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $aws_update_tag - """ - - for gsi in table['Table'].get('GlobalSecondaryIndexes', []): - neo4j_session.run( - ingest_gsi, - TableArn=table['Table']['TableArn'], - Arn=gsi['IndexArn'], - Region=region, - ProvisionedThroughputReadCapacityUnits=gsi['ProvisionedThroughput']['ReadCapacityUnits'], - ProvisionedThroughputWriteCapacityUnits=gsi['ProvisionedThroughput']['WriteCapacityUnits'], - GSIName=gsi['IndexName'], - AWS_ACCOUNT_ID=current_aws_account_id, - aws_update_tag=aws_update_tag, - ) + logger.info(f"Loading Dynamo DB GSI {len(gsi_data)} for region '{region}' into graph.") + load( + neo4j_session, + DynamoDBGSISchema(), + gsi_data, + lastupdated=aws_update_tag, + Region=region, + AWS_ID=current_aws_account_id, + ) @timeit def cleanup_dynamodb_tables(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: - run_cleanup_job('aws_import_dynamodb_tables_cleanup.json', neo4j_session, common_job_parameters) + GraphJob.from_node_schema(DynamoDBTableSchema(), common_job_parameters).run(neo4j_session) + GraphJob.from_node_schema(DynamoDBGSISchema(), common_job_parameters).run(neo4j_session) @timeit @@ -107,8 +104,10 @@ def sync_dynamodb_tables( ) -> None: for region in regions: logger.info("Syncing DynamoDB for region in '%s' in account '%s'.", region, current_aws_account_id) - data = get_dynamodb_tables(boto3_session, region) - load_dynamodb_tables(neo4j_session, data, region, current_aws_account_id, aws_update_tag) + dynamodb_tables = get_dynamodb_tables(boto3_session, region) + ddb_table_data, ddb_gsi_data = transform_dynamodb_tables(dynamodb_tables, region) + load_dynamodb_tables(neo4j_session, ddb_table_data, region, current_aws_account_id, aws_update_tag) + load_dynamodb_gsi(neo4j_session, ddb_gsi_data, region, current_aws_account_id, aws_update_tag) cleanup_dynamodb_tables(neo4j_session, common_job_parameters) diff --git a/cartography/models/aws/dynamodb/gsi.py b/cartography/models/aws/dynamodb/gsi.py new file mode 100644 index 000000000..d498dfc8a --- /dev/null +++ b/cartography/models/aws/dynamodb/gsi.py @@ -0,0 +1,68 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class DynamoDBGSINodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Arn') + arn: PropertyRef = PropertyRef('Arn') + name: PropertyRef = PropertyRef('GSIName') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + provisioned_throughput_read_capacity_units: PropertyRef = PropertyRef('ProvisionedThroughputReadCapacityUnits') + provisioned_throughput_write_capacity_units: PropertyRef = PropertyRef('ProvisionedThroughputWriteCapacityUnits') + + +@dataclass(frozen=True) +class DynamoDBGSIToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +# (:DynamoDBGlobalSecondaryIndex)<-[:RESOURCE]-(:AWSAccount) +class DynamoDBGSIToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: DynamoDBGSIToAwsAccountRelProperties = DynamoDBGSIToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class DynamoDBGSIToDynamoDBTableRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +# (:DynamoDBGlobalSecondaryIndex)<-[:GLOBAL_SECONDARY_INDEX]-(:DynamoDBTable) +class DynamoDBGSIToDynamoDBTable(CartographyRelSchema): + target_node_label: str = 'DynamoDBTable' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'arn': PropertyRef('TableArn')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "GLOBAL_SECONDARY_INDEX" + properties: DynamoDBGSIToDynamoDBTableRelProperties = DynamoDBGSIToDynamoDBTableRelProperties() + + +@dataclass(frozen=True) +class DynamoDBGSISchema(CartographyNodeSchema): + label: str = 'DynamoDBGlobalSecondaryIndex' + properties: DynamoDBGSINodeProperties = DynamoDBGSINodeProperties() + sub_resource_relationship: DynamoDBGSIToAWSAccount = DynamoDBGSIToAWSAccount() + other_relationships: OtherRelationships = OtherRelationships( + [ + DynamoDBGSIToDynamoDBTable(), + ], + ) diff --git a/cartography/models/aws/dynamodb/tables.py b/cartography/models/aws/dynamodb/tables.py new file mode 100644 index 000000000..85c6cb1bf --- /dev/null +++ b/cartography/models/aws/dynamodb/tables.py @@ -0,0 +1,47 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class DynamoDBTableNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Arn') + arn: PropertyRef = PropertyRef('Arn') + name: PropertyRef = PropertyRef('TableName') + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + rows: PropertyRef = PropertyRef('Rows') + size: PropertyRef = PropertyRef('Size') + provisioned_throughput_read_capacity_units: PropertyRef = PropertyRef('ProvisionedThroughputReadCapacityUnits') + provisioned_throughput_write_capacity_units: PropertyRef = PropertyRef('ProvisionedThroughputWriteCapacityUnits') + + +@dataclass(frozen=True) +class DynamoDBTableToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +# (:DynamoDBTable)<-[:RESOURCE]-(:AWSAccount) +class DynamoDBTableToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: DynamoDBTableToAwsAccountRelProperties = DynamoDBTableToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class DynamoDBTableSchema(CartographyNodeSchema): + label: str = 'DynamoDBTable' + properties: DynamoDBTableNodeProperties = DynamoDBTableNodeProperties() + sub_resource_relationship: DynamoDBTableToAWSAccount = DynamoDBTableToAWSAccount() diff --git a/tests/integration/cartography/intel/aws/test_dynamodb.py b/tests/integration/cartography/intel/aws/test_dynamodb.py index 6356b7456..efd2f84bb 100644 --- a/tests/integration/cartography/intel/aws/test_dynamodb.py +++ b/tests/integration/cartography/intel/aws/test_dynamodb.py @@ -1,33 +1,193 @@ +from unittest.mock import MagicMock +from unittest.mock import patch + import cartography.intel.aws.dynamodb import tests.data.aws.dynamodb +from tests.integration.cartography.intel.aws.common import create_test_account +from tests.integration.util import check_nodes +from tests.integration.util import check_rels TEST_ACCOUNT_ID = '000000000000' TEST_REGION = 'us-east-1' TEST_UPDATE_TAG = 123456789 -def test_load_dynamodb(neo4j_session): - data = tests.data.aws.dynamodb.LIST_DYNAMODB_TABLES['Tables'] +@patch.object( + cartography.intel.aws.dynamodb, 'get_dynamodb_tables', + return_value=tests.data.aws.dynamodb.LIST_DYNAMODB_TABLES['Tables'], +) +def test_load_dynamodb(mock_get_instances, neo4j_session): + """ + Ensure that instances actually get loaded and have their key fields + """ + # Arrange + boto3_session = MagicMock() + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) - cartography.intel.aws.dynamodb.load_dynamodb_tables( + # Act + cartography.intel.aws.dynamodb.sync_dynamodb_tables( neo4j_session, - data, + boto3_session, TEST_REGION, TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) - expected_rows = 1000000 - expected_nodes = { - ("arn:aws:dynamodb:us-east-1:000000000000:table/example-table", expected_rows), - ("arn:aws:dynamodb:us-east-1:000000000000:table/sample-table", expected_rows), - ("arn:aws:dynamodb:us-east-1:000000000000:table/model-table", expected_rows), - ("arn:aws:dynamodb:us-east-1:000000000000:table/basic-table", expected_rows), - } - - nodes = neo4j_session.run( - """ - MATCH (d:DynamoDBTable) return d.arn, d.rows - """, + + # Assert ddb table nodes exist + assert check_nodes(neo4j_session, 'DynamoDBTable', ['id', 'rows']) == { + ("arn:aws:dynamodb:us-east-1:000000000000:table/example-table", 1000000), + ("arn:aws:dynamodb:us-east-1:000000000000:table/sample-table", 1000000), + ("arn:aws:dynamodb:us-east-1:000000000000:table/model-table", 1000000), + ("arn:aws:dynamodb:us-east-1:000000000000:table/basic-table", 1000000), + } + + # Assert ddb gsi nodes exist + assert check_nodes(neo4j_session, 'DynamoDBGlobalSecondaryIndex', ['id']) == { + ('arn:aws:dynamodb:us-east-1:table/example-table/index/sample_2-index',), + ('arn:aws:dynamodb:us-east-1:table/model-table/index/sample_2-index',), + ('arn:aws:dynamodb:us-east-1:table/model-table/index/sample_3-index',), + ('arn:aws:dynamodb:us-east-1:table/model-table/index/sample_1-index',), + ('arn:aws:dynamodb:us-east-1:table/example-table/index/sample_1-index',), + ('arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_2-index',), + ('arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_1-index',), + ('arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_3-index',), + } + + # Assert AWSAccount -> DynamoDBTable + assert check_rels( + neo4j_session, + 'DynamoDBTable', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('arn:aws:dynamodb:us-east-1:000000000000:table/example-table', '000000000000'), + ('arn:aws:dynamodb:us-east-1:000000000000:table/sample-table', '000000000000'), + ('arn:aws:dynamodb:us-east-1:000000000000:table/model-table', '000000000000'), + ('arn:aws:dynamodb:us-east-1:000000000000:table/basic-table', '000000000000'), + } + + # Assert AWSAccount -> DynamoDBGlobalSecondaryIndex + assert check_rels( + neo4j_session, + 'AWSAccount', + 'id', + 'DynamoDBGlobalSecondaryIndex', + 'id', + 'RESOURCE', + rel_direction_right=True, + ) == { + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/example-table/index/sample_1-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/example-table/index/sample_2-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/model-table/index/sample_1-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/model-table/index/sample_2-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/model-table/index/sample_3-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_1-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_2-index', + ), + ( + '000000000000', + 'arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_3-index', + ), + } + + # Assert DynamoDBTable -> DynamoDBGlobalSecondaryIndex + assert check_rels( + neo4j_session, + 'DynamoDBTable', + 'id', + 'DynamoDBGlobalSecondaryIndex', + 'id', + 'GLOBAL_SECONDARY_INDEX', + rel_direction_right=True, + ) == { + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/example-table', + 'arn:aws:dynamodb:us-east-1:table/example-table/index/sample_1-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/example-table', + 'arn:aws:dynamodb:us-east-1:table/example-table/index/sample_2-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/model-table', + 'arn:aws:dynamodb:us-east-1:table/model-table/index/sample_1-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/model-table', + 'arn:aws:dynamodb:us-east-1:table/model-table/index/sample_2-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/model-table', + 'arn:aws:dynamodb:us-east-1:table/model-table/index/sample_3-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/sample-table', + 'arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_1-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/sample-table', + 'arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_2-index', + ), + ( + 'arn:aws:dynamodb:us-east-1:000000000000:table/sample-table', + 'arn:aws:dynamodb:us-east-1:table/sample-table/index/sample_3-index', + ), + } + + # Arrange: load in an unrelated EC2 instance. This should not be affected by the EMR module's cleanup job. + neo4j_session.run( + ''' + MERGE (i:EC2Instance{id:1234, lastupdated: $lastupdated})<-[r:RESOURCE]-(:AWSAccount{id: $aws_account_id}) + SET r.lastupdated = $lastupdated + ''', + aws_account_id=TEST_ACCOUNT_ID, + lastupdated=TEST_UPDATE_TAG, ) - actual_nodes = {(n['d.arn'], n['d.rows']) for n in nodes} - assert actual_nodes == expected_nodes + + # [Pre-test] Assert that the unrelated EC2 instance exists + assert check_rels(neo4j_session, 'AWSAccount', 'id', 'EC2Instance', 'id', 'RESOURCE') == { + (TEST_ACCOUNT_ID, 1234), + } + + # Act: run the cleanup job + common_job_parameters = { + 'UPDATE_TAG': TEST_UPDATE_TAG + 1, # Simulate a new sync run finished so the old update tag is obsolete now + 'AWS_ID': TEST_ACCOUNT_ID, + # Add in extra params that may have been added by other modules. + # Expectation: These should not affect cleanup job execution. + 'permission_relationships_file': '/path/to/perm/rels/file', + 'OKTA_ORG_ID': 'my-org-id', + } + cartography.intel.aws.dynamodb.cleanup_dynamodb_tables(neo4j_session, common_job_parameters) + + # Assert: Expect no ddb nodes in the graph now + assert check_nodes(neo4j_session, 'DynamoDBTable', ['id']) == set() + assert check_nodes(neo4j_session, 'DynamoDBGlobalSecondaryIndex', ['id']) == set() + # Assert: Expect that the unrelated EC2 instance was not touched by the cleanup job + assert check_rels(neo4j_session, 'AWSAccount', 'id', 'EC2Instance', 'id', 'RESOURCE') == { + (TEST_ACCOUNT_ID, 1234), + }