From 7a3e4239ae9035141147444dfe03db10bdf99398 Mon Sep 17 00:00:00 2001 From: jradhakrishnan Date: Wed, 30 Aug 2023 07:53:38 +0530 Subject: [PATCH 1/4] Guard against NPE while building metric samples --- .../ai/asserts/aws/exporter/MetricSampleBuilder.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/java/ai/asserts/aws/exporter/MetricSampleBuilder.java b/src/main/java/ai/asserts/aws/exporter/MetricSampleBuilder.java index 08cdb458..a2d9bf92 100644 --- a/src/main/java/ai/asserts/aws/exporter/MetricSampleBuilder.java +++ b/src/main/java/ai/asserts/aws/exporter/MetricSampleBuilder.java @@ -67,11 +67,13 @@ public Optional buildSingleSample(String metricName, Map Double metric) { labels = new TreeMap<>(labels); AWSAccount accountDetails = taskExecutorUtil.getAccountDetails(); - labels.putIfAbsent(TENANT, accountDetails.getTenant()); - if (hasLength(accountDetails.getName())) { - labels.putIfAbsent(ENV, accountDetails.getName()); - } else { - labels.putIfAbsent(ENV, accountDetails.getAccountId()); + if (accountDetails != null) { + labels.putIfAbsent(TENANT, accountDetails.getTenant()); + if (hasLength(accountDetails.getName())) { + labels.putIfAbsent(ENV, accountDetails.getName()); + } else { + labels.putIfAbsent(ENV, accountDetails.getAccountId()); + } } if (labels.containsKey(SCRAPE_REGION_LABEL)) { labels.putIfAbsent(SITE, labels.get(SCRAPE_REGION_LABEL)); From 33d005d81354e94a5a4a9c917ecd4810a64e70fa Mon Sep 17 00:00:00 2001 From: jradhakrishnan Date: Wed, 30 Aug 2023 07:54:07 +0530 Subject: [PATCH 2/4] Add `tenant` label in ecs meta metrics --- .../ai/asserts/aws/exporter/ECSTaskProvider.java | 2 ++ .../asserts/aws/exporter/ECSTaskProviderTest.java | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/ai/asserts/aws/exporter/ECSTaskProvider.java b/src/main/java/ai/asserts/aws/exporter/ECSTaskProvider.java index 323858d8..793a9aca 100644 --- a/src/main/java/ai/asserts/aws/exporter/ECSTaskProvider.java +++ b/src/main/java/ai/asserts/aws/exporter/ECSTaskProvider.java @@ -51,6 +51,7 @@ import static ai.asserts.aws.MetricNameUtil.SCRAPE_NAMESPACE_LABEL; import static ai.asserts.aws.MetricNameUtil.SCRAPE_OPERATION_LABEL; import static ai.asserts.aws.MetricNameUtil.SCRAPE_REGION_LABEL; +import static ai.asserts.aws.MetricNameUtil.TENANT; /** * Builds ECS Task scrape targets. Scraping ECS is best done using the ECS Sidecar. @@ -111,6 +112,7 @@ public List collect() { target.getLogConfigs().forEach(logConfig -> { Map logInfoLabels = new TreeMap<>(); + logInfoLabels.put(TENANT, labels.getTenant()); logInfoLabels.put(SCRAPE_ACCOUNT_ID_LABEL, labels.getAccountId()); logInfoLabels.put(SCRAPE_REGION_LABEL, labels.getRegion()); logInfoLabels.put("cluster", labels.getCluster()); diff --git a/src/test/java/ai/asserts/aws/exporter/ECSTaskProviderTest.java b/src/test/java/ai/asserts/aws/exporter/ECSTaskProviderTest.java index e417bd87..00713faf 100644 --- a/src/test/java/ai/asserts/aws/exporter/ECSTaskProviderTest.java +++ b/src/test/java/ai/asserts/aws/exporter/ECSTaskProviderTest.java @@ -116,6 +116,7 @@ public void afterPropertiesSet() throws Exception { @Test public void collect() throws Exception { Resource cluster1 = Resource.builder() + .tenant("acme") .name("cluster1") .region("region") .account("account1") @@ -124,6 +125,7 @@ public void collect() throws Exception { Resource task1 = Resource.builder() + .tenant("acme") .name("task1") .build(); @@ -143,6 +145,7 @@ public void collect() throws Exception { tasksByCluster.put(cluster1, ImmutableMap.of(task1, ImmutableList.of(mockStaticConfig, mockStaticConfig))); Labels labels1 = Labels.builder() + .tenant("acme") .accountId("account1") .region("us-west-2") .cluster("cluster") @@ -162,6 +165,7 @@ public void collect() throws Exception { expect(sampleBuilder.buildSingleSample(CONTAINER_LOG_INFO_METRIC, ImmutableSortedMap.naturalOrder() + .put("tenant", "acme") .put("account_id", "account1") .put("region", "us-west-2") .put("cluster", "cluster") @@ -174,6 +178,7 @@ public void collect() throws Exception { .build(), 1.0D)).andReturn(Optional.of(mockSample)); Labels labels2 = Labels.builder() + .tenant("acme") .accountId("account1") .region("us-west-2") .cluster("cluster") @@ -192,6 +197,7 @@ public void collect() throws Exception { .build())); expect(sampleBuilder.buildSingleSample(CONTAINER_LOG_INFO_METRIC, ImmutableSortedMap.naturalOrder() + .put("tenant", "acme") .put("account_id", "account1") .put("region", "us-west-2") .put("cluster", "cluster") @@ -226,12 +232,14 @@ public void getService() { @Test public void getScrapeTargets() { AWSAccount awsAccount = AWSAccount.builder() + .tenant("acme") .accountId("account1") .regions(ImmutableSet.of("region")) .name("test-account") .build(); Resource cluster1 = Resource.builder() + .tenant("acme") .name("cluster1") .region("region") .account("account1") @@ -239,6 +247,7 @@ public void getScrapeTargets() { .build(); Resource task1 = Resource.builder() + .tenant("acme") .name("task1") .build(); @@ -265,7 +274,7 @@ void buildNewTargets(AWSAccount _account, ScrapeConfig _scrapeConfig, testClass.getTasksByCluster().put(cluster1, ImmutableMap.of(task1, ImmutableList.of(mockStaticConfig))); - expect(scrapeConfigProvider.getScrapeConfig(null)).andReturn(scrapeConfig); + expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig); expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(awsAccount)); expect(awsClientProvider.getECSClient("region", awsAccount)).andReturn(ecsClient); @@ -279,6 +288,7 @@ void buildNewTargets(AWSAccount _account, ScrapeConfig _scrapeConfig, @Test public void discoverNewTargets() { Resource cluster1 = Resource.builder() + .tenant("acme") .name("cluster1") .region("region") .account("account1") @@ -338,6 +348,7 @@ public void discoverNewTargets() { public void buildNewTargets() { expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig).anyTimes(); Resource cluster1 = Resource.builder() + .tenant("acme") .name("cluster1") .region("region") .account("account1") @@ -347,6 +358,7 @@ public void buildNewTargets() { Resource service2 = Resource.builder().arn("service2-arn").build(); Resource cluster2 = Resource.builder() + .tenant("acme") .name("cluster2") .region("region") .account("account1") From 7a8a56777d35f31f48b225bc05bf05316ee990d2 Mon Sep 17 00:00:00 2001 From: jradhakrishnan Date: Wed, 30 Aug 2023 07:54:57 +0530 Subject: [PATCH 3/4] Only process target groups in same account/region --- .../exporter/LBToLambdaRoutingBuilder.java | 76 ++++++++++--------- .../LBToLambdaRoutingBuilderTest.java | 6 +- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/src/main/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilder.java b/src/main/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilder.java index 3c3daf3c..c31c2a70 100644 --- a/src/main/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilder.java +++ b/src/main/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilder.java @@ -4,8 +4,8 @@ */ package ai.asserts.aws.exporter; -import ai.asserts.aws.AWSClientProvider; import ai.asserts.aws.AWSApiCallRateLimiter; +import ai.asserts.aws.AWSClientProvider; import ai.asserts.aws.SimpleTenantTask; import ai.asserts.aws.TaskExecutorUtil; import ai.asserts.aws.account.AWSAccount; @@ -80,42 +80,44 @@ private Pair, Set> buildRelations(String region, try { ElasticLoadBalancingV2Client elbV2Client = awsClientProvider.getELBV2Client(region, accountRegion); Map tgToLB = targetGroupLBMapProvider.getTgToLB(); - tgToLB.keySet().forEach(tg -> { - try { - String api = "ElasticLoadBalancingV2Client/describeTargetHealth"; - DescribeTargetHealthResponse response = rateLimiter.doWithRateLimit( - api, - ImmutableSortedMap.of( - SCRAPE_REGION_LABEL, region, - SCRAPE_ACCOUNT_ID_LABEL, accountRegion.getAccountId(), - SCRAPE_OPERATION_LABEL, api - ) - , () -> elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder() - .targetGroupArn(tg.getArn()) - .build())); - if (!isEmpty(response.targetHealthDescriptions())) { - response.targetHealthDescriptions().stream() - .map(tH -> resourceMapper.map(tH.target().id())) - .filter(opt -> opt.isPresent() && opt.get().getType().equals(LambdaFunction)) - .map(Optional::get) - .forEach(lambda -> routing.add(ResourceRelation.builder() - .from(tgToLB.get(tg)) - .to(lambda) - .name("ROUTES_TO") - .build())); - } - } catch (TargetGroupNotFoundException e) { - log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg); - missingTgs.add(tg); - } catch (Exception e) { - if (e.getCause() instanceof TargetGroupNotFoundException) { - log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg); - missingTgs.add(tg); - } else { - log.error("Failed to build resource relations", e); - } - } - }); + tgToLB.keySet().stream() + .filter(tg -> tg.getAccount().equals(accountRegion.getAccountId()) && region.equals(tg.getRegion())) + .forEach(tg -> { + try { + String api = "ElasticLoadBalancingV2Client/describeTargetHealth"; + DescribeTargetHealthResponse response = rateLimiter.doWithRateLimit( + api, + ImmutableSortedMap.of( + SCRAPE_REGION_LABEL, region, + SCRAPE_ACCOUNT_ID_LABEL, accountRegion.getAccountId(), + SCRAPE_OPERATION_LABEL, api + ) + , () -> elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder() + .targetGroupArn(tg.getArn()) + .build())); + if (!isEmpty(response.targetHealthDescriptions())) { + response.targetHealthDescriptions().stream() + .map(tH -> resourceMapper.map(tH.target().id())) + .filter(opt -> opt.isPresent() && opt.get().getType().equals(LambdaFunction)) + .map(Optional::get) + .forEach(lambda -> routing.add(ResourceRelation.builder() + .from(tgToLB.get(tg)) + .to(lambda) + .name("ROUTES_TO") + .build())); + } + } catch (TargetGroupNotFoundException e) { + log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg); + missingTgs.add(tg); + } catch (Exception e) { + if (e.getCause() instanceof TargetGroupNotFoundException) { + log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg); + missingTgs.add(tg); + } else { + log.error("Failed to build resource relations", e); + } + } + }); } catch (Exception e) { log.error("Error " + accountRegion, e); } diff --git a/src/test/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilderTest.java b/src/test/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilderTest.java index f5ffa177..cb496406 100644 --- a/src/test/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilderTest.java +++ b/src/test/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilderTest.java @@ -70,7 +70,7 @@ public void setup() { new TaskExecutorUtil(new TestTaskThreadPool(), new AWSApiCallRateLimiter(metricCollector, (account) -> "acme"))); - AWSAccount awsAccount = new AWSAccount("tenant", "account", "accessId", "secretKey", "role", + AWSAccount awsAccount = new AWSAccount("acme", "account", "accessId", "secretKey", "role", ImmutableSet.of("region")); expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(awsAccount)).anyTimes(); expect(awsClientProvider.getELBV2Client("region", awsAccount)).andReturn(elbV2Client).anyTimes(); @@ -80,7 +80,11 @@ public void setup() { @Test void getRoutings() { + expect(targetGroupResource.getAccount()).andReturn("account"); + expect(targetGroupResource.getRegion()).andReturn("region"); expect(targetGroupResource.getArn()).andReturn("tg-arn"); + expect(targetGroupResource2.getAccount()).andReturn("account"); + expect(targetGroupResource2.getRegion()).andReturn("region"); expect(targetGroupResource2.getArn()).andReturn("tg-arn2"); expect(elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder() .targetGroupArn("tg-arn") From 3b16861ebe39dbafa0c4909f82fcd7d70bfe59f9 Mon Sep 17 00:00:00 2001 From: jradhakrishnan Date: Wed, 30 Aug 2023 07:55:19 +0530 Subject: [PATCH 4/4] Replace `tenant` with `asserts_customer` in internal metrics --- .../ai/asserts/aws/AWSApiCallRateLimiter.java | 17 ++++++++++++----- .../java/ai/asserts/aws/MetricNameUtil.java | 1 + .../asserts/aws/AWSApiCallRateLimiterTest.java | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/main/java/ai/asserts/aws/AWSApiCallRateLimiter.java b/src/main/java/ai/asserts/aws/AWSApiCallRateLimiter.java index f67c2f62..a8f08ea6 100644 --- a/src/main/java/ai/asserts/aws/AWSApiCallRateLimiter.java +++ b/src/main/java/ai/asserts/aws/AWSApiCallRateLimiter.java @@ -17,6 +17,7 @@ import java.util.concurrent.Callable; import static ai.asserts.aws.MetricNameUtil.ASSERTS_ERROR_TYPE; +import static ai.asserts.aws.MetricNameUtil.ASSERTS_CUSTOMER; import static ai.asserts.aws.MetricNameUtil.SCRAPE_ACCOUNT_ID_LABEL; import static ai.asserts.aws.MetricNameUtil.SCRAPE_ERROR_COUNT_METRIC; import static ai.asserts.aws.MetricNameUtil.SCRAPE_LATENCY_METRIC; @@ -71,21 +72,27 @@ public , V> V doWithRateLimit(String api, SortedMap errorLabels = new TreeMap<>(labels); errorLabels.put(ASSERTS_ERROR_TYPE, e.getClass().getSimpleName()); + + // In SaaS mode, we don't want the exporter internal metrics to end up in the tenant's TSDB + errorLabels.remove(TENANT); if (tenantName != null) { - errorLabels.put(TENANT, tenantName); + errorLabels.put(ASSERTS_CUSTOMER, tenantName); } metricCollector.recordCounterValue(SCRAPE_ERROR_COUNT_METRIC, errorLabels, 1); throw new RuntimeException(e); } finally { tick = System.currentTimeMillis() - tick; - labels = new TreeMap<>(labels); + + // In SaaS mode, we don't want the exporter internal metrics to end up in the tenant's TSDB + SortedMap latencyLabels = new TreeMap<>(labels); + latencyLabels.remove(TENANT); if (tenantName != null) { - labels.put(TENANT, tenantName); + latencyLabels.put(ASSERTS_CUSTOMER, tenantName); } - metricCollector.recordLatency(SCRAPE_LATENCY_METRIC, labels, tick); + metricCollector.recordLatency(SCRAPE_LATENCY_METRIC, latencyLabels, tick); } } diff --git a/src/main/java/ai/asserts/aws/MetricNameUtil.java b/src/main/java/ai/asserts/aws/MetricNameUtil.java index b596cc4b..731ec645 100644 --- a/src/main/java/ai/asserts/aws/MetricNameUtil.java +++ b/src/main/java/ai/asserts/aws/MetricNameUtil.java @@ -19,6 +19,7 @@ public class MetricNameUtil { public static final String SCRAPE_LATENCY_METRIC = "aws_exporter_milliseconds"; public static final String ASSERTS_ERROR_TYPE = "asserts_error_type"; public static final String TENANT = "tenant"; + public static final String ASSERTS_CUSTOMER = "asserts_customer"; public static final String ENV = "asserts_env"; public static final String SITE = "asserts_site"; public static final String SCRAPE_ERROR_COUNT_METRIC = "aws_exporter_error_total"; diff --git a/src/test/java/ai/asserts/aws/AWSApiCallRateLimiterTest.java b/src/test/java/ai/asserts/aws/AWSApiCallRateLimiterTest.java index c49853c7..a60d4b78 100644 --- a/src/test/java/ai/asserts/aws/AWSApiCallRateLimiterTest.java +++ b/src/test/java/ai/asserts/aws/AWSApiCallRateLimiterTest.java @@ -35,7 +35,7 @@ public void setup() { labels = new TreeMap<>(); labels.put("account_id", "account"); labels.put("region", "region"); - labels.put("tenant", "acme"); + labels.put("asserts_customer", "acme"); rateLimiter = new AWSApiCallRateLimiter(metricCollector, (accountId) -> "acme", 1.0D); }