Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Radha/sc 16157 fix some more issues #315

Merged
merged 4 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions src/main/java/ai/asserts/aws/AWSApiCallRateLimiter.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.concurrent.Callable;

import static ai.asserts.aws.MetricNameUtil.ASSERTS_ERROR_TYPE;
import static ai.asserts.aws.MetricNameUtil.ASSERTS_CUSTOMER;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_ACCOUNT_ID_LABEL;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_ERROR_COUNT_METRIC;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_LATENCY_METRIC;
Expand Down Expand Up @@ -71,21 +72,27 @@ public <K extends AWSAPICall<V>, V> V doWithRateLimit(String api, SortedMap<Stri
tick = System.currentTimeMillis();
return k.makeCall();
} catch (Throwable e) {
log.error("Exception", e);
log.error("Exception in: " + regionKey, e);
SortedMap<String, String> errorLabels = new TreeMap<>(labels);
errorLabels.put(ASSERTS_ERROR_TYPE, e.getClass().getSimpleName());

// In SaaS mode, we don't want the exporter internal metrics to end up in the tenant's TSDB
errorLabels.remove(TENANT);
if (tenantName != null) {
errorLabels.put(TENANT, tenantName);
errorLabels.put(ASSERTS_CUSTOMER, tenantName);
}
metricCollector.recordCounterValue(SCRAPE_ERROR_COUNT_METRIC, errorLabels, 1);
throw new RuntimeException(e);
} finally {
tick = System.currentTimeMillis() - tick;
labels = new TreeMap<>(labels);

// In SaaS mode, we don't want the exporter internal metrics to end up in the tenant's TSDB
SortedMap<String, String> latencyLabels = new TreeMap<>(labels);
latencyLabels.remove(TENANT);
if (tenantName != null) {
labels.put(TENANT, tenantName);
latencyLabels.put(ASSERTS_CUSTOMER, tenantName);
}
metricCollector.recordLatency(SCRAPE_LATENCY_METRIC, labels, tick);
metricCollector.recordLatency(SCRAPE_LATENCY_METRIC, latencyLabels, tick);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/ai/asserts/aws/MetricNameUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class MetricNameUtil {
public static final String SCRAPE_LATENCY_METRIC = "aws_exporter_milliseconds";
public static final String ASSERTS_ERROR_TYPE = "asserts_error_type";
public static final String TENANT = "tenant";
public static final String ASSERTS_CUSTOMER = "asserts_customer";
public static final String ENV = "asserts_env";
public static final String SITE = "asserts_site";
public static final String SCRAPE_ERROR_COUNT_METRIC = "aws_exporter_error_total";
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/ai/asserts/aws/exporter/ECSTaskProvider.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import static ai.asserts.aws.MetricNameUtil.SCRAPE_NAMESPACE_LABEL;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_OPERATION_LABEL;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_REGION_LABEL;
import static ai.asserts.aws.MetricNameUtil.TENANT;

/**
* Builds ECS Task scrape targets. Scraping ECS is best done using the ECS Sidecar.
Expand Down Expand Up @@ -111,6 +112,7 @@ public List<MetricFamilySamples> collect() {

target.getLogConfigs().forEach(logConfig -> {
Map<String, String> logInfoLabels = new TreeMap<>();
logInfoLabels.put(TENANT, labels.getTenant());
logInfoLabels.put(SCRAPE_ACCOUNT_ID_LABEL, labels.getAccountId());
logInfoLabels.put(SCRAPE_REGION_LABEL, labels.getRegion());
logInfoLabels.put("cluster", labels.getCluster());
Expand Down
76 changes: 39 additions & 37 deletions src/main/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
*/
package ai.asserts.aws.exporter;

import ai.asserts.aws.AWSClientProvider;
import ai.asserts.aws.AWSApiCallRateLimiter;
import ai.asserts.aws.AWSClientProvider;
import ai.asserts.aws.SimpleTenantTask;
import ai.asserts.aws.TaskExecutorUtil;
import ai.asserts.aws.account.AWSAccount;
Expand Down Expand Up @@ -80,42 +80,44 @@ private Pair<Set<ResourceRelation>, Set<Resource>> buildRelations(String region,
try {
ElasticLoadBalancingV2Client elbV2Client = awsClientProvider.getELBV2Client(region, accountRegion);
Map<Resource, Resource> tgToLB = targetGroupLBMapProvider.getTgToLB();
tgToLB.keySet().forEach(tg -> {
try {
String api = "ElasticLoadBalancingV2Client/describeTargetHealth";
DescribeTargetHealthResponse response = rateLimiter.doWithRateLimit(
api,
ImmutableSortedMap.of(
SCRAPE_REGION_LABEL, region,
SCRAPE_ACCOUNT_ID_LABEL, accountRegion.getAccountId(),
SCRAPE_OPERATION_LABEL, api
)
, () -> elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder()
.targetGroupArn(tg.getArn())
.build()));
if (!isEmpty(response.targetHealthDescriptions())) {
response.targetHealthDescriptions().stream()
.map(tH -> resourceMapper.map(tH.target().id()))
.filter(opt -> opt.isPresent() && opt.get().getType().equals(LambdaFunction))
.map(Optional::get)
.forEach(lambda -> routing.add(ResourceRelation.builder()
.from(tgToLB.get(tg))
.to(lambda)
.name("ROUTES_TO")
.build()));
}
} catch (TargetGroupNotFoundException e) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} catch (Exception e) {
if (e.getCause() instanceof TargetGroupNotFoundException) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} else {
log.error("Failed to build resource relations", e);
}
}
});
tgToLB.keySet().stream()
.filter(tg -> tg.getAccount().equals(accountRegion.getAccountId()) && region.equals(tg.getRegion()))
.forEach(tg -> {
try {
String api = "ElasticLoadBalancingV2Client/describeTargetHealth";
DescribeTargetHealthResponse response = rateLimiter.doWithRateLimit(
api,
ImmutableSortedMap.of(
SCRAPE_REGION_LABEL, region,
SCRAPE_ACCOUNT_ID_LABEL, accountRegion.getAccountId(),
SCRAPE_OPERATION_LABEL, api
)
, () -> elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder()
.targetGroupArn(tg.getArn())
.build()));
if (!isEmpty(response.targetHealthDescriptions())) {
response.targetHealthDescriptions().stream()
.map(tH -> resourceMapper.map(tH.target().id()))
.filter(opt -> opt.isPresent() && opt.get().getType().equals(LambdaFunction))
.map(Optional::get)
.forEach(lambda -> routing.add(ResourceRelation.builder()
.from(tgToLB.get(tg))
.to(lambda)
.name("ROUTES_TO")
.build()));
}
} catch (TargetGroupNotFoundException e) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} catch (Exception e) {
if (e.getCause() instanceof TargetGroupNotFoundException) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} else {
log.error("Failed to build resource relations", e);
}
}
});
} catch (Exception e) {
log.error("Error " + accountRegion, e);
}
Expand Down
12 changes: 7 additions & 5 deletions src/main/java/ai/asserts/aws/exporter/MetricSampleBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,13 @@ public Optional<Sample> buildSingleSample(String metricName, Map<String, String>
Double metric) {
labels = new TreeMap<>(labels);
AWSAccount accountDetails = taskExecutorUtil.getAccountDetails();
labels.putIfAbsent(TENANT, accountDetails.getTenant());
if (hasLength(accountDetails.getName())) {
labels.putIfAbsent(ENV, accountDetails.getName());
} else {
labels.putIfAbsent(ENV, accountDetails.getAccountId());
if (accountDetails != null) {
labels.putIfAbsent(TENANT, accountDetails.getTenant());
if (hasLength(accountDetails.getName())) {
labels.putIfAbsent(ENV, accountDetails.getName());
} else {
labels.putIfAbsent(ENV, accountDetails.getAccountId());
}
}
if (labels.containsKey(SCRAPE_REGION_LABEL)) {
labels.putIfAbsent(SITE, labels.get(SCRAPE_REGION_LABEL));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void setup() {
labels = new TreeMap<>();
labels.put("account_id", "account");
labels.put("region", "region");
labels.put("tenant", "acme");
labels.put("asserts_customer", "acme");
rateLimiter = new AWSApiCallRateLimiter(metricCollector,
(accountId) -> "acme", 1.0D);
}
Expand Down
14 changes: 13 additions & 1 deletion src/test/java/ai/asserts/aws/exporter/ECSTaskProviderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ public void afterPropertiesSet() throws Exception {
@Test
public void collect() throws Exception {
Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
Expand All @@ -124,6 +125,7 @@ public void collect() throws Exception {


Resource task1 = Resource.builder()
.tenant("acme")
.name("task1")
.build();

Expand All @@ -143,6 +145,7 @@ public void collect() throws Exception {
tasksByCluster.put(cluster1, ImmutableMap.of(task1, ImmutableList.of(mockStaticConfig, mockStaticConfig)));

Labels labels1 = Labels.builder()
.tenant("acme")
.accountId("account1")
.region("us-west-2")
.cluster("cluster")
Expand All @@ -162,6 +165,7 @@ public void collect() throws Exception {

expect(sampleBuilder.buildSingleSample(CONTAINER_LOG_INFO_METRIC,
ImmutableSortedMap.<String, String>naturalOrder()
.put("tenant", "acme")
.put("account_id", "account1")
.put("region", "us-west-2")
.put("cluster", "cluster")
Expand All @@ -174,6 +178,7 @@ public void collect() throws Exception {
.build(), 1.0D)).andReturn(Optional.of(mockSample));

Labels labels2 = Labels.builder()
.tenant("acme")
.accountId("account1")
.region("us-west-2")
.cluster("cluster")
Expand All @@ -192,6 +197,7 @@ public void collect() throws Exception {
.build()));
expect(sampleBuilder.buildSingleSample(CONTAINER_LOG_INFO_METRIC,
ImmutableSortedMap.<String, String>naturalOrder()
.put("tenant", "acme")
.put("account_id", "account1")
.put("region", "us-west-2")
.put("cluster", "cluster")
Expand Down Expand Up @@ -226,19 +232,22 @@ public void getService() {
@Test
public void getScrapeTargets() {
AWSAccount awsAccount = AWSAccount.builder()
.tenant("acme")
.accountId("account1")
.regions(ImmutableSet.of("region"))
.name("test-account")
.build();

Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
.arn("cluster1-arn")
.build();

Resource task1 = Resource.builder()
.tenant("acme")
.name("task1")
.build();

Expand All @@ -265,7 +274,7 @@ void buildNewTargets(AWSAccount _account, ScrapeConfig _scrapeConfig,

testClass.getTasksByCluster().put(cluster1, ImmutableMap.of(task1, ImmutableList.of(mockStaticConfig)));

expect(scrapeConfigProvider.getScrapeConfig(null)).andReturn(scrapeConfig);
expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig);

expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(awsAccount));
expect(awsClientProvider.getECSClient("region", awsAccount)).andReturn(ecsClient);
Expand All @@ -279,6 +288,7 @@ void buildNewTargets(AWSAccount _account, ScrapeConfig _scrapeConfig,
@Test
public void discoverNewTargets() {
Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
Expand Down Expand Up @@ -338,6 +348,7 @@ public void discoverNewTargets() {
public void buildNewTargets() {
expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig).anyTimes();
Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
Expand All @@ -347,6 +358,7 @@ public void buildNewTargets() {
Resource service2 = Resource.builder().arn("service2-arn").build();

Resource cluster2 = Resource.builder()
.tenant("acme")
.name("cluster2")
.region("region")
.account("account1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public void setup() {
new TaskExecutorUtil(new TestTaskThreadPool(),
new AWSApiCallRateLimiter(metricCollector, (account) -> "acme")));

AWSAccount awsAccount = new AWSAccount("tenant", "account", "accessId", "secretKey", "role",
AWSAccount awsAccount = new AWSAccount("acme", "account", "accessId", "secretKey", "role",
ImmutableSet.of("region"));
expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(awsAccount)).anyTimes();
expect(awsClientProvider.getELBV2Client("region", awsAccount)).andReturn(elbV2Client).anyTimes();
Expand All @@ -80,7 +80,11 @@ public void setup() {

@Test
void getRoutings() {
expect(targetGroupResource.getAccount()).andReturn("account");
expect(targetGroupResource.getRegion()).andReturn("region");
expect(targetGroupResource.getArn()).andReturn("tg-arn");
expect(targetGroupResource2.getAccount()).andReturn("account");
expect(targetGroupResource2.getRegion()).andReturn("region");
expect(targetGroupResource2.getArn()).andReturn("tg-arn2");
expect(elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder()
.targetGroupArn("tg-arn")
Expand Down
Loading