diff --git a/app/stacks/cumulus/iam.tf b/app/stacks/cumulus/iam.tf
index 7b92679..d8fad38 100644
--- a/app/stacks/cumulus/iam.tf
+++ b/app/stacks/cumulus/iam.tf
@@ -33,6 +33,7 @@ data "aws_iam_policy_document" "allow_sfn_distributed_maps" {
       "states:DescribeExecution",
       "states:StartExecution",
       "states:StopExecution",
+      "states:RedriveExecution",
     ]
     resources = ["*"]
   }
diff --git a/app/stacks/cumulus/templates/discover-granules-workflow.asl.json b/app/stacks/cumulus/templates/discover-granules-workflow.asl.json
index 720fc91..d334230 100644
--- a/app/stacks/cumulus/templates/discover-granules-workflow.asl.json
+++ b/app/stacks/cumulus/templates/discover-granules-workflow.asl.json
@@ -31,7 +31,7 @@
       "Type": "Map",
       "End": true,
       "MaxConcurrency": 10,
-      "ToleratedFailurePercentage": 0,
+      "ToleratedFailurePercentage": 3,
       "ItemReader": {
         "Resource": "arn:aws:states:::s3:getObject",
         "ReaderConfig": {
@@ -89,7 +89,8 @@
                   "collection": "{$.meta.collection}",
                   "buckets": "{$.meta.buckets}",
                   "stack": "{$.meta.stack}",
-                  "duplicateGranuleHandling": "{$.meta.collection.duplicateHandling}"
+                  "duplicateGranuleHandling": "{$.meta.collection.duplicateHandling}",
+                  "concurrency": 8
                 }
               }
             },
@@ -98,6 +99,7 @@
             "Retry": [
               {
                 "ErrorEquals": [
+                  "Lambda.ClientExecutionTimeoutException",
                   "Lambda.ServiceException",
                   "Lambda.AWSLambdaException",
                   "Lambda.SdkClientException"
@@ -105,6 +107,14 @@
                 "IntervalSeconds": 2,
                 "MaxAttempts": 6,
                 "BackoffRate": 2
+              },
+              {
+                "ErrorEquals": [
+                  "Lambda.Unknown",
+                  "States.Timeout"
+                ],
+                "IntervalSeconds": 30,
+                "MaxAttempts": 1
               }
             ],
             "Next": "BatchGranules"
@@ -131,6 +141,7 @@
             "Retry": [
               {
                 "ErrorEquals": [
+                  "Lambda.ClientExecutionTimeoutException",
                   "Lambda.ServiceException",
                   "Lambda.AWSLambdaException",
                   "Lambda.SdkClientException"
@@ -181,6 +192,7 @@
             "Retry": [
               {
                 "ErrorEquals": [
+                  "Lambda.ClientExecutionTimeoutException",
                   "Lambda.ServiceException",
                   "Lambda.AWSLambdaException",
                   "Lambda.SdkClientException"
@@ -230,6 +242,7 @@
             "Retry": [
               {
                 "ErrorEquals": [
+                  "Lambda.ClientExecutionTimeoutException",
                   "Lambda.ServiceException",
                   "Lambda.AWSLambdaException",
                   "Lambda.SdkClientException"
@@ -290,6 +303,7 @@
             "Retry": [
               {
                 "ErrorEquals": [
+                  "Lambda.ClientExecutionTimeoutException",
                   "Lambda.ServiceException",
                   "Lambda.AWSLambdaException",
                   "Lambda.SdkClientException"