CUMULUS-3427 (#3491)
* first commit

* making timeouts configurable

* changelog + docs

* fixing variable

* fixing docs + removing test vars

* testing message_remover

* adding variables to modules

* fixing mistakes

* fixing errors

* removed config for non-tf-modules lambdas

* naming rehaul

* fixing issues

* final fixes before review

* removing list

* PR feedback

* small format changes

* re-adding rds-cluster-tf vars

* rds-cluster change

* PR feedback

* PR feedback

* fixed small typo

* PR feedback

* removing defaults + small fix

* re-adding in default example vals

* fixing typo
Nnaga1 committed Oct 13, 2023
1 parent 9493d00 commit b3166bf
Showing 63 changed files with 329 additions and 186 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -49,6 +49,12 @@ Users/clients that do not make use of these endpoints will not be impacted.
- Added `max_download_time` column to PostgreSQL `providers` table
- Updated `@cumulus/ingest/lock` to check expired locks based on `provider.maxDownloadTime`

### Fixed

- **CUMULUS-3427**
- Fixed an issue where some lambda and task memory sizes and timeouts were not configurable
- Changed the naming convention for memory size and timeout configuration keys to simply the lambda name

### Changed

- **CUMULUS-3351**
59 changes: 10 additions & 49 deletions docs/configuration/task-configuration.md
@@ -58,75 +58,36 @@ elasticsearch_client_config = {

## lambda_timeouts

A configurable map of timeouts (in seconds) for cumulus ingest module task lambdas in the form:
An optional configurable map of timeouts (in seconds) for cumulus lambdas in the form:

```hcl
<lambda_identifier>_timeout: <timeout>
type = map(number)
lambda_timeouts = {
<lambda_name> = <timeout>
}
```

Currently the following values are supported:

- add_missing_file_checksums_task_timeout
- discover_granules_task_timeout
- discover_pdrs_task_timeout
- fake_processing_task_timeout
- files_to_granules_task_timeout
- hello_world_task_timeout
- hyrax_metadata_update_tasks_timeout
- lzards_backup_task_timeout
- move_granules_task_timeout
- parse_pdr_task_timeout
- pdr_status_check_task_timeout
- post_to_cmr_task_timeout
- queue_granules_task_timeout
- queue_pdrs_task_timeout
- queue_workflow_task_timeout
- sf_sqs_report_task_timeout
- sync_granule_task_timeout
- update_granules_cmr_metadata_file_links_task_timeout

### Example

```tf
lambda_timeouts = {
discover_granules_task_timeout = 300
sqsMessageRemover = 300
}
```
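
As the module sources in this change show, consuming modules read each entry with Terraform's `lookup()` function and fall back to a built-in default when the lambda's key is absent. Below is a minimal, standalone sketch of that pattern; the `sqsMessageRemover` key and the 100-second fallback mirror values used elsewhere in this change but are illustrative here:

```hcl
variable "lambda_timeouts" {
  description = "Configurable map of timeouts (in seconds) for lambdas"
  type        = map(number)
  default     = {}
}

# Use the configured timeout for sqsMessageRemover if provided; otherwise fall back to 100 seconds.
locals {
  sqs_message_remover_timeout = lookup(var.lambda_timeouts, "sqsMessageRemover", 100)
}

output "sqs_message_remover_timeout" {
  value = local.sqs_message_remover_timeout
}
```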

## lambda_memory_sizes

A configurable map of memory sizes (in MBs) for cumulus ingest module task lambdas in the form:
An optional configurable map of memory sizes (in MBs) for cumulus lambdas in the form:

```hcl
<lambda_identifier>_memory_size: <memory_size>
type = map(number)
lambda_memory_sizes = {
<lambda_name> = <memory_size>
}
```

Currently the following values are supported:

- add_missing_file_checksums_task_memory_size
- discover_granules_task_memory_size
- discover_pdrs_task_memory_size
- fake_processing_task_memory_size
- hyrax_metadata_updates_task_memory_size
- lzards_backup_task_memory_size
- move_granules_task_memory_size
- parse_pdr_task_memory_size
- pdr_status_check_task_memory_size
- post_to_cmr_task_memory_size
- queue_granules_task_memory_size
- queue_pdrs_task_memory_size
- queue_workflow_task_memory_size
- sf_sqs_report_task_memory_size
- sync_granule_task_memory_size
- update_cmr_acess_constraints_task_memory_size
- update_granules_cmr_metadata_file_links_task_memory_size

### Example

```tf
lambda_memory_sizes = {
queue_granules_task_memory_size = 1036
SyncGranule = 1024
}
```
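
Both maps are passed straight through from a deployment's root module into the Cumulus modules. A sketch of that wiring, following the example deployment in this repository (the module source is the in-repo example path, and the cumulus module's other required inputs are omitted for brevity):

```hcl
variable "lambda_timeouts" {
  description = "Configurable map of timeouts for lambdas"
  type        = map(number)
  default     = {}
}

variable "lambda_memory_sizes" {
  description = "Configurable map of memory sizes for lambdas"
  type        = map(number)
  default     = {}
}

module "cumulus" {
  source = "../../tf-modules/cumulus"

  # ... other required cumulus module inputs omitted for brevity ...

  lambda_timeouts     = var.lambda_timeouts
  lambda_memory_sizes = var.lambda_memory_sizes
}
```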
48 changes: 25 additions & 23 deletions example/cumulus-tf/cumulus_distribution.tf
@@ -18,28 +18,30 @@ resource "aws_s3_bucket_object" "bucket_map_yaml_distribution" {
}

module "cumulus_distribution" {
source = "../../tf-modules/cumulus_distribution"
deploy_to_ngap = true
prefix = var.prefix
api_gateway_stage = local.distribution_api_gateway_stage
api_url = var.cumulus_distribution_url
bucket_map_file = aws_s3_bucket_object.bucket_map_yaml_distribution.id
bucketname_prefix = ""
cmr_acl_based_credentials = true
cmr_environment = var.cmr_environment
cmr_provider = var.cmr_provider
lambda_subnet_ids = local.subnet_ids
oauth_client_id = var.csdap_client_id
oauth_client_password = var.csdap_client_password
oauth_host_url = var.csdap_host_url
oauth_provider = "cognito"
permissions_boundary_arn = var.permissions_boundary_arn
buckets = var.buckets
sts_credentials_lambda_function_arn = data.aws_lambda_function.sts_credentials.arn
source = "../../tf-modules/cumulus_distribution"
deploy_to_ngap = true
prefix = var.prefix
api_gateway_stage = local.distribution_api_gateway_stage
api_url = var.cumulus_distribution_url
bucket_map_file = aws_s3_bucket_object.bucket_map_yaml_distribution.id
bucketname_prefix = ""
cmr_acl_based_credentials = true
cmr_environment = var.cmr_environment
cmr_provider = var.cmr_provider
lambda_subnet_ids = local.subnet_ids
oauth_client_id = var.csdap_client_id
oauth_client_password = var.csdap_client_password
oauth_host_url = var.csdap_host_url
oauth_provider = "cognito"
permissions_boundary_arn = var.permissions_boundary_arn
buckets = var.buckets
sts_credentials_lambda_function_arn = data.aws_lambda_function.sts_credentials.arn
sts_policy_helper_lambda_function_arn = data.aws_lambda_function.sts_policy_helper.arn
system_bucket = var.system_bucket
tags = local.tags
vpc_id = local.vpc_id
default_log_retention_days = var.default_log_retention_days
cloudwatch_log_retention_periods = var.cloudwatch_log_retention_periods
system_bucket = var.system_bucket
tags = local.tags
vpc_id = local.vpc_id
default_log_retention_days = var.default_log_retention_days
cloudwatch_log_retention_periods = var.cloudwatch_log_retention_periods
lambda_timeouts = var.lambda_timeouts
lambda_memory_sizes = var.lambda_memory_sizes
}
5 changes: 4 additions & 1 deletion example/cumulus-tf/main.tf
@@ -86,7 +86,10 @@ module "cumulus" {
}]

vpc_id = var.vpc_id != null ? var.vpc_id : data.aws_vpc.application_vpc[0].id
lambda_subnet_ids = local.subnet_ids

lambda_subnet_ids = local.subnet_ids
lambda_timeouts = var.lambda_timeouts
lambda_memory_sizes = var.lambda_memory_sizes

rds_security_group = local.rds_security_group
rds_user_access_secret_arn = local.rds_credentials_secret_arn
3 changes: 2 additions & 1 deletion example/cumulus-tf/tea_s3_credentials_endpoint_test.tf
@@ -26,6 +26,7 @@ module "tea_s3_credentials_endpoint_test" {
urs_url = "https://uat.urs.earthdata.nasa.gov"
cmr_acl_based_credentials = true
vpc_id = local.vpc_id

lambda_memory_sizes = var.lambda_memory_sizes
lambda_timeouts = var.lambda_timeouts
tags = local.tags
}
25 changes: 20 additions & 5 deletions example/cumulus-tf/variables.tf
@@ -407,14 +407,29 @@ variable "orca_s3_secret_key" {
}

variable "lambda_timeouts" {
type = map(string)
default = {}
description = "Configurable map of timeouts for lambdas"
type = map(number)
default = {
cleanExecutions = 400 # archive
DistributionApiEndpoints = 400 # cumulus_distribution
s3-credentials-endpoint = 400 # distribution
HelloWorld = 400 # ingest
s3-replicator = 400 # s3-replicator
TeaCache = 400 # tea-map-cache
}
}

variable "lambda_memory_sizes" {
description = "Memory sizes for lambda functions"
type = map(string)
default = {}
description = "Configurable map of memory sizes for lambdas"
type = map(number)
default = {
cleanExecutions = 384 # archive
DistributionApiEndpoints = 384 # cumulus_distribution
s3-credentials-endpoint = 384 # distribution
HelloWorld = 384 # ingest
s3-replicator = 384 # s3-replicator
TeaCache = 384 # tea-map-cache
}
}

variable "optional_dynamo_tables" {
6 changes: 5 additions & 1 deletion example/data-persistence-tf/main.tf
@@ -37,6 +37,8 @@ module "provision_database" {
rds_user_password = var.rds_user_password == "" ? random_string.db_pass.result : var.rds_user_password
rds_connection_timing_configuration = var.rds_connection_timing_configuration
dbRecreation = true
lambda_timeouts = var.lambda_timeouts
lambda_memory_sizes = var.lambda_memory_sizes
}

module "data_persistence" {
@@ -52,5 +54,7 @@ module "data_persistence" {
rds_security_group_id = var.rds_security_group
rds_user_access_secret_arn = module.provision_database.database_credentials_secret_arn
permissions_boundary_arn = var.permissions_boundary_arn
tags = merge(var.tags, { Deployment = var.prefix })
tags = merge(var.tags, { Deployment = var.prefix })
lambda_timeouts = var.lambda_timeouts
lambda_memory_sizes = var.lambda_memory_sizes
}
16 changes: 16 additions & 0 deletions example/data-persistence-tf/variables.tf
@@ -92,3 +92,19 @@ variable "subnets_tag_name" {
type = string
default = "Private application us-east-1a *"
}

variable "lambda_memory_sizes" {
description = "Configurable map of memory sizes for lambdas"
type = map(number)
default = {
ProvisionPostgresDatabase = 384 # data-persistence
}
}

variable "lambda_timeouts" {
description = "Configurable map of timeouts for lambdas"
type = map(number)
default = {
ProvisionPostgresDatabase = 100 # data-persistence
}
}
2 changes: 2 additions & 0 deletions example/rds-cluster-tf/main.tf
@@ -25,4 +25,6 @@ module "rds_cluster" {
cluster_identifier = var.cluster_identifier
tags = var.tags
snapshot_identifier = var.snapshot_identifier
lambda_timeouts = var.lambda_timeouts
lambda_memory_sizes = var.lambda_memory_sizes
}
15 changes: 15 additions & 0 deletions example/rds-cluster-tf/variables.tf
@@ -78,3 +78,18 @@ variable "subnets_tag_name" {
default = "Private application *"
}

variable "lambda_memory_sizes" {
description = "Configurable map of memory sizes for lambdas"
type = map(number)
default = {
ProvisionPostgresDatabase = 384 # cumulus-rds-tf
}
}

variable "lambda_timeouts" {
description = "Configurable map of timeouts for lambdas"
type = map(number)
default = {
ProvisionPostgresDatabase = 100 # cumulus-rds-tf
}
}
2 changes: 1 addition & 1 deletion example/rds-cluster-tf/vpc.tf
@@ -6,7 +6,7 @@ data "aws_vpc" "application_vpc" {
}

data "aws_subnets" "subnet_ids" {
count = length(var.lambda_subnet_ids) == 0 ? 1 : 0
count = length(var.subnets) == 0 ? 1 : 0
filter {
name = "vpc-id"
values = [var.vpc_id != null ? var.vpc_id : data.aws_vpc.application_vpc[0].id]
4 changes: 2 additions & 2 deletions lambdas/db-provision-user-database/main.tf
@@ -15,8 +15,8 @@ resource "aws_lambda_function" "provision_database" {
handler = "index.handler"
role = aws_iam_role.db_provision.arn
runtime = "nodejs16.x"
memory_size = 256
timeout = 500
memory_size = lookup(var.lambda_memory_sizes, "ProvisionPostgresDatabase", 512)
timeout = lookup(var.lambda_timeouts, "ProvisionPostgresDatabase", 500)
environment {
variables = {
acquireTimeoutMillis = var.rds_connection_timing_configuration.acquireTimeoutMillis
11 changes: 11 additions & 0 deletions lambdas/db-provision-user-database/variables.tf
@@ -58,3 +58,14 @@ variable "dbRecreation" {
default = false
}

variable "lambda_memory_sizes" {
description = "Configurable map of memory sizes for lambdas"
type = map(number)
default = {}
}

variable "lambda_timeouts" {
description = "Configurable map of timeouts for lambdas"
type = map(number)
default = {}
}
4 changes: 2 additions & 2 deletions lambdas/sqs-message-remover/main.tf
@@ -5,8 +5,8 @@ resource "aws_lambda_function" "sqs_message_remover" {
handler = "index.handler"
role = var.lambda_processing_role_arn
runtime = "nodejs16.x"
timeout = 100
memory_size = 256
timeout = lookup(var.lambda_timeouts, "sqsMessageRemover", 100)
memory_size = lookup(var.lambda_memory_sizes, "sqsMessageRemover", 512)
environment {
variables = {
stackName = var.prefix
12 changes: 12 additions & 0 deletions lambdas/sqs-message-remover/variables.tf
@@ -17,6 +17,18 @@ variable "lambda_processing_role_arn" {

# Optional

variable "lambda_memory_sizes" {
description = "Configurable map of memory sizes for lambdas"
type = map(number)
default = {}
}

variable "lambda_timeouts" {
description = "Configurable map of timeouts for lambdas"
type = map(number)
default = {}
}

variable "lambda_subnet_ids" {
description = "Subnet IDs for Lambdas"
type = list(string)
8 changes: 4 additions & 4 deletions tf-modules/archive/api.tf
@@ -149,11 +149,11 @@ resource "aws_lambda_function" "private_api" {
handler = "index.handler"
role = aws_iam_role.lambda_api_gateway.arn
runtime = "nodejs16.x"
timeout = 100
timeout = lookup(var.lambda_timeouts, "PrivateApiLambda", 100)
environment {
variables = merge(local.api_env_variables, {"auth_mode"="private"})
}
memory_size = 1280
memory_size = lookup(var.lambda_memory_sizes, "PrivateApiLambda", 1280)
tags = var.tags

dynamic "vpc_config" {
Expand All @@ -174,11 +174,11 @@ resource "aws_lambda_function" "api" {
handler = "index.handler"
role = aws_iam_role.lambda_api_gateway.arn
runtime = "nodejs16.x"
timeout = 100
timeout = lookup(var.lambda_timeouts, "ApiEndpoints", 100)
environment {
variables = merge(local.api_env_variables, {"auth_mode"="public"})
}
memory_size = 1280
memory_size = lookup(var.lambda_memory_sizes, "ApiEndpoints", 1280)
tags = var.tags

reserved_concurrent_executions = var.api_reserved_concurrency
4 changes: 2 additions & 2 deletions tf-modules/archive/bootstrap.tf
@@ -5,8 +5,8 @@ resource "aws_lambda_function" "custom_bootstrap" {
handler = "index.handler"
role = var.lambda_processing_role_arn
runtime = "nodejs16.x"
timeout = 300
memory_size = 320
timeout = lookup(var.lambda_timeouts, "CustomBootstrap", 300)
memory_size = lookup(var.lambda_memory_sizes, "CustomBootstrap", 512)
environment {
variables = {
stackName = var.prefix
4 changes: 2 additions & 2 deletions tf-modules/archive/bulk_operation.tf
@@ -5,8 +5,8 @@ resource "aws_lambda_function" "bulk_operation" {
handler = "index.handler"
role = var.lambda_processing_role_arn
runtime = "nodejs16.x"
timeout = 300
memory_size = 512
timeout = lookup(var.lambda_timeouts, "bulkOperation", 300)
memory_size = lookup(var.lambda_memory_sizes, "bulkOperation", 512)
environment {
variables = {
acquireTimeoutMillis = var.rds_connection_timing_configuration.acquireTimeoutMillis
4 changes: 2 additions & 2 deletions tf-modules/archive/clean_executions.tf
@@ -14,8 +14,8 @@ resource "aws_lambda_function" "clean_executions" {
handler = "index.handler"
role = var.lambda_processing_role_arn
runtime = "nodejs16.x"
timeout = 900
memory_size = 192
timeout = lookup(var.lambda_timeouts, "cleanExecutions", 900)
memory_size = lookup(var.lambda_memory_sizes, "cleanExecutions", 512)
dead_letter_config {
target_arn = aws_sqs_queue.clean_executions_dead_letter_queue.arn
}
