DataDog · dd-mergequeue · Oct 18, 2024 · Oct 18, 2024 · GeorgeHahn · Oct 18, 2024
@@ -0,0 +1,12 @@
+auth_token_file_path: /tmp/agent-auth-token
+
+dd_url: http://127.0.0.1:9091
+process_config.process_dd_url: http://127.0.0.1:9092  # loopback IP, same form as dd_url
+
+# Disable cloud detection. This stops the Agent from probing the execution
+# environment and network, which is particularly important if the target has
+# network access.
+cloud_provider_metadata: []
+
+telemetry.enabled: true
+telemetry.checks: '*'  # quoted: a bare * would start a YAML alias
@@ -0,0 +1,39 @@
+# Agent 'out of the box' idle experiment: an agent install with the default
+# configuration and no active workload.
+
+optimization_goal: memory
+erratic: false
+
+target:
+  name: datadog-agent
+  command: /bin/entrypoint.sh
+
+  environment:  # bool/number-like values are quoted so they stay strings
+    DD_API_KEY: "00000001"  # leading zeros must survive
+    DD_HOSTNAME: smp-regression
+
+  profiling_environment:  # same quoting rule as environment
+    DD_INTERNAL_PROFILING_BLOCK_PROFILE_RATE: "10000"
+    DD_INTERNAL_PROFILING_CPU_DURATION: 1m
+    DD_INTERNAL_PROFILING_DELTA_PROFILES: "true"
+    DD_INTERNAL_PROFILING_ENABLED: "true"
+    DD_INTERNAL_PROFILING_ENABLE_GOROUTINE_STACKTRACES: "true"
+    DD_INTERNAL_PROFILING_MUTEX_PROFILE_FRACTION: "10"
+    DD_INTERNAL_PROFILING_PERIOD: 1m
+    DD_INTERNAL_PROFILING_UNIX_SOCKET: /var/run/datadog/apm.socket
+    DD_PROFILING_EXECUTION_TRACE_ENABLED: "true"
+    DD_PROFILING_EXECUTION_TRACE_PERIOD: 1m
+    DD_PROFILING_WAIT_PROFILE: "true"
+
+    DD_INTERNAL_PROFILING_EXTRA_TAGS: experiment:quality_gate_idle
+
+checks:
+  - name: memory_usage
+    description: "Memory usage quality gate. This puts a bound on the total agent memory usage."
+    bounds:
+      series: total_rss_bytes
+      upper_bound: "430.0 MiB"
+
+report_links:
+  - text: "bounds checks dashboard"
+    link: "https://app.datadoghq.com/dashboard/vz3-jd5-bdi?fromUser=true&refresh_mode=paused&tpl_var_experiment%5B0%5D={{ experiment }}&tpl_var_job_id%5B0%5D={{ job_id }}&tpl_var_run-id%5B0%5D={{ job_id }}&view=spans&from_ts={{ start_time_ms }}&to_ts={{ end_time_ms }}&live=false"
@@ -0,0 +1,176 @@
+generator: []  # empty list: no load generators are configured
+
+blackhole:  # two HTTP listeners bound to loopback ports 9091 and 9092
+  - http:
+      binding_addr: "127.0.0.1:9091"
+  - http:
+      binding_addr: "127.0.0.1:9092"
+
+target_metrics:  # two prometheus sources and one expvar source, each tagged by sub_agent
+  - prometheus:  # core agent telemetry
+      uri: "http://127.0.0.1:5000/telemetry"
+      tags:
+        sub_agent: "core"
+  - prometheus:  # process agent telemetry
+      uri: "http://127.0.0.1:6062/telemetry"
+      tags:
+        sub_agent: "process"
+  - expvar:  # trace agent telemetry
+      uri: "http://127.0.0.1:5012/debug/vars"
+      vars:  # slash-separated paths; presumably pointers into the expvar JSON - confirm
+        - "/Event"
+        - "/ServiceCheck"
+        - "/check_run_v1"
+        - "/cmdline"
+        - "/compressor/BytesIn"
+        - "/compressor/BytesOut"
+        - "/compressor/TotalCompressCycles"
+        - "/compressor/TotalPayloads"
+        - "/connections"
+        - "/container"
+        - "/events_v2"
+        - "/forwarder/APIKeyFailure"
+        - "/forwarder/APIKeyStatus"
+        - "/forwarder/FileStorage/CurrentSizeInBytes"
+        - "/forwarder/FileStorage/DeserializeCount"
+        - "/forwarder/FileStorage/DeserializeErrorsCount"
+        - "/forwarder/FileStorage/DeserializeTransactionsCount"
+        - "/forwarder/FileStorage/FileSize"
+        - "/forwarder/FileStorage/FilesCount"
+        - "/forwarder/FileStorage/FilesRemovedCount"
+        - "/forwarder/FileStorage/PointsDroppedCount"
+        - "/forwarder/FileStorage/SerializeCount"
+        - "/forwarder/FileStorage/StartupReloadedRetryFilesCount"
+        - "/forwarder/RemovalPolicy/FilesFromUnknownDomainCount"
+        - "/forwarder/RemovalPolicy/NewRemovalPolicyCount"
+        - "/forwarder/RemovalPolicy/OutdatedFilesCount"
+        - "/forwarder/RemovalPolicy/RegisteredDomainCount"
+        - "/forwarder/TransactionContainer/CurrentMemSizeInBytes"
+        - "/forwarder/TransactionContainer/ErrorsCount"
+        - "/forwarder/TransactionContainer/PointsDroppedCount"
+        - "/forwarder/TransactionContainer/TransactionsCount"
+        - "/forwarder/TransactionContainer/TransactionsDroppedCount"
+        - "/forwarder/Transactions/Cluster"
+        - "/forwarder/Transactions/ClusterRole"
+        - "/forwarder/Transactions/ClusterRoleBinding"
+        - "/forwarder/Transactions/ConnectionEvents/ConnectSuccess"
+        - "/forwarder/Transactions/ConnectionEvents/DNSSuccess"
+        - "/forwarder/Transactions/CronJob"
+        - "/forwarder/Transactions/CustomResource"
+        - "/forwarder/Transactions/CustomResourceDefinition"
+        - "/forwarder/Transactions/DaemonSet"
+        - "/forwarder/Transactions/Deployment"
+        - "/forwarder/Transactions/Dropped"
+        - "/forwarder/Transactions/DroppedByEndpoint"
+        - "/forwarder/Transactions/ECSTask"
+        - "/forwarder/Transactions/Errors"
+        - "/forwarder/Transactions/ErrorsByType/ConnectionErrors"
+        - "/forwarder/Transactions/ErrorsByType/DNSErrors"
+        - "/forwarder/Transactions/ErrorsByType/SentRequestErrors"
+        - "/forwarder/Transactions/ErrorsByType/TLSErrors"
+        - "/forwarder/Transactions/ErrorsByType/WroteRequestErrors"
+        - "/forwarder/Transactions/HTTPErrors"
+        - "/forwarder/Transactions/HTTPErrorsByCode"
+        - "/forwarder/Transactions/HighPriorityQueueFull"
+        - "/forwarder/Transactions/HorizontalPodAutoscaler"
+        - "/forwarder/Transactions/Ingress"
+        - "/forwarder/Transactions/InputBytesByEndpoint"
+        - "/forwarder/Transactions/InputCountByEndpoint"
+        - "/forwarder/Transactions/Job"
+        - "/forwarder/Transactions/LimitRange"
+        - "/forwarder/Transactions/Namespace"
+        - "/forwarder/Transactions/NetworkPolicy"
+        - "/forwarder/Transactions/Node"
+        - "/forwarder/Transactions/OrchestratorManifest"
+        - "/forwarder/Transactions/PersistentVolume"
+        - "/forwarder/Transactions/PersistentVolumeClaim"
+        - "/forwarder/Transactions/Pod"
+        - "/forwarder/Transactions/ReplicaSet"
+        - "/forwarder/Transactions/Requeued"
+        - "/forwarder/Transactions/RequeuedByEndpoint"
+        - "/forwarder/Transactions/Retried"
+        - "/forwarder/Transactions/RetriedByEndpoint"
+        - "/forwarder/Transactions/RetryQueueSize"
+        - "/forwarder/Transactions/Role"
+        - "/forwarder/Transactions/RoleBinding"
+        - "/forwarder/Transactions/Service"
+        - "/forwarder/Transactions/ServiceAccount"
+        - "/forwarder/Transactions/StatefulSet"
+        - "/forwarder/Transactions/StorageClass"
+        - "/forwarder/Transactions/Success"
+        - "/forwarder/Transactions/SuccessByEndpoint/check_run_v1"
+        - "/forwarder/Transactions/SuccessByEndpoint/connections"
+        - "/forwarder/Transactions/SuccessByEndpoint/container"
+        - "/forwarder/Transactions/SuccessByEndpoint/events_v2"
+        - "/forwarder/Transactions/SuccessByEndpoint/host_metadata_v2"
+        - "/forwarder/Transactions/SuccessByEndpoint/intake"
+        - "/forwarder/Transactions/SuccessByEndpoint/orchestrator"
+        - "/forwarder/Transactions/SuccessByEndpoint/process"
+        - "/forwarder/Transactions/SuccessByEndpoint/rtcontainer"
+        - "/forwarder/Transactions/SuccessByEndpoint/rtprocess"
+        - "/forwarder/Transactions/SuccessByEndpoint/series_v1"
+        - "/forwarder/Transactions/SuccessByEndpoint/series_v2"
+        - "/forwarder/Transactions/SuccessByEndpoint/services_checks_v2"
+        - "/forwarder/Transactions/SuccessByEndpoint/sketches_v1"
+        - "/forwarder/Transactions/SuccessByEndpoint/sketches_v2"
+        - "/forwarder/Transactions/SuccessByEndpoint/validate_v1"
+        - "/forwarder/Transactions/SuccessBytesByEndpoint"
+        - "/forwarder/Transactions/VerticalPodAutoscaler"
+        - "/host_metadata_v2"
+        - "/hostname/errors"
+        - "/hostname/provider"
+        - "/intake"
+        - "/jsonstream/CompressorLocks"
+        - "/jsonstream/ItemDrops"
+        - "/jsonstream/PayloadFulls"
+        - "/jsonstream/TotalCalls"
+        - "/jsonstream/TotalItems"
+        - "/jsonstream/TotalLockTime"
+        - "/jsonstream/TotalSerializationTime"
+        - "/jsonstream/WriteItemErrors"
+        - "/kubeletQueries"
+        - "/orchestrator"
+        - "/pid"
+        - "/process"
+        - "/rtcontainer"
+        - "/rtprocess"
+        - "/serializer/SendEventsErrItemTooBigs"
+        - "/serializer/SendEventsErrItemTooBigsFallback"
+        - "/series"
+        - "/series_v1"
+        - "/series_v2"
+        - "/services_checks_v2"
+        - "/sketch_series/ItemTooBig"
+        - "/sketch_series/PayloadFull"
+        - "/sketch_series/UnexpectedItemDrops"
+        - "/sketches_v1"
+        - "/sketches_v2"
+        - "/splitter/NotTooBig"
+        - "/splitter/PayloadDrops"
+        - "/splitter/TooBig"
+        - "/splitter/TotalLoops"
+        - "/stats_writer/Bytes"
+        - "/stats_writer/ClientPayloads"
+        - "/stats_writer/Errors"
+        - "/stats_writer/Payloads"
+        - "/stats_writer/Retries"
+        - "/stats_writer/Splits"
+        - "/stats_writer/StatsBuckets"
+        - "/stats_writer/StatsEntries"
+        - "/trace_writer/Bytes"
+        - "/trace_writer/BytesUncompressed"
+        - "/trace_writer/Errors"
+        - "/trace_writer/Events"
+        - "/trace_writer/Payloads"
+        - "/trace_writer/Retries"
+        - "/trace_writer/SingleMaxSize"
+        - "/trace_writer/Spans"
+        - "/trace_writer/Traces"
+        - "/uptime"
+        - "/validate_v1"
+        - "/version/Version"
+        - "/version/GitCommit"
+        - "/watchdog/CPU/UserAvg"
+        - "/watchdog/Mem/Alloc"
+      tags:
+        sub_agent: "trace"
@@ -0,0 +1,74 @@
+auth_token_file_path: /tmp/agent-auth-token
+hostname: smp-regression
+
+dd_url: http://127.0.0.1:9092
+
+confd_path: /etc/datadog-agent/conf.d
+
+# Turn off cloud-provider detection so the Agent does not probe the execution
+# environment or the network; this matters most when the target has network
+# access.
+cloud_provider_metadata: []
+
+dogstatsd_socket: /tmp/dsd.socket
+
+logs_enabled: true
+
+apm_config:
+  enabled: true
+
+otlp_config:
+  metrics:
+    enabled: true
+  traces:
+    enabled: true
+  logs:
+    enabled: true
+
+process_config:
+  process_collection:
+    enabled: true
+  container_collection:
+    enabled: true
+
+cluster_checks:
+  enabled: true
+
+network_path:
+  connections_monitoring:
+    enabled: true
+
+system_probe_config:
+  enabled: true
+
+network_config:
+  enabled: true
+
+# Cloud Security Management settings, following the setup documentation:
+# https://docs.datadoghq.com/security/cloud_security_management/setup/agent/linux/
+remote_configuration:
+  # Kept off: the SMP environment does not currently support remote config.
+  enabled: false
+
+runtime_security_config:
+  # true enables Threat Detection
+  enabled: true
+
+compliance_config:
+  # true enables CIS benchmarks for Misconfigurations
+  enabled: true
+  host_benchmarks:
+    enabled: true
+
+# Containers and hosts are scanned and evaluated for vulnerabilities every hour.
+sbom:
+  enabled: true
+  # true enables Container Vulnerability Management
+  container_image:
+    enabled: true
+  # true enables Host Vulnerability Management
+  host:
+    enabled: true
+
+container_image:
+  enabled: true
@@ -0,0 +1,13 @@
+# Per https://docs.datadoghq.com/security/cloud_security_management/setup/agent/linux/
+runtime_security_config:
+  ## @param enabled - boolean - optional - default: false
+  ## Set to true to enable Threat Detection.
+  enabled: true
+
+compliance_config:
+  ## @param enabled - boolean - optional - default: false
+  ## Set to true to enable CIS benchmarks for Misconfigurations.
+  ## The nested host_benchmarks.enabled flag below is switched on as well.
+  enabled: true
+  host_benchmarks:
+    enabled: true
@@ -0,0 +1,10 @@
+# Per https://docs.datadoghq.com/security/cloud_security_management/setup/agent/linux/
+
+runtime_security_config:
+  ## @param enabled - boolean - optional - default: false
+  ## Set to true to enable Threat Detection.
+  enabled: true
+
+  remote_configuration:
+    ## @param enabled - boolean - optional - default: false
+    enabled: true  # NOTE(review): a datadog.yaml in this change sets top-level remote_configuration.enabled false - confirm
@@ -0,0 +1,52 @@
+# Agent 'all features enabled' idle experiment: an agent install with all
+# sub-agents enabled in configuration and no active workload.
+
+optimization_goal: memory
+erratic: false
+
+target:
+  name: datadog-agent
+  command: /bin/entrypoint.sh
+
+  environment:  # bool/number-like values are quoted so they stay strings
+    DD_TELEMETRY_ENABLED: "true"
+    DD_API_KEY: "00000001"  # leading zeros must survive
+    DD_HOSTNAME: smp-regression
+    DD_DD_URL: http://127.0.0.1:9092
+
+  profiling_environment:  # same quoting rule as environment
+    # internal profiling
+    DD_INTERNAL_PROFILING_ENABLED: "true"
+    DD_SYSTEM_PROBE_INTERNAL_PROFILING_ENABLED: "true"
+    # run all the time: the period equals the CPU profile duration
+    DD_SYSTEM_PROBE_INTERNAL_PROFILING_PERIOD: 1m
+    DD_INTERNAL_PROFILING_PERIOD: 1m
+    DD_SYSTEM_PROBE_INTERNAL_PROFILING_CPU_DURATION: 1m
+    DD_INTERNAL_PROFILING_CPU_DURATION: 1m
+    # destination (NOTE(review): system-probe names from here on carry a _CONFIG_ infix the ones above lack - confirm)
+    DD_INTERNAL_PROFILING_UNIX_SOCKET: /var/run/datadog/apm.socket
+    DD_SYSTEM_PROBE_CONFIG_INTERNAL_PROFILING_UNIX_SOCKET: /var/run/datadog/apm.socket
+    # tags
+    DD_INTERNAL_PROFILING_EXTRA_TAGS: experiment:quality_gate_idle_all_features
+    DD_SYSTEM_PROBE_CONFIG_INTERNAL_PROFILING_EXTRA_TAGS: experiment:quality_gate_idle_all_features
+
+    DD_INTERNAL_PROFILING_BLOCK_PROFILE_RATE: "10000"
+    DD_INTERNAL_PROFILING_DELTA_PROFILES: "true"
+    DD_INTERNAL_PROFILING_ENABLE_GOROUTINE_STACKTRACES: "true"
+    DD_INTERNAL_PROFILING_MUTEX_PROFILE_FRACTION: "10"
+
+    # ddprof options
+    DD_PROFILING_EXECUTION_TRACE_ENABLED: "true"
+    DD_PROFILING_EXECUTION_TRACE_PERIOD: 1m
+    DD_PROFILING_WAIT_PROFILE: "true"
+
+checks:
+  - name: memory_usage
+    description: "Memory usage quality gate. This puts a bound on the total agent memory usage."
+    bounds:
+      series: total_rss_bytes
+      upper_bound: "785.0 MiB"
+
+report_links:
+  - text: "bounds checks dashboard"
+    link: "https://app.datadoghq.com/dashboard/vz3-jd5-bdi?fromUser=true&refresh_mode=paused&tpl_var_experiment%5B0%5D={{ experiment }}&tpl_var_job_id%5B0%5D={{ job_id }}&tpl_var_run-id%5B0%5D={{ job_id }}&view=spans&from_ts={{ start_time_ms }}&to_ts={{ end_time_ms }}&live=false"