From c0b7fe41a1939204edf67c2aef761372cd92841f Mon Sep 17 00:00:00 2001
From: Jason Jorgensen
Date: Fri, 13 May 2022 12:17:38 -0500
Subject: [PATCH] Add an alternative Datadog metric format, 'servicetags', to
 limit the number of metrics that are created and make monitoring them
 easier; minor fixes so the docs build

---
 awslimitchecker/metrics/datadog.py            |  75 ++++++++++--
 awslimitchecker/tests/metrics/test_datadog.py | 115 ++++++++++++++++++
 docs/source/cli_usage.rst                     |  46 +++++--
 docs/source/cli_usage.rst.template            |  20 +++
 docs/source/getting_started.rst               |   2 +-
 docs/source/limits.rst                        |   4 +-
 6 files changed, 235 insertions(+), 27 deletions(-)

diff --git a/awslimitchecker/metrics/datadog.py b/awslimitchecker/metrics/datadog.py
index c2b5fc3a..b110b64b 100644
--- a/awslimitchecker/metrics/datadog.py
+++ b/awslimitchecker/metrics/datadog.py
@@ -53,7 +53,8 @@ class Datadog(MetricsProvider):
 
     def __init__(
         self, region_name, prefix='awslimitchecker.', api_key=None,
-        extra_tags=None, host='https://api.datadoghq.com'
+        extra_tags=None, host='https://api.datadoghq.com',
+        metric_format='original'
     ):
         """
         Initialize the Datadog metrics provider. This class does not have any
@@ -76,6 +77,15 @@ def __init__(
         :param extra_tags: CSV list of additional tags to send with metrics.
             All metrics will automatically be tagged with ``region:``
         :type extra_tags: str
+        :param metric_format: ``original``, the default, creates one metric
+            name per service limit, e.g. `awslimitchecker.s3.buckets.limit`
+            for the `S3 / Buckets` limit. ``servicetags`` uses only two metric
+            names, `awslimitchecker.limit` and `awslimitchecker.max_usage`,
+            with the limit identified by a tag such as
+            `service_limit:s3.buckets`. This allows a single monitor with one
+            query: `avg(last_4h):avg:awslimitchecker.max_usage{*} by {service_limit} /
+            avg:awslimitchecker.limit{*} by {service_limit} * 100 > 95`
+        :type metric_format: str
         """
         super(Datadog, self).__init__(region_name)
         self._prefix = prefix
@@ -84,6 +94,7 @@ def __init__(
             self._tags.extend(extra_tags.split(','))
         self._api_key = os.environ.get('DATADOG_API_KEY')
         self._host = os.environ.get('DATADOG_HOST', host)
+        self._metric_format = metric_format
         if api_key is not None:
             self._api_key = api_key
         if self._api_key is None:
@@ -122,6 +133,19 @@ def _name_for_metric(self, service, limit):
             re.sub(r'[^0-9a-zA-Z]+', '_', limit)
         )).lower()
 
+    def _name_for_datadog(self, name):
+        """
+        Return a name that is safe for Datadog.
+
+        :param name: service, limit, or other name
+        :type name: str
+        :return: Datadog-safe name
+        :rtype: str
+        """
+        return ('%s' % (
+            re.sub(r'[^0-9a-zA-Z]+', '_', name)
+        )).lower()
+
     def flush(self):
         ts = int(time.time())
         logger.debug('Flushing metrics to Datadog.')
@@ -137,21 +161,50 @@ def flush(self):
                 max_usage = 0
             else:
                 max_usage = max(u).get_value()
-            mname = self._name_for_metric(lim.service.service_name, lim.name)
-            series.append({
-                'metric': '%s.max_usage' % mname,
-                'points': [[ts, max_usage]],
-                'type': 'gauge',
-                'tags': self._tags
-            })
             limit = lim.get_limit()
-            if limit is not None:
+
+            if self._metric_format == 'original':
+                mname = self._name_for_metric(
+                    lim.service.service_name, lim.name)
                 series.append({
-                    'metric': '%s.limit' % mname,
-                    'points': [[ts, limit]],
+                    'metric': '%s.max_usage' % mname,
+                    'points': [[ts, max_usage]],
                     'type': 'gauge',
                     'tags': self._tags
                 })
+                if limit is not None:
+                    series.append({
+                        'metric': '%s.limit' % mname,
+                        'points': [[ts, limit]],
+                        'type': 'gauge',
+                        'tags': self._tags
+                    })
+            elif self._metric_format == 'servicetags':
+                mtags = self._tags.copy()
+                mtags.extend(['service:%s' %
+                              self._name_for_datadog(lim.service.service_name)])
+                mtags.extend(['service_limit:%s.%s' %
+                              (self._name_for_datadog(lim.service.service_name),
+                               self._name_for_datadog(lim.name))])
+                series.append({
+                    'metric': '%smax_usage' % self._prefix,
+                    'points': [[ts, max_usage]],
+                    'type': 'gauge',
+                    'tags': mtags
+                })
+                limit = lim.get_limit()
+                if limit is not None:
+                    series.append({
+                        'metric': '%slimit' % self._prefix,
+                        'points': [[ts, limit]],
+                        'type': 'gauge',
+                        'tags': mtags
+                    })
+            else:
+                raise RuntimeError(
+                    "ERROR: Datadog metric provider metric_format must "
+                    "be 'original' or 'servicetags'."
+                )
         logger.info('POSTing %d metrics to datadog', len(series))
         data = {'series': series}
         encoded = json.dumps(data).encode('utf-8')
diff --git a/awslimitchecker/tests/metrics/test_datadog.py b/awslimitchecker/tests/metrics/test_datadog.py
index 77ebf592..09c7ef93 100644
--- a/awslimitchecker/tests/metrics/test_datadog.py
+++ b/awslimitchecker/tests/metrics/test_datadog.py
@@ -171,6 +171,7 @@ def setup(self):
             m_init.return_value = None
         self.cls = Datadog()
         self.cls._host = 'https://api.datadoghq.com'
+        self.cls._metric_format = 'original'
 
 
 class TestValidateAuth(DatadogTester):
@@ -228,6 +229,14 @@ def test_simple(self):
         ) == 'foobar.service_name_.limit_name_'
 
 
+class TestSafeName(DatadogTester):
+
+    def test_simple(self):
+        assert self.cls._name_for_datadog(
+            'limit* NAME .'
+        ) == 'limit_name_'
+
+
 class TestFlush(DatadogTester):
 
     @freeze_time("2016-12-16 10:40:42", tz_offset=0, auto_tick_seconds=6)
@@ -297,6 +306,112 @@ def test_happy_path(self):
         assert c[2]['headers'] == {'Content-type': 'application/json'}
         assert json.loads(c[2]['body'].decode()) == expected
 
+    @freeze_time("2016-12-16 10:40:42", tz_offset=0, auto_tick_seconds=6)
+    def test_servicetags_format(self):
+        self.cls._prefix = 'prefix.'
+        self.cls._tags = ['tag1', 'tag:2']
+        self.cls._limits = []
+        self.cls._api_key = 'myKey'
+        self.cls._metric_format = 'servicetags'
+        self.cls.set_run_duration(123.45)
+        limA = Mock(
+            name='limitA', service=Mock(service_name='SVC1')
+        )
+        type(limA).name = 'limitA'
+        limA.get_current_usage.return_value = []
+        limA.get_limit.return_value = None
+        self.cls.add_limit(limA)
+        limB = Mock(
+            name='limitB', service=Mock(service_name='SVC1')
+        )
+        type(limB).name = 'limitB'
+        mocku = Mock()
+        mocku.get_value.return_value = 6
+        limB.get_current_usage.return_value = [mocku]
+        limB.get_limit.return_value = 10
+        self.cls.add_limit(limB)
+        mock_http = Mock()
+        mock_resp = Mock(status=200, data='{"status": "ok"}')
+        mock_http.request.return_value = mock_resp
+        self.cls._http = mock_http
+        self.cls.flush()
+        ts = 1481884842
+        expected = {
+            'series': [
+                {
+                    'metric': 'prefix.runtime',
+                    'points': [[ts, 123.45]],
+                    'type': 'gauge',
+                    'tags': ['tag1', 'tag:2']
+                },
+                {
+                    'metric': 'prefix.max_usage',
+                    'points': [[ts, 0]],
+                    'type': 'gauge',
+                    'tags': ['tag1', 'tag:2',
+                             'service:svc1', 'service_limit:svc1.limita']
+                },
+                {
+                    'metric': 'prefix.max_usage',
+                    'points': [[ts, 6]],
+                    'type': 'gauge',
+                    'tags': ['tag1', 'tag:2',
+                             'service:svc1', 'service_limit:svc1.limitb']
+                },
+                {
+                    'metric': 'prefix.limit',
+                    'points': [[ts, 10]],
+                    'type': 'gauge',
+                    'tags': ['tag1', 'tag:2',
+                             'service:svc1', 'service_limit:svc1.limitb']
+                }
+            ]
+        }
+        assert len(mock_http.mock_calls) == 1
+        c = mock_http.mock_calls[0]
+        assert c[0] == 'request'
+        assert c[1] == (
+            'POST', 'https://api.datadoghq.com/api/v1/series?api_key=myKey'
+        )
+        assert len(c[2]) == 2
+        assert c[2]['headers'] == {'Content-type': 'application/json'}
+        assert json.loads(c[2]['body'].decode()) == expected
+
+    @freeze_time("2016-12-16 10:40:42", tz_offset=0, auto_tick_seconds=6)
+    def test_invalid_format(self):
+        self.cls._prefix = 'prefix.'
+        self.cls._tags = ['tag1', 'tag:2']
+        self.cls._limits = []
+        self.cls._api_key = 'myKey'
+        self.cls._metric_format = 'invalidformat'
+        self.cls.set_run_duration(123.45)
+        limA = Mock(
+            name='limitA', service=Mock(service_name='SVC1')
+        )
+        type(limA).name = 'limitA'
+        limA.get_current_usage.return_value = []
+        limA.get_limit.return_value = None
+        self.cls.add_limit(limA)
+        limB = Mock(
+            name='limitB', service=Mock(service_name='SVC1')
+        )
+        type(limB).name = 'limitB'
+        mocku = Mock()
+        mocku.get_value.return_value = 6
+        limB.get_current_usage.return_value = [mocku]
+        limB.get_limit.return_value = 10
+        self.cls.add_limit(limB)
+        mock_http = Mock()
+        mock_resp = Mock(status=200, data='{"status": "ok"}')
+        mock_http.request.return_value = mock_resp
+        self.cls._http = mock_http
+        with pytest.raises(RuntimeError) as exc:
+            self.cls.flush()
+        assert str(exc.value) == "ERROR: Datadog metric provider " \
+                                 "metric_format must be " \
+                                 "'original' or 'servicetags'."
+        assert len(mock_http.mock_calls) == 0
+
     @freeze_time("2016-12-16 10:40:42", tz_offset=0, auto_tick_seconds=6)
     def test_api_error_non_default_host(self):
         self.cls._prefix = 'prefix.'
diff --git a/docs/source/cli_usage.rst b/docs/source/cli_usage.rst
index eb8d752b..7362f809 100644
--- a/docs/source/cli_usage.rst
+++ b/docs/source/cli_usage.rst
@@ -222,13 +222,13 @@ and limits followed by ``(API)`` have been obtained from the service's API.
 .. code-block:: console
 
     (venv)$ awslimitchecker -l
-    ApiGateway/API keys per account                       500.0 (Quotas)
+    ApiGateway/API keys per account                       10000.0 (Quotas)
     ApiGateway/Client certificates per account            60.0 (Quotas)
     ApiGateway/Custom authorizers per API                 10
     ApiGateway/Documentation parts per API                2000
     ApiGateway/Edge APIs per account                      120.0 (Quotas)
     (...)
-    AutoScaling/Auto Scaling groups                       200 (API)
+    AutoScaling/Auto Scaling groups                       500 (API)
     (...)
     Lambda/Function Count                                 None
     (...)
@@ -253,7 +253,7 @@ from the Service Quotas service.
     ApiGateway/Documentation parts per API                2000
     ApiGateway/Edge APIs per account                      120
     (...)
-    AutoScaling/Auto Scaling groups                       200 (API)
+    AutoScaling/Auto Scaling groups                       500 (API)
     (...)
     Lambda/Function Count                                 None
     (...)
@@ -275,13 +275,13 @@ from Trusted Advisor for all commands.
 .. code-block:: console
 
     (venv)$ awslimitchecker -l --skip-ta
-    ApiGateway/API keys per account                       500.0 (Quotas)
+    ApiGateway/API keys per account                       10000.0 (Quotas)
     ApiGateway/Client certificates per account            60.0 (Quotas)
     ApiGateway/Custom authorizers per API                 10
     ApiGateway/Documentation parts per API                2000
     ApiGateway/Edge APIs per account                      120.0 (Quotas)
     (...)
-    AutoScaling/Auto Scaling groups                       200 (API)
+    AutoScaling/Auto Scaling groups                       500 (API)
     (...)
     Lambda/Function Count                                 None
     (...)
@@ -344,15 +344,15 @@ using their IDs).
 .. code-block:: console
 
     (venv)$ awslimitchecker -u
-    ApiGateway/API keys per account                       2
+    ApiGateway/API keys per account                       0
     ApiGateway/Client certificates per account            0
-    ApiGateway/Custom authorizers per API                 max: 2d7q4kzcmh=2 (2d7q4kz (...)
-    ApiGateway/Documentation parts per API                max: 2d7q4kzcmh=2 (2d7q4kz (...)
-    ApiGateway/Edge APIs per account                      9
+    ApiGateway/Custom authorizers per API
+    ApiGateway/Documentation parts per API
+    ApiGateway/Edge APIs per account                      0
     (...)
-    VPC/Subnets per VPC                                   max: vpc-f4279a92=6 (vpc-f (...)
+    VPC/Subnets per VPC                                   max: vpc-02031d86da0b6d120 (...)
     VPC/VPCs                                              2
-    VPC/Virtual private gateways                          1
+    VPC/Virtual private gateways                          0
@@ -377,7 +377,7 @@ For example, to override the limits of EC2's "EC2-Classic Elastic IPs" and
 .. code-block:: console
 
     (venv)$ awslimitchecker -L "AutoScaling/Auto Scaling groups"=321 --limit="AutoScaling/Launch configurations"=456 -l
-    ApiGateway/API keys per account                       500.0 (Quotas)
+    ApiGateway/API keys per account                       10000.0 (Quotas)
     ApiGateway/Client certificates per account            60.0 (Quotas)
     ApiGateway/Custom authorizers per API                 10
     ApiGateway/Documentation parts per API                2000
@@ -412,7 +412,7 @@ Using a command like:
 .. code-block:: console
 
     (venv)$ awslimitchecker --limit-override-json=limit_overrides.json -l
-    ApiGateway/API keys per account                       500.0 (Quotas)
+    ApiGateway/API keys per account                       10000.0 (Quotas)
     ApiGateway/Client certificates per account            60.0 (Quotas)
     ApiGateway/Custom authorizers per API                 10
     ApiGateway/Documentation parts per API                2000
@@ -574,6 +574,26 @@ environment variable) and an optional ``extra_tags`` parameter:
 Metrics will be pushed to the provider only when awslimitchecker is done
 checking all limits.
 
+There is also an alternative metric format for the
+:py:class:`~awslimitchecker.metrics.datadog.Datadog` metrics provider, which
+uses only two metric names, ``awslimitchecker.limit`` and
+``awslimitchecker.max_usage``; each service limit is added as a tag on these
+metrics.
+
+To use this alternative format, add the optional parameter ``metric_format=servicetags``:
+
+.. code-block:: console
+
+    (venv)$ awslimitchecker \
+        --metrics-provider=Datadog \
+        --metrics-config=api_key=123456 \
+        --metrics-config=extra_tags=foo,bar,baz:blam \
+        --metrics-config=metric_format=servicetags
+
+You can then use the following query in a single Datadog monitor to cover all service limits:
+``avg(last_4h):avg:awslimitchecker.max_usage{*} by {service_limit} /
+avg:awslimitchecker.limit{*} by {service_limit} * 100 > 95``
+
 .. _cli_usage.alerts:
 
 Enable Alerts Provider
diff --git a/docs/source/cli_usage.rst.template b/docs/source/cli_usage.rst.template
index 38a0a5bf..657b9fb5 100644
--- a/docs/source/cli_usage.rst.template
+++ b/docs/source/cli_usage.rst.template
@@ -273,6 +273,26 @@ environment variable) and an optional ``extra_tags`` parameter:
 Metrics will be pushed to the provider only when awslimitchecker is done
 checking all limits.
 
+There is also an alternative metric format for the
+:py:class:`~awslimitchecker.metrics.datadog.Datadog` metrics provider, which
+uses only two metric names, ``awslimitchecker.limit`` and
+``awslimitchecker.max_usage``; each service limit is added as a tag on these
+metrics.
+
+To use this alternative format, add the optional parameter ``metric_format=servicetags``:
+
+.. code-block:: console
+
+    (venv)$ awslimitchecker \
+        --metrics-provider=Datadog \
+        --metrics-config=api_key=123456 \
+        --metrics-config=extra_tags=foo,bar,baz:blam \
+        --metrics-config=metric_format=servicetags
+
+You can then use the following query in a single Datadog monitor to cover all service limits:
+``avg(last_4h):avg:awslimitchecker.max_usage{*} by {service_limit} /
+avg:awslimitchecker.limit{*} by {service_limit} * 100 > 95``
+
 .. _cli_usage.alerts:
 
 Enable Alerts Provider
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index 7aa10b31..d2aaa403 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -179,7 +179,7 @@ Trusted Advisor
 awslimitchecker supports retrieving your current service limits via the
 `Trusted Advisor `_
-`"Service Limits" performance check `_
+`"Service Limits" performance check `_
 , for limits which Trusted Advisor tracks (currently a subset of what
 awslimitchecker knows about). The results of this check may not be available
 via the API for all accounts; as of December 2016, the Trusted Advisor
 documentation states that while
diff --git a/docs/source/limits.rst b/docs/source/limits.rst
index 76b10121..82dff6dc 100644
--- a/docs/source/limits.rst
+++ b/docs/source/limits.rst
@@ -222,7 +222,7 @@ type.
 Limit                                                                Trusted Advisor Quotas   API     Default
 ==================================================================== =============== ======== ======= ====
 All F Spot Instance Requests                                                         |check|          11
-All G Spot Instance Requests                                                         |check|          11
+All G Spot Instance Requests                                                                          11
 All Inf Spot Instance Requests                                                       |check|          64
 All P Spot Instance Requests                                                         |check|          16
 All Standard (A, C, D, H, I, M, R, T, Z) Spot Instance Requests                      |check|          1440
@@ -234,7 +234,7 @@ Max target capacity for all spot fleets in region
 Max target capacity per spot fleet                                                                    3000
 Rules per VPC security group                                                         |check|          60
 Running On-Demand All F instances                                                    |check|          128
-Running On-Demand All G instances                                                    |check|          128
+Running On-Demand All G instances                                                                     128
 Running On-Demand All P instances                                                    |check|          128
 Running On-Demand All Standard (A, C, D, H, I, M, R, T, Z) instances                |check|          1152
 Running On-Demand All X instances                                                    |check|          128
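
As a minimal sketch (not part of the patch itself), this is how the new
``metric_format='servicetags'`` option could be exercised from Python rather
than through the CLI. The wiring of limits into the provider below mirrors what
the awslimitchecker runner does; the region, API key, and extra tags are
placeholder values, and feeding the checker's limits into the provider by hand
is an assumption made only for illustration.

.. code-block:: python

    import time

    from awslimitchecker.checker import AwsLimitChecker
    from awslimitchecker.metrics.datadog import Datadog

    start = time.time()
    checker = AwsLimitChecker(region='us-east-1')
    checker.find_usage()  # populate current usage on every known limit

    # 'servicetags' sends only awslimitchecker.max_usage and
    # awslimitchecker.limit, tagged with service:<service> and
    # service_limit:<service>.<limit>; credentials here are placeholders.
    provider = Datadog(
        'us-east-1',
        api_key='123456',                  # or export DATADOG_API_KEY instead
        extra_tags='env:prod,team:infra',
        metric_format='servicetags'
    )
    for service_limits in checker.get_limits().values():
        for limit in service_limits.values():
            provider.add_limit(limit)
    provider.set_run_duration(time.time() - start)
    provider.flush()  # one POST of the whole batch to the /api/v1/series endpoint

Because the limit's identity moves into the ``service_limit`` tag, the number of
distinct metric names stays constant no matter how many services and limits are
reported, which is what makes the single-query monitor shown in the docs
possible.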