From 44eabe92e6f4c693266f66f891a7e5bcb88dcd35 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Fri, 5 Apr 2024 13:56:21 -0400 Subject: [PATCH] SARC-271: Update data frame with RGU data (#113) * SARC-271: Update data frame with RGU data * Log warnings if RGU start date or RGU/GPU ratios are not available. * Rename function to update_cluster_job_series_rgu and make sure only jobs related to given cluster name are updated. Add new function update_job_series_rgu to apply changes on data frame for all clusters. * Log a warning if RGU file is not specified and another warning if RGU file is not found. --------- Co-authored-by: Bruno Carrez --- config/sarc-dev.json | 16 +- config/sarc-prod.json | 16 +- sarc/config.py | 2 + sarc/jobs/series.py | 173 ++++++++- .../jobs/test_func_update_job_series_rgu.py | 360 ++++++++++++++++++ ...ate_job_series_rgu_with_real_test_data.txt | 41 ++ .../gpu_to_rgu_billing.json | 4 + .../gpu_to_rgu_billing_empty.json | 1 + .../patate_prometheus/gpu_to_rgu_billing.json | 3 + .../raisin_prometheus/gpu_to_rgu_billing.json | 11 + tests/sarc-test.json | 18 +- 11 files changed, 631 insertions(+), 14 deletions(-) create mode 100644 tests/functional/jobs/test_func_update_job_series_rgu.py create mode 100644 tests/functional/jobs/test_func_update_job_series_rgu/test_update_job_series_rgu_with_real_test_data.txt create mode 100644 tests/not-so-secrets/fromage_prometheus/gpu_to_rgu_billing.json create mode 100644 tests/not-so-secrets/gerudo_prometheus/gpu_to_rgu_billing_empty.json create mode 100644 tests/not-so-secrets/patate_prometheus/gpu_to_rgu_billing.json create mode 100644 tests/not-so-secrets/raisin_prometheus/gpu_to_rgu_billing.json diff --git a/config/sarc-dev.json b/config/sarc-dev.json index 350f44c6..bde70c31 100644 --- a/config/sarc-dev.json +++ b/config/sarc-dev.json @@ -40,7 +40,9 @@ "diskusage_report_command": "diskusage_report --project --all_users", "prometheus_url": "https://mila-thanos.calculquebec.ca", "prometheus_headers_file": "secrets/drac_prometheus/headers.json", - "start_date": "2022-04-01" + "start_date": "2022-04-01", + "rgu_start_date": "2023-11-28", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_narval.json" }, "beluga": { "host": "beluga.computecanada.ca", @@ -52,7 +54,9 @@ "diskusage_report_command": "diskusage_report --project --all_users", "prometheus_url": "https://mila-thanos.calculquebec.ca", "prometheus_headers_file": "secrets/drac_prometheus/headers.json", - "start_date": "2022-04-01" + "start_date": "2022-04-01", + "rgu_start_date": "2024-04-03", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_beluga.json" }, "graham": { "host": "graham.computecanada.ca", @@ -65,7 +69,9 @@ "prometheus_url": null, "prometheus_headers_file": null, "start_date": "2022-04-01", - "nodes_info_file": "secrets/nodes_graham.txt" + "nodes_info_file": "secrets/nodes_graham.txt", + "rgu_start_date": "2024-04-03", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_graham.json" }, "cedar": { "host": "cedar.computecanada.ca", @@ -78,7 +84,9 @@ "prometheus_url": null, "prometheus_headers_file": null, "start_date": "2022-04-01", - "nodes_info_file": "secrets/nodes_cedar.txt" + "nodes_info_file": "secrets/nodes_cedar.txt", + "rgu_start_date": "2024-04-03", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_cedar.json" } } } diff --git a/config/sarc-prod.json b/config/sarc-prod.json index aeb952c6..543b3bb6 100644 --- a/config/sarc-prod.json +++ b/config/sarc-prod.json @@ -40,7 +40,9 @@ "diskusage_report_command": "diskusage_report --project --all_users", 
"prometheus_url": "https://mila-thanos.calculquebec.ca", "prometheus_headers_file": "secrets/drac_prometheus/headers.json", - "start_date": "2022-04-01" + "start_date": "2022-04-01", + "rgu_start_date": "2023-11-28", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_narval.json" }, "beluga": { "host": "beluga.computecanada.ca", @@ -52,7 +54,9 @@ "diskusage_report_command": "diskusage_report --project --all_users", "prometheus_url": "https://mila-thanos.calculquebec.ca", "prometheus_headers_file": "secrets/drac_prometheus/headers.json", - "start_date": "2022-04-01" + "start_date": "2022-04-01", + "rgu_start_date": "2024-04-03", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_beluga.json" }, "graham": { "host": "graham.computecanada.ca", @@ -65,7 +69,9 @@ "prometheus_url": null, "prometheus_headers_file": null, "start_date": "2022-04-01", - "nodes_info_file": "secrets/nodes_graham.txt" + "nodes_info_file": "secrets/nodes_graham.txt", + "rgu_start_date": "2024-04-03", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_graham.json" }, "cedar": { "host": "cedar.computecanada.ca", @@ -78,7 +84,9 @@ "prometheus_url": null, "prometheus_headers_file": null, "start_date": "2022-04-01", - "nodes_info_file": "secrets/nodes_cedar.txt" + "nodes_info_file": "secrets/nodes_cedar.txt", + "rgu_start_date": "2024-04-03", + "gpu_to_rgu_billing": "secrets/gpu_to_rgu_billing_cedar.json" } } } diff --git a/sarc/config.py b/sarc/config.py index 8562cc5d..71458c97 100644 --- a/sarc/config.py +++ b/sarc/config.py @@ -83,6 +83,8 @@ class ClusterConfig(BaseModel): duc_storage_command: str = None diskusage_report_command: str = None start_date: str = "2022-04-01" + rgu_start_date: str = None + gpu_to_rgu_billing: Path = None @validator("timezone") def _timezone(cls, value): diff --git a/sarc/jobs/series.py b/sarc/jobs/series.py index f47c408a..7b083491 100644 --- a/sarc/jobs/series.py +++ b/sarc/jobs/series.py @@ -1,5 +1,8 @@ from __future__ import annotations +import json +import logging +import os.path from datetime import datetime, timedelta from typing import TYPE_CHECKING, Callable @@ -9,7 +12,7 @@ from prometheus_api_client import MetricRangeDataFrame from tqdm import tqdm -from sarc.config import MTL, UTC +from sarc.config import MTL, UTC, ClusterConfig, config from sarc.jobs.job import JobStatistics, Statistics, count_jobs, get_jobs from sarc.traces import trace_decorator @@ -401,6 +404,174 @@ def load_job_series( return pandas.DataFrame(rows) +def update_cluster_job_series_rgu( + df: pandas.DataFrame, cluster_config: ClusterConfig +) -> pandas.DataFrame: + """ + Compute RGU information for jobs related to given cluster config in a data frame. + + Parameters + ---------- + df: DataFrame + Data frame to update, typically returned by `load_job_series`. + Should contain fields: + "cluster_name", "start_time", "allocated.gpu_type", "allocated.gres_gpu". + cluster_config: ClusterConfig + Configuration of cluster to which jobs to update belong. + Should define following config: + "rgu_start_date": date since when billing is given as RGU. + "gpu_to_rgu_billing": path to a JSON file containing a dict which maps + GPU type to RGU cost per GPU. + + Returns + ------- + DataFrame + Input data frame with: + - column `allocated.gres_gpu` updated if necessary. + - column `allocated.gres_rgu` added or updated to contain RGU billing. + Set to NaN (or unchanged if already present) for jobs from other clusters. + - column `gpu_type_rgu` added or updated to contain RGU cost per GPU (RGU/GPU ratio). 
+          Set to NaN (or unchanged if already present) for jobs from other clusters.
+
+    Pseudocode describing how we update the data frame:
+    for each job: if job.cluster_name == cluster_config.name:
+        if start_time < cluster_config.rgu_start_date:
+            # We are BEFORE the transition to RGU
+            if allocated.gpu_type in gpu_to_rgu_billing:
+                # compute rgu columns
+                allocated.gres_rgu = allocated.gres_gpu * gpu_to_rgu_billing[allocated.gpu_type]
+                allocated.gpu_type_rgu = gpu_to_rgu_billing[allocated.gpu_type]
+            else:
+                # set rgu columns to nan
+                allocated.gres_rgu = nan
+                allocated.gpu_type_rgu = nan
+        else:
+            # We are AFTER the transition to RGU
+            # Either way, we assume gres_rgu is the current gres_gpu
+            allocated.gres_rgu = allocated.gres_gpu
+
+            if allocated.gpu_type in gpu_to_rgu_billing:
+                # we fix gres_gpu by dividing it by the RGU/GPU ratio
+                allocated.gres_gpu = allocated.gres_gpu / gpu_to_rgu_billing[allocated.gpu_type]
+                # we save the RGU/GPU ratio
+                allocated.gpu_type_rgu = gpu_to_rgu_billing[allocated.gpu_type]
+            else:
+                # we cannot fix gres_gpu, so we set it to nan
+                allocated.gres_gpu = nan
+                # we cannot get the RGU/GPU ratio, so we set it to nan
+                allocated.gpu_type_rgu = nan
+    """
+
+    # Make sure the frame gets the new RGU columns in any case, with NaN as default value.
+    if "allocated.gres_rgu" not in df.columns:
+        df["allocated.gres_rgu"] = np.nan
+    if "allocated.gpu_type_rgu" not in df.columns:
+        df["allocated.gpu_type_rgu"] = np.nan
+
+    if cluster_config.rgu_start_date is None:
+        logging.warning(
+            f"RGU update: no RGU start date for cluster {cluster_config.name}"
+        )
+        return df
+
+    if cluster_config.gpu_to_rgu_billing is None:
+        logging.warning(
+            f"RGU update: no RGU/GPU JSON path for cluster {cluster_config.name}"
+        )
+        return df
+
+    if not os.path.isfile(cluster_config.gpu_to_rgu_billing):
+        logging.warning(
+            f"RGU update: RGU/GPU JSON file not found for cluster {cluster_config.name} "
+            f"at: {cluster_config.gpu_to_rgu_billing}"
+        )
+        return df
+
+    # Otherwise, parse RGU start date.
+    rgu_start_date = datetime.fromisoformat(cluster_config.rgu_start_date).astimezone(
+        MTL
+    )
+
+    # Get RGU/GPU ratios.
+    with open(cluster_config.gpu_to_rgu_billing, "r", encoding="utf-8") as file:
+        gpu_to_rgu_billing = json.load(file)
+        assert isinstance(gpu_to_rgu_billing, dict)
+    if not gpu_to_rgu_billing:
+        logging.warning(
+            f"RGU update: no RGU/GPU ratios available for cluster {cluster_config.name}"
+        )
+        return df
+
+    # We now have both the RGU start date and the RGU/GPU ratios. We can update the columns.
+
+    # Compute column allocated.gpu_type_rgu.
+    # If a GPU type is not found in the RGU/GPU ratios,
+    # then the ratio will be set to NaN in the output column.
+    col_ratio_rgu_by_gpu = df["allocated.gpu_type"].map(gpu_to_rgu_billing)
+
+    # Compute slices for jobs started before and since the RGU start date.
+    slice_before_rgu_time = (df["cluster_name"] == cluster_config.name) & (
+        df["start_time"] < rgu_start_date
+    )
+    slice_after_rgu_time = (df["cluster_name"] == cluster_config.name) & (
+        df["start_time"] >= rgu_start_date
+    )
+
+    # Column allocated.gpu_type_rgu is set the same way in both cases.
+    df.loc[slice_before_rgu_time, "allocated.gpu_type_rgu"] = col_ratio_rgu_by_gpu[
+        slice_before_rgu_time
+    ]
+    df.loc[slice_after_rgu_time, "allocated.gpu_type_rgu"] = col_ratio_rgu_by_gpu[
+        slice_after_rgu_time
+    ]
+
+    # Compute allocated.gres_rgu where the job started before RGU time.
+ df.loc[slice_before_rgu_time, "allocated.gres_rgu"] = ( + df["allocated.gres_gpu"][slice_before_rgu_time] + * col_ratio_rgu_by_gpu[slice_before_rgu_time] + ) + + # Set allocated.gres_rgu with previous allocated.gres_gpu where job started after RGU time. + df.loc[slice_after_rgu_time, "allocated.gres_rgu"] = df["allocated.gres_gpu"][ + slice_after_rgu_time + ] + # Then update allocated.gres_gpu where job started after RGU time. + df.loc[slice_after_rgu_time, "allocated.gres_gpu"] = ( + df["allocated.gres_gpu"][slice_after_rgu_time] + / col_ratio_rgu_by_gpu[slice_after_rgu_time] + ) + + return df + + +def update_job_series_rgu(df: DataFrame): + """ + Compute RGU information for jobs in given data frame. + + Parameters + ---------- + df: DataFrame + Data frame to update, typically returned by `load_job_series`. + Should contain fields: + "cluster_name", "start_time", "allocated.gpu_type", "allocated.gres_gpu". + + Returns + ------- + DataFrame + Input data frame with: + - column `allocated.gres_gpu` updated if necessary. + - column `allocated.gres_rgu` added or updated to contain RGU billing. + Set to NaN (or unchanged if already present) for jobs from clusters without RGU. + - column `gpu_type_rgu` added or updated to contain RGU cost per GPU (RGU/GPU ratio). + Set to NaN (or unchanged if already present) for jobs from clusters without RGU. + + For more details about implementation, see function `update_cluster_job_series_rgu` + """ + for cluster_config in config().clusters.values(): + update_cluster_job_series_rgu(df, cluster_config) + return df + + def _select_stat(name, dist): if not dist: return np.nan diff --git a/tests/functional/jobs/test_func_update_job_series_rgu.py b/tests/functional/jobs/test_func_update_job_series_rgu.py new file mode 100644 index 00000000..c38ac58e --- /dev/null +++ b/tests/functional/jobs/test_func_update_job_series_rgu.py @@ -0,0 +1,360 @@ +import json +from datetime import datetime +from pprint import pformat +from typing import Dict + +import numpy as np +import pandas +import pytest + +from sarc.config import MTL, ClusterConfig, config +from sarc.jobs.series import ( + load_job_series, + update_cluster_job_series_rgu, + update_job_series_rgu, +) + +from .test_func_load_job_series import MOCK_TIME + + +def _gen_data_frame( + cluster_names: list, start_times=[], gres_gpu: list = [], gpu_type: list = [] +): + """Generate a data frame suited for RGU tests.""" + assert len(cluster_names) == len(start_times) == len(gres_gpu) == len(gpu_type) + rows = [ + { + "cluster_name": cluster_name, + "start_time": start_time, + "allocated.gres_gpu": gres_gpu, + "allocated.gpu_type": gpu_type, + } + for cluster_name, start_time, gres_gpu, gpu_type in zip( + cluster_names, start_times, gres_gpu, gpu_type + ) + ] + frame = pandas.DataFrame(rows) + assert frame.shape == (len(gres_gpu), 4 if len(gres_gpu) else 0) + return frame + + +def _read_json(filename): + with open(filename, "r", encoding="utf-8") as file: + return json.load(file) + + +# Below, we generate fixtures for cluster configs used in these tests. +# There are 5 clusters: +# - no rgu date, no RGU mapping +# - no rgu date, only RGU mapping +# - only rgu date, no RGU mapping +# - rgu date, empty RGU mapping +# - rgu date, RGU mapping +# With 4 first configs, frame should not be updated, +# as either rgu date is missing or RGU mapping is missing or empty. +# With 5th config, frame should be updated, as all required data are available. 
+ + +@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl") +@pytest.fixture +def clusters_config(): + clusters: Dict[str, ClusterConfig] = config().clusters + return clusters + + +@pytest.fixture +def cluster_no_rgu(clusters_config): + return clusters_config["hyrule"] + + +@pytest.fixture +def cluster_only_rgu_start_date(clusters_config): + return clusters_config["local"] + + +@pytest.fixture +def cluster_only_rgu_billing(clusters_config): + return clusters_config["patate"] + + +@pytest.fixture +def cluster_full_rgu_empty_billing(clusters_config): + return clusters_config["gerudo"] + + +@pytest.fixture +def cluster_full_rgu(clusters_config): + return clusters_config["raisin"] + + +@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl") +def test_clusters_rgu_config( + cluster_no_rgu, + cluster_only_rgu_start_date, + cluster_only_rgu_billing, + cluster_full_rgu_empty_billing, + cluster_full_rgu, +): + """Just check clusters config.""" + assert cluster_no_rgu.rgu_start_date is None + assert cluster_no_rgu.gpu_to_rgu_billing is None + + assert cluster_only_rgu_start_date.rgu_start_date is not None + assert cluster_only_rgu_start_date.gpu_to_rgu_billing is None + + assert cluster_only_rgu_billing.rgu_start_date is None + assert cluster_only_rgu_billing.gpu_to_rgu_billing is not None + + assert cluster_full_rgu_empty_billing.rgu_start_date is not None + assert cluster_full_rgu_empty_billing.gpu_to_rgu_billing is not None + assert _read_json(cluster_full_rgu_empty_billing.gpu_to_rgu_billing) == {} + + assert cluster_full_rgu.rgu_start_date is not None + assert cluster_full_rgu.gpu_to_rgu_billing is not None + gpu_to_rgu_billing = _read_json(cluster_full_rgu.gpu_to_rgu_billing) + assert isinstance(gpu_to_rgu_billing, dict) + assert len(gpu_to_rgu_billing) + + +@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl") +def test_data_frame_output_size( + cluster_no_rgu, + cluster_only_rgu_start_date, + cluster_only_rgu_billing, + cluster_full_rgu_empty_billing, + cluster_full_rgu, +): + """ + Check that nothing is computed if cluster does not have both + RGU start time and non-empty RGU/GPU ratio JSON file. 
+ """ + cluster_names = ["raisin"] * 5 + start_times = [ + datetime.strptime(date, "%Y-%m-%d").astimezone(MTL) + for date in ( + "2023-02-14", + "2023-02-15", + "2023-02-16", + "2023-02-17", + "2023-02-18", + ) + ] + gres_gpu = [1, 2, 3, 4, 5] + gpu_type = [ + "raisin_gpu_1", + "raisin_gpu_2", + "raisin_gpu_3", + "raisin_gpu_4", + "raisin_gpu_5", + ] + + nans = pandas.Series([np.nan] * 5) + + frame = _gen_data_frame(cluster_names, start_times, gres_gpu, gpu_type) + assert frame.shape == (5, 4) + assert "allocated.gres_rgu" not in frame.columns + assert "allocated.gpu_type_rgu" not in frame.columns + + update_cluster_job_series_rgu(frame, cluster_no_rgu) + assert frame.shape == (5, 6) + assert frame["allocated.gres_rgu"].equals(nans) + assert frame["allocated.gpu_type_rgu"].equals(nans) + + update_cluster_job_series_rgu(frame, cluster_only_rgu_start_date) + assert frame.shape == (5, 6) + assert frame["allocated.gres_rgu"].equals(nans) + assert frame["allocated.gpu_type_rgu"].equals(nans) + + update_cluster_job_series_rgu(frame, cluster_only_rgu_billing) + assert frame.shape == (5, 6) + assert frame["allocated.gres_rgu"].equals(nans) + assert frame["allocated.gpu_type_rgu"].equals(nans) + + update_cluster_job_series_rgu(frame, cluster_full_rgu_empty_billing) + assert frame.shape == (5, 6) + assert frame["allocated.gres_rgu"].equals(nans) + assert frame["allocated.gpu_type_rgu"].equals(nans) + + # Then, with full config, we should have updates. + update_cluster_job_series_rgu(frame, cluster_full_rgu) + assert frame.shape == (5, 6) + assert not frame["allocated.gres_rgu"].equals(nans) + assert not frame["allocated.gpu_type_rgu"].equals(nans) + + +def _gen_complex_data_frame(): + cluster_names = (["raisin"] * 9) + ["fromage", "patate", "fromage"] + start_times = [ + datetime.strptime(date, "%Y-%m-%d").astimezone(MTL) + for date in ( + "2023-02-12", + "2023-02-13", + "2023-02-14", + "2023-02-15", + "2023-02-16", + "2023-02-17", + "2023-02-18", + "2023-02-19", + "2023-02-20", + "2023-02-21", # job belongs to cluster fromage + "2023-02-21", # job belongs to cluster patate + "2023-02-22", # job belongs to cluster fromage + ) + ] + gres_gpu = [1, 2, 3, 4, 5000, 6000, 7000, 8000, 9000, 123, 5678, 91011] + gpu_type = [ + "raisin_gpu_unknown_1", + "raisin_gpu_unknown_2", + "raisin_gpu_3", + "raisin_gpu_4", + "raisin_gpu_5", + "raisin_gpu_unknown_6", + "A100", + "raisin_gpu_unknown_8", + "raisin_gpu_unknown_9", + "fromage_gpu_1", # job belongs to cluster fromage + "patate_gpu_9", # job belongs to cluster patate + "fromage_gpu_2", # job belongs to cluster fromage + ] + return _gen_data_frame(cluster_names, start_times, gres_gpu, gpu_type) + + +def _get_expected_columns_with_cluster_raisin(): + """ + Return expected columns when complex data frame is updated using only cluster raisin. 
+ """ + expected_gres_gpu = [ + 1.0, # before 2023-02-16, should not change (even if GPU type is unknown) + 2.0, # before 2023-02-16, should not change (even if GPU type is unknown) + 3.0, # before 2023-02-16, should not change + 4.0, # before 2023-02-16, should not change + 5000 / 500, # from 2023-12-16, should be divided by RGU/GPU ratio + np.nan, # from 2023-12-16, unknown GPU type, should be nan + 7000 / 700, # from 2023-12-16, should be divided by RGU/GPU ratio + np.nan, # from 2023-12-16, unknown GPU type, should be nan + np.nan, # from 2023-12-16, unknown GPU type, should be nan + 123, # job does not belong to cluster raisin, then should not change + 5678, # job does not belong to cluster raisin, then should not change + 91011, # job does not belong to cluster raisin, then should not change + ] + expected_gres_rgu = [ + np.nan, # before 2023-12-16, unknown GPU type, should be nan + np.nan, # before 2023-12-16, unknown GPU type, should be nan + 3 * 300.0, # before 2023-12-16, should be gres_gpu * RGU/GPU ratio + 4 * 400.0, # before 2023-12-16, should be gres_gpu * RGU/GPU ratio + 5000.0, # from 2023-12-16, should be gres_gpu + 6000.0, # from 2023-12-16, should be gres_gpu (even if GPU type is unknown) + 7000.0, # from 2023-12-16, should be gres_gpu + 8000.0, # from 2023-12-16, should be gres_gpu (even if GPU type is unknown) + 9000.0, # from 2023-12-16, should be gres_gpu (even if GPU type is unknown) + np.nan, # job does not belong to cluster raisin, then should have nan here + np.nan, # job does not belong to cluster raisin, then should have nan here + np.nan, # job does not belong to cluster raisin, then should have nan here + ] + expected_gpu_type_rgu = [ + np.nan, # GPU type unknown, should be nan + np.nan, # GPU type unknown, should be nan + 300, # GPU type exists in RGU map, should be copied here + 400, # GPU type exists in RGU map, should be copied here + 500, # GPU type exists in RGU map, should be copied here + np.nan, # GPU type unknown, should be nan + 700, # GPU type exists in RGU map, should be copied here + np.nan, # GPU type unknown, should be nan + np.nan, # GPU type unknown, should be nan + np.nan, # job does not belong to cluster raisin, then should have nan here + np.nan, # job does not belong to cluster raisin, then should have nan here + np.nan, # job does not belong to cluster raisin, then should have nan here + ] + + return expected_gres_gpu, expected_gres_rgu, expected_gpu_type_rgu + + +@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl") +def test_update_cluster_job_series_rgu(cluster_full_rgu): + """Concrete test for 1 cluster with a generated frame.""" + assert cluster_full_rgu.rgu_start_date == "2023-02-16" + frame = _gen_complex_data_frame() + assert frame.shape == (12, 4) + assert "allocated.gres_rgu" not in frame.columns + assert "allocated.gpu_type_rgu" not in frame.columns + + returned_frame = update_cluster_job_series_rgu(frame, cluster_full_rgu) + assert frame is returned_frame + assert frame.shape == (12, 6) + assert "allocated.gres_rgu" in frame.columns + assert "allocated.gpu_type_rgu" in frame.columns + + ( + expected_gres_gpu, + expected_gres_rgu, + expected_gpu_type_rgu, + ) = _get_expected_columns_with_cluster_raisin() + assert frame["allocated.gres_gpu"].equals(pandas.Series(expected_gres_gpu)) + assert frame["allocated.gres_rgu"].equals(pandas.Series(expected_gres_rgu)) + assert frame["allocated.gpu_type_rgu"].equals(pandas.Series(expected_gpu_type_rgu)) + + +@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl") +def 
test_update_job_series_rgu():
+    """Concrete test for all clusters with a generated frame."""
+    frame = _gen_complex_data_frame()
+    assert frame.shape == (12, 4)
+    assert "allocated.gres_rgu" not in frame.columns
+    assert "allocated.gpu_type_rgu" not in frame.columns
+
+    returned_frame = update_job_series_rgu(frame)
+    assert frame is returned_frame
+    assert frame.shape == (12, 6)
+    assert "allocated.gres_rgu" in frame.columns
+    assert "allocated.gpu_type_rgu" in frame.columns
+
+    (
+        expected_gres_gpu,
+        expected_gres_rgu,
+        expected_gpu_type_rgu,
+    ) = _get_expected_columns_with_cluster_raisin()
+    expected_gres_gpu[-3:] = [
+        123.0,  # job belongs to cluster fromage before RGU, should not change
+        5678.0,  # job belongs to cluster patate, no RGU, so no change
+        91011 / 200,  # job belongs to cluster fromage after RGU, divided by RGU/GPU
+    ]
+    expected_gres_rgu[-3:] = [
+        123 * 100.0,  # job from cluster fromage before RGU: gres_gpu * RGU/GPU ratio
+        np.nan,  # job belongs to cluster patate, no RGU, so it should be nan here
+        91011.0,  # job belongs to cluster fromage after RGU, should be gres_gpu
+    ]
+    expected_gpu_type_rgu[-3:] = [
+        100.0,  # job belongs to cluster fromage, GPU type should be copied here
+        np.nan,  # job belongs to cluster patate, no RGU, so it should be nan here
+        200.0,  # job belongs to cluster fromage, GPU type should be copied here
+    ]
+    assert frame["allocated.gres_gpu"].equals(pandas.Series(expected_gres_gpu))
+    assert frame["allocated.gres_rgu"].equals(pandas.Series(expected_gres_rgu))
+    assert frame["allocated.gpu_type_rgu"].equals(pandas.Series(expected_gpu_type_rgu))
+
+
+@pytest.mark.freeze_time(MOCK_TIME)
+@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl")
+def test_update_job_series_rgu_with_real_test_data(cluster_full_rgu, file_regression):
+    """Concrete tests with jobs from read_only_db"""
+    frame = load_job_series()
+    update_cluster_job_series_rgu(frame, cluster_full_rgu)
+
+    def _df_to_pretty_str(df: pandas.DataFrame) -> str:
+        fields = [
+            "job_id",
+            "cluster_name",
+            "start_time",
+            "allocated.gpu_type",
+            "allocated.gres_gpu",
+            "allocated.gres_rgu",
+            "allocated.gpu_type_rgu",
+        ]
+        return df[fields].to_markdown()
+
+    file_regression.check(
+        f"Update job series RGU for {frame.shape[0]} job(s):\n\n"
+        f"RGU start date: {cluster_full_rgu.rgu_start_date}\n\n"
+        f"gpu_to_rgu_billing:\n{pformat(_read_json(cluster_full_rgu.gpu_to_rgu_billing))}\n\n"
+        f"{_df_to_pretty_str(frame)}"
+    )
diff --git a/tests/functional/jobs/test_func_update_job_series_rgu/test_update_job_series_rgu_with_real_test_data.txt b/tests/functional/jobs/test_func_update_job_series_rgu/test_update_job_series_rgu_with_real_test_data.txt
new file mode 100644
index 00000000..b4574e6c
--- /dev/null
+++ b/tests/functional/jobs/test_func_update_job_series_rgu/test_update_job_series_rgu_with_real_test_data.txt
@@ -0,0 +1,41 @@
+Update job series RGU for 24 job(s):
+
+RGU start date: 2023-02-16
+
+gpu_to_rgu_billing:
+{'A100': 700,
+ 'raisin_gpu_1': 100,
+ 'raisin_gpu_2': 200,
+ 'raisin_gpu_3': 300,
+ 'raisin_gpu_4': 400,
+ 'raisin_gpu_5': 500,
+ 'raisin_gpu_6': 600,
+ 'raisin_gpu_8': 800,
+ 'raisin_gpu_9': 900}
+
+| | job_id | cluster_name | start_time | allocated.gpu_type | allocated.gres_gpu | allocated.gres_rgu | allocated.gpu_type_rgu |
+|---:|----------:|:---------------|:--------------------------|:---------------------|---------------------:|---------------------:|-------------------------:|
+| 0 | 1 | raisin | 2023-02-14 00:01:00-05:00 | | 1 | nan | nan |
+| 1 | 2 | raisin | 
2023-02-14 06:01:00-05:00 | | 1 | nan | nan | +| 2 | 3 | raisin | 2023-02-14 12:01:00-05:00 | | 1 | nan | nan | +| 3 | 4 | raisin | 2023-02-14 18:01:00-05:00 | | 1 | nan | nan | +| 4 | 5 | raisin | 2023-02-15 00:01:00-05:00 | | 1 | nan | nan | +| 5 | 6 | raisin | 2023-02-15 06:01:00-05:00 | | 1 | nan | nan | +| 6 | 7 | raisin | 2023-11-21 07:00:00-05:00 | | nan | 1 | nan | +| 7 | 8 | raisin | 2023-11-21 07:00:00-05:00 | | nan | 1 | nan | +| 8 | 9 | raisin | 2023-02-16 00:01:00-05:00 | | nan | 1 | nan | +| 9 | 10 | raisin | 2023-02-16 00:01:00-05:00 | | nan | 1 | nan | +| 10 | 11 | raisin | 2023-02-16 00:01:00-05:00 | | nan | 1 | nan | +| 11 | 12 | raisin | 2023-02-16 18:01:00-05:00 | | nan | 1 | nan | +| 12 | 13 | raisin | 2023-02-17 00:01:00-05:00 | | nan | 1 | nan | +| 13 | 14 | raisin | 2023-02-17 06:01:00-05:00 | | nan | 1 | nan | +| 14 | 15 | fromage | 2023-02-17 12:01:00-05:00 | | 1 | nan | nan | +| 15 | 16 | patate | 2023-02-17 18:01:00-05:00 | | 1 | nan | nan | +| 16 | 17 | raisin | 2023-02-18 00:01:00-05:00 | | nan | 1 | nan | +| 17 | 18 | raisin | 2023-02-18 06:01:00-05:00 | | nan | 1 | nan | +| 18 | 19 | raisin | 2023-02-18 12:01:00-05:00 | | nan | 1 | nan | +| 19 | 20 | raisin | 2023-02-18 18:01:00-05:00 | | nan | 1 | nan | +| 20 | 1000000 | raisin | 2023-02-19 00:01:00-05:00 | | nan | 1 | nan | +| 21 | 1000000 | raisin | 2023-02-19 06:01:00-05:00 | | nan | 1 | nan | +| 22 | 23 | raisin | 2023-02-19 12:01:00-05:00 | A100 | 0.00285714 | 2 | 700 | +| 23 | 999999999 | raisin | 2023-02-19 18:01:00-05:00 | | nan | 0 | nan | \ No newline at end of file diff --git a/tests/not-so-secrets/fromage_prometheus/gpu_to_rgu_billing.json b/tests/not-so-secrets/fromage_prometheus/gpu_to_rgu_billing.json new file mode 100644 index 00000000..8a58e422 --- /dev/null +++ b/tests/not-so-secrets/fromage_prometheus/gpu_to_rgu_billing.json @@ -0,0 +1,4 @@ +{ + "fromage_gpu_1": 100, + "fromage_gpu_2": 200 +} diff --git a/tests/not-so-secrets/gerudo_prometheus/gpu_to_rgu_billing_empty.json b/tests/not-so-secrets/gerudo_prometheus/gpu_to_rgu_billing_empty.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/tests/not-so-secrets/gerudo_prometheus/gpu_to_rgu_billing_empty.json @@ -0,0 +1 @@ +{} diff --git a/tests/not-so-secrets/patate_prometheus/gpu_to_rgu_billing.json b/tests/not-so-secrets/patate_prometheus/gpu_to_rgu_billing.json new file mode 100644 index 00000000..c021e828 --- /dev/null +++ b/tests/not-so-secrets/patate_prometheus/gpu_to_rgu_billing.json @@ -0,0 +1,3 @@ +{ + "patate_gpu_1": 700 +} diff --git a/tests/not-so-secrets/raisin_prometheus/gpu_to_rgu_billing.json b/tests/not-so-secrets/raisin_prometheus/gpu_to_rgu_billing.json new file mode 100644 index 00000000..b49f1135 --- /dev/null +++ b/tests/not-so-secrets/raisin_prometheus/gpu_to_rgu_billing.json @@ -0,0 +1,11 @@ +{ + "raisin_gpu_1": 100, + "raisin_gpu_2": 200, + "raisin_gpu_3": 300, + "raisin_gpu_4": 400, + "raisin_gpu_5": 500, + "raisin_gpu_6": 600, + "A100": 700, + "raisin_gpu_8": 800, + "raisin_gpu_9": 900 +} diff --git a/tests/sarc-test.json b/tests/sarc-test.json index c1355e4a..e6f286f2 100644 --- a/tests/sarc-test.json +++ b/tests/sarc-test.json @@ -29,7 +29,9 @@ "duc_inodes_command": null, "duc_storage_command": null, "diskusage_report_command": null, - "prometheus_url": "http://monitoring.server.raisin.quebec:9090/" + "prometheus_url": "http://monitoring.server.raisin.quebec:9090/", + "rgu_start_date": "2023-02-16", + "gpu_to_rgu_billing": "tests/not-so-secrets/raisin_prometheus/gpu_to_rgu_billing.json" }, 
"raisin_no_prometheus": { "host": "raisin_no_prometheus", @@ -51,7 +53,9 @@ "duc_storage_command": "duc ls -d /project/.duc_databases/rrg-bonhomme-ad.sqlite /project/rrg-bonhomme-ad", "diskusage_report_command": "diskusage_report --project --all_users", "prometheus_url": "https://fromage-thanos.calcul.ca", - "prometheus_headers_file": "tests/not-so-secrets/fromage_prometheus/headers.json" + "prometheus_headers_file": "tests/not-so-secrets/fromage_prometheus/headers.json", + "rgu_start_date": "2023-02-22", + "gpu_to_rgu_billing": "tests/not-so-secrets/fromage_prometheus/gpu_to_rgu_billing.json" }, "patate": { "host": "patate", @@ -62,7 +66,8 @@ "duc_storage_command": "duc ls -d /project/.duc_databases/rrg-bonhomme-ad.sqlite /project/rrg-bonhomme-ad", "diskusage_report_command": "diskusage_report --project --all_users", "prometheus_url": "https://fromage-thanos.calcul.ca", - "prometheus_headers_file": "tests/not-so-secrets/patate_prometheus/headers.json" + "prometheus_headers_file": "tests/not-so-secrets/patate_prometheus/headers.json", + "gpu_to_rgu_billing": "tests/not-so-secrets/patate_prometheus/gpu_to_rgu_billing.json" }, "gerudo": { "host": "gerudo", @@ -73,7 +78,9 @@ "duc_storage_command": "duc ls -d /project/.duc_databases/rrg-bonhomme-ad.sqlite /project/rrg-bonhomme-ad", "diskusage_report_command": "diskusage_report --project --all_users", "prometheus_url": "https://gerudo-thanos.calcul.ca", - "prometheus_headers_file": "tests/not-so-secrets/patate_prometheus/headers.json" + "prometheus_headers_file": "tests/not-so-secrets/patate_prometheus/headers.json", + "rgu_start_date": "2023-02-16", + "gpu_to_rgu_billing": "tests/not-so-secrets/gerudo_prometheus/gpu_to_rgu_billing_empty.json" }, "hyrule": { "host": "hyrule", @@ -106,7 +113,8 @@ "duc_inodes_command": null, "duc_storage_command": null, "diskusage_report_command": null, - "prometheus_url": null + "prometheus_url": null, + "rgu_start_date": "2023-02-16" } } }