split calculated stats into two and created a base calculated stats

team4099 · Mar 13, 2024 · 73a34e3 · 73a34e3
1 parent 83b2b68
commit 73a34e3
Show file tree

Hide file tree

Showing 5 changed files with 113 additions and 32 deletions.
diff --git a/src/utils/base_calculated_stats.py b/src/utils/base_calculated_stats.py
@@ -0,0 +1,57 @@
+"""File that contains the base class holding the helpers shared by the quantitative and qualitative calculated stats classes."""
+
+from typing import Callable
+
+import numpy as np
+from numpy import percentile
+from pandas import DataFrame, Series
+
+from .functions import retrieve_team_list
+
+
+class BaseCalculatedStats:
+    """Base class defining methods that are used across both quantitative and qualitative calculated stats implementations."""
+
+    def __init__(self, data: DataFrame):
+        self.data = data
+
+    # Percentile methods
+    def quantile_stat(self, quantile: float, predicate: Callable) -> float:
+        """Calculates the scalar value found at a given quantile of a per-team dataset.
+
+        Used for comparisons between teams (eg passing in 0.5 will return the median of the per-team values).
+
+        :param quantile: Quantile on a 0-1 scale (it is multiplied by 100 before being handed to numpy's `percentile`).
+        :param predicate: Callable invoked as `predicate(self, team)` for every team returned by `retrieve_team_list(self.data)`.
+        :return: A float representing the value at the requested quantile of the values produced by `predicate`.
+        """
+        dataset = [predicate(self, team) for team in retrieve_team_list(self.data)]
+        return percentile(dataset, quantile * 100)
+
+    def calculate_iqr(self, dataset: Series) -> float:
+        """Calculates the IQR (interquartile range) of a dataset (75th percentile - 25th percentile).
+
+        :param dataset: The dataset to calculate the IQR for.
+        :return: A float representing the IQR.
+        """
+        return percentile(dataset, 75) - percentile(dataset, 25)
+
+    def cartesian_product(
+            self,
+            dataset_x: list,
+            dataset_y: list,
+            dataset_z: list,
+            reduce_with_sum: bool = False
+    ) -> np.ndarray:
+        """Creates the cartesian product of three datasets (every combination of one x, one y and one z value).
+
+        :param dataset_x: A dataset containing x values.
+        :param dataset_y: A dataset containing y values.
+        :param dataset_z: A dataset containing z values.
+        :param reduce_with_sum: Whether or not to replace each (x, y, z) tuple with the sum x + y + z.
+        :return: A numpy array of the (x, y, z) tuples, or of their sums if `reduce_with_sum` is True.
+        """
+        return np.array([
+            (x + y + z if reduce_with_sum else (x, y, z))
+            for x in dataset_x for y in dataset_y for z in dataset_z
+        ])
diff --git a/src/utils/calculated_qualitative_stats.py b/src/utils/calculated_qualitative_stats.py
@@ -0,0 +1,14 @@
+"""File that contains the class which calculates qualitative (note scouting) statistics for a team/event."""
+
+from pandas import DataFrame
+
+from .base_calculated_stats import BaseCalculatedStats
+
+__all__ = ["CalculatedQualitativeStats"]
+
+
+class CalculatedQualitativeStats(BaseCalculatedStats):
+    """Utility class for qualitative (note scouting) statistics in an event; currently only inherits the shared helpers of BaseCalculatedStats."""
+
+    def __init__(self, data: DataFrame):
+        super().__init__(data)
diff --git a/src/utils/calculated_stats.py b/src/utils/calculated_stats.py
@@ -10,17 +10,19 @@
 from scipy.integrate import quad
 from scipy.stats import norm
 
+
+from .base_calculated_stats import BaseCalculatedStats
 from .constants import Criteria, Queries
 from .functions import _convert_to_float_from_numpy_type, scouting_data_for_team, retrieve_team_list, retrieve_pit_scouting_data
 
 __all__ = ["CalculatedStats"]
 
 
-class CalculatedStats:
+class CalculatedStats(BaseCalculatedStats):
     """Utility class for calculating statistics in an event."""
 
     def __init__(self, data: DataFrame):
-        self.data = data
+        super().__init__(data)
 
     # Point contribution methods
     @_convert_to_float_from_numpy_type
@@ -336,34 +338,6 @@ def stat_per_match(self, team_number: int, stat: str, criteria: dict | None = No
             lambda datum: criteria.get(datum, 0) if criteria is not None else datum
         )
 
-    def calculate_iqr(self, dataset: Series) -> float:
-        """Calculates the IQR of a dataset (75th percentile - 25th percentile).
-
-        :param dataset: The dataset to calculate the IQR for.
-        :return: A float representing the IQR.
-        """
-        return percentile(dataset, 75) - percentile(dataset, 25)
-
-    def cartesian_product(
-        self,
-        dataset_x: list,
-        dataset_y: list,
-        dataset_z: list,
-        reduce_with_sum: bool = False
-    ) -> np.ndarray:
-        """Creates a cartesian product (permutations of each element in the three datasets).
-
-        :param dataset_x: A dataset containing x values.
-        :param dataset_y: A dataset containing y values.
-        :param dataset_z: A dataset containing z values.
-        :param reduce_with_sum: Whether or not to add up the cartesian product for each tuple yielded.
-        :return: A list containing the cartesian products or the sum of it if `reduce_with_sum` is True.
-        """
-        return np.array([
-            (x + y + z if reduce_with_sum else (x, y, z))
-            for x in dataset_x for y in dataset_y for z in dataset_z
-        ])
-
     def driving_index(self, team_number: int) -> float:
         """Determines how fast a team is based on multiplying their teleop cycles by their counter defense rating
 

diff --git a/src/utils/constants.py b/src/utils/constants.py
@@ -56,6 +56,7 @@ class EventSpecificConstants:
     EVENT_CODE = "2024vaash"
     EVENT_NAME = "Ashland"
     URL = f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_match_data.json"
+    NOTE_SCOUTING_URL = f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_qualitative_data.json"
     PIT_SCOUTING_URL = (
         f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_pit_scouting_data.csv"
     )

diff --git a/src/utils/functions.py b/src/utils/functions.py
@@ -13,9 +13,11 @@
 from .constants import EventSpecificConstants, GeneralConstants, Queries
 
 __all__ = [
+    "note_scouting_data_for_team",
     "populate_missing_data",
     "retrieve_match_schedule",
     "retrieve_match_data",
+    "retrieve_note_scouting_data",
     "retrieve_pit_scouting_data",
     "retrieve_team_list",
     "retrieve_scouting_data",
@@ -58,6 +60,22 @@ def retrieve_scouting_data() -> DataFrame:
     return scouting_data.sort_values(by=Queries.MATCH_NUMBER).reset_index(drop=True)
 
 
+@st.cache_data(ttl=GeneralConstants.SECONDS_TO_CACHE)
+def retrieve_note_scouting_data() -> DataFrame:
+    """Retrieves the latest note scouting data from team4099/ScoutingAppData on GitHub based on the current event.
+
+    :return: A dataframe containing the note scouting data from an event, sorted by match number with a fresh index.
+    """
+    scouting_data = DataFrame.from_dict(
+        loads(get(EventSpecificConstants.NOTE_SCOUTING_URL).text)
+    )
+    scouting_data[Queries.MATCH_NUMBER] = scouting_data[Queries.MATCH_KEY].apply(
+        lambda match_key: int(search(r"\d+", match_key).group(0))
+    )
+
+    return scouting_data.sort_values(by=Queries.MATCH_NUMBER).reset_index(drop=True)
+
+
 @st.cache_data(ttl=GeneralConstants.SECONDS_TO_CACHE)
 def retrieve_pit_scouting_data() -> DataFrame | None:
     """Retrieves the latest pit scouting data from team4099/ScoutingAppData on GitHub based on the current event.
@@ -154,12 +172,29 @@ def scouting_data_for_team(team_number: int, scouting_data: DataFrame | None = N
         ]
 
 
-def retrieve_team_list() -> list:
+def note_scouting_data_for_team(team_number: int, scouting_data: DataFrame | None = None) -> DataFrame:
+    """Retrieves the submissions within the note scouting data for a certain team.
+
+    :param team_number: The number of the team to retrieve the submissions for.
+    :param scouting_data: Optional note scouting data if already retrieved; fetched via `retrieve_note_scouting_data` when None.
+    :return: A dataframe containing the rows of the note scouting data whose "TeamNumber" equals the team passed in.
+    """
+    if scouting_data is None:
+        scouting_data = retrieve_note_scouting_data()
+
+    return scouting_data[
+        scouting_data["TeamNumber"] == team_number
+        ]
+
+
+def retrieve_team_list(scouting_data: DataFrame = None) -> list:
     """Retrieves the team list at the current event via the scouting data.
 
     :return: A list containing the teams at the current event.
     """
-    scouting_data = retrieve_scouting_data()
+    if scouting_data is None:
+        scouting_data = retrieve_scouting_data()
+
     # Filter out empty team numbers
     scouting_data = scouting_data[scouting_data["TeamNumber"] != ""]