class BaseCalculatedStats:
    """Base class defining methods shared by the quantitative and qualitative calculated stats implementations."""

    def __init__(self, data: DataFrame):
        """Stores the scouting data that the calculation methods operate on.

        :param data: The scouting data, kept on the instance as `self.data`.
        """
        self.data = data

    # Percentile methods
    def quantile_stat(self, quantile: float, predicate: Callable) -> float:
        """Calculates a scalar value for a percentile of a dataset.

        Used for comparisons between teams (eg passing in 0.5 will return the median).

        :param quantile: Quantile expressed as a fraction; it is multiplied by 100 before being handed to `percentile`.
        :param predicate: Called as `predicate(self, team)` for every team yielded by `retrieve_team_list(self.data)`
            in order to build the dataset.
        :return: The value of the generated dataset at the requested quantile.
        """
        dataset = [predicate(self, team) for team in retrieve_team_list(self.data)]
        return percentile(dataset, quantile * 100)

    def calculate_iqr(self, dataset: Series) -> float:
        """Calculates the IQR of a dataset (75th percentile - 25th percentile).

        :param dataset: The dataset to calculate the IQR for.
        :return: The difference between the 75th and the 25th percentile of the dataset.
        """
        # Both percentiles are requested in one call so the dataset is only processed once.
        first_quartile, third_quartile = percentile(dataset, [25, 75])
        return third_quartile - first_quartile

    def cartesian_product(
        self,
        dataset_x: list,
        dataset_y: list,
        dataset_z: list,
        reduce_with_sum: bool = False
    ) -> np.ndarray:
        """Creates the cartesian product of three datasets (every `(x, y, z)` combination).

        Combinations are generated with `dataset_x` varying slowest and `dataset_z` varying fastest.

        :param dataset_x: A dataset containing x values.
        :param dataset_y: A dataset containing y values.
        :param dataset_z: A dataset containing z values.
        :param reduce_with_sum: Whether or not to add up the cartesian product for each tuple yielded.
        :return: A numpy array containing the `(x, y, z)` tuples, or `x + y + z` per tuple if `reduce_with_sum` is True.
        """
        # Imported locally so that this method does not rely on a new module-level import.
        from itertools import product

        triples = product(dataset_x, dataset_y, dataset_z)

        if reduce_with_sum:
            return np.array([x + y + z for x, y, z in triples])

        return np.array(list(triples))


class CalculatedQualitativeStats(BaseCalculatedStats):
    """Utility class for calculating qualitative statistics derived from note scouting in an event."""

    def __init__(self, data: DataFrame):
        """Passes the scouting data through to `BaseCalculatedStats`.

        :param data: The scouting data that is stored on the instance by the base class.
        """
        super().__init__(data)
datum: criteria.get(datum, 0) if criteria is not None else datum ) - def calculate_iqr(self, dataset: Series) -> float: - """Calculates the IQR of a dataset (75th percentile - 25th percentile). - - :param dataset: The dataset to calculate the IQR for. - :return: A float representing the IQR. - """ - return percentile(dataset, 75) - percentile(dataset, 25) - - def cartesian_product( - self, - dataset_x: list, - dataset_y: list, - dataset_z: list, - reduce_with_sum: bool = False - ) -> np.ndarray: - """Creates a cartesian product (permutations of each element in the three datasets). - - :param dataset_x: A dataset containing x values. - :param dataset_y: A dataset containing y values. - :param dataset_z: A dataset containing z values. - :param reduce_with_sum: Whether or not to add up the cartesian product for each tuple yielded. - :return: A list containing the cartesian products or the sum of it if `reduce_with_sum` is True. - """ - return np.array([ - (x + y + z if reduce_with_sum else (x, y, z)) - for x in dataset_x for y in dataset_y for z in dataset_z - ]) - def driving_index(self, team_number: int) -> float: """Determines how fast a team is based on multiplying their teleop cycles by their counter defense rating diff --git a/src/utils/constants.py b/src/utils/constants.py index 66a1ac9..847b2ee 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -56,6 +56,7 @@ class EventSpecificConstants: EVENT_CODE = "2024vaash" EVENT_NAME = "Ashland" URL = f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_match_data.json" + NOTE_SCOUTING_URL = f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_qualitative_data.json" PIT_SCOUTING_URL = ( f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_pit_scouting_data.csv" ) diff --git a/src/utils/functions.py b/src/utils/functions.py index 68fc9b5..df83d05 100644 --- a/src/utils/functions.py +++ b/src/utils/functions.py @@ -13,9 +13,11 
@st.cache_data(ttl=GeneralConstants.SECONDS_TO_CACHE)
def retrieve_note_scouting_data() -> DataFrame:
    """Retrieves the latest note scouting data from team4099/ScoutingAppData on GitHub based on the current event.

    The payload at `EventSpecificConstants.NOTE_SCOUTING_URL` is parsed and loaded into a dataframe, and a
    match number column is derived from the first run of digits found in each match key.

    :return: A dataframe containing the note scouting data from an event, sorted by match number with a reset index.
    """
    # The note scouting endpoint must be used here; `EventSpecificConstants.URL` points at the match data instead.
    note_scouting_data = DataFrame.from_dict(
        loads(get(EventSpecificConstants.NOTE_SCOUTING_URL).text)
    )
    note_scouting_data[Queries.MATCH_NUMBER] = note_scouting_data[Queries.MATCH_KEY].apply(
        lambda match_key: int(search(r"\d+", match_key).group(0))
    )

    return note_scouting_data.sort_values(by=Queries.MATCH_NUMBER).reset_index(drop=True)


def note_scouting_data_for_team(team_number: int, scouting_data: DataFrame | None = None) -> DataFrame:
    """Retrieves the submissions within the note scouting data for a certain team.

    :param team_number: The number of the team to retrieve the submissions for.
    :param scouting_data: An optional argument allowing the user to pass in the scouting data if already retrieved.
        When omitted, the data is fetched via `retrieve_note_scouting_data`.
    :return: A dataframe containing the submissions within the scouting data for the team passed in.
    """
    if scouting_data is None:
        scouting_data = retrieve_note_scouting_data()

    return scouting_data[
        scouting_data["TeamNumber"] == team_number
    ]