Skip to content

Commit

Permalink
split calculated stats into two and created a base calculated stats
Browse files Browse the repository at this point in the history
  • Loading branch information
Shom770 committed Mar 13, 2024
1 parent 83b2b68 commit 73a34e3
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 32 deletions.
57 changes: 57 additions & 0 deletions src/utils/base_calculated_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""File that contains the base class whose methods are shared by the quantitative and qualitative calculated stats implementations."""

from typing import Callable

import numpy as np
from numpy import percentile
from pandas import DataFrame, Series

from .functions import retrieve_team_list


class BaseCalculatedStats:
    """Base class defining methods that are used across both quantitative and qualitative calculated stats implementations."""

    def __init__(self, data: DataFrame):
        """Stores the data that the statistics are calculated from.
        :param data: The dataframe kept on the instance as `self.data`.
        """
        self.data = data

    # Percentile methods
    def quantile_stat(self, quantile: float, predicate: Callable) -> float:
        """Calculates a scalar value for a percentile of a dataset.
        Used for comparisons between teams (eg passing in 0.5 will return the median).
        :param quantile: Quantile used to find the scalar value at (a fraction, multiplied by 100 before use).
        :param predicate: Predicate called per team in the scouting data to create the dataset (self and team number must be arguments).
        :return: A float representing the scalar value for a percentile of a dataset.
        """
        # One value per team found in the stored data; the predicate receives this instance and the team.
        dataset = [predicate(self, team) for team in retrieve_team_list(self.data)]
        return percentile(dataset, quantile * 100)

    def calculate_iqr(self, dataset: Series) -> float:
        """Calculates the IQR of a dataset (75th percentile - 25th percentile).
        :param dataset: The dataset to calculate the IQR for.
        :return: A float representing the IQR.
        """
        return percentile(dataset, 75) - percentile(dataset, 25)

    def cartesian_product(
        self,
        dataset_x: list,
        dataset_y: list,
        dataset_z: list,
        reduce_with_sum: bool = False
    ) -> np.ndarray:
        """Creates a cartesian product (every combination of one element from each of the three datasets).
        :param dataset_x: A dataset containing x values.
        :param dataset_y: A dataset containing y values.
        :param dataset_z: A dataset containing z values.
        :param reduce_with_sum: Whether or not to add up the cartesian product for each tuple yielded.
        :return: A numpy array containing the (x, y, z) combinations, or the sum of each combination if `reduce_with_sum` is True.
        """
        from itertools import product

        # `product` snapshots each input once, so one-shot iterables are not exhausted
        # after the first pass of the outer loop; the ordering (x slowest, z fastest) is unchanged.
        combinations = product(dataset_x, dataset_y, dataset_z)

        if reduce_with_sum:
            return np.array([x + y + z for x, y, z in combinations])

        return np.array(list(combinations))
14 changes: 14 additions & 0 deletions src/utils/calculated_qualitative_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""File that contains the class which calculates qualitative statistics derived from note scouting in an event."""

from pandas import DataFrame

from .base_calculated_stats import BaseCalculatedStats

__all__ = ["CalculatedQualitativeStats"]


class CalculatedQualitativeStats(BaseCalculatedStats):
    """Utility class that computes qualitative statistics derived from note scouting in an event."""

    def __init__(self, data: DataFrame):
        """Hands the given data to the base class initializer.
        :param data: The dataframe forwarded to `BaseCalculatedStats`.
        """
        super().__init__(data=data)
34 changes: 4 additions & 30 deletions src/utils/calculated_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@
from scipy.integrate import quad
from scipy.stats import norm


from .base_calculated_stats import BaseCalculatedStats
from .constants import Criteria, Queries
from .functions import _convert_to_float_from_numpy_type, scouting_data_for_team, retrieve_team_list, retrieve_pit_scouting_data

__all__ = ["CalculatedStats"]


class CalculatedStats:
class CalculatedStats(BaseCalculatedStats):
"""Utility class for calculating statistics in an event."""

def __init__(self, data: DataFrame):
self.data = data
super().__init__(data)

# Point contribution methods
@_convert_to_float_from_numpy_type
Expand Down Expand Up @@ -336,34 +338,6 @@ def stat_per_match(self, team_number: int, stat: str, criteria: dict | None = No
lambda datum: criteria.get(datum, 0) if criteria is not None else datum
)

def calculate_iqr(self, dataset: Series) -> float:
    """Computes the interquartile range of a dataset.
    :param dataset: The values to compute the interquartile range of.
    :return: The 75th percentile of the dataset minus its 25th percentile.
    """
    upper_quartile = percentile(dataset, 75)
    lower_quartile = percentile(dataset, 25)
    return upper_quartile - lower_quartile

def cartesian_product(
    self,
    dataset_x: list,
    dataset_y: list,
    dataset_z: list,
    reduce_with_sum: bool = False
) -> np.ndarray:
    """Creates a cartesian product (every combination of one element from each of the three datasets).
    :param dataset_x: A dataset containing x values.
    :param dataset_y: A dataset containing y values.
    :param dataset_z: A dataset containing z values.
    :param reduce_with_sum: Whether or not to add up the cartesian product for each tuple yielded.
    :return: A numpy array containing the (x, y, z) combinations, or the sum of each combination if `reduce_with_sum` is True.
    """
    from itertools import product

    # `product` snapshots each input once, so one-shot iterables are not exhausted
    # after the first pass of the outer loop; the ordering (x slowest, z fastest) is unchanged.
    combinations = product(dataset_x, dataset_y, dataset_z)

    if reduce_with_sum:
        return np.array([x + y + z for x, y, z in combinations])

    return np.array(list(combinations))

def driving_index(self, team_number: int) -> float:
"""Determines how fast a team is based on multiplying their teleop cycles by their counter defense rating
Expand Down
1 change: 1 addition & 0 deletions src/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class EventSpecificConstants:
EVENT_CODE = "2024vaash"
EVENT_NAME = "Ashland"
URL = f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_match_data.json"
NOTE_SCOUTING_URL = f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_qualitative_data.json"
PIT_SCOUTING_URL = (
f"https://raw.githubusercontent.com/team4099/ScoutingAppData/main/{EVENT_CODE}_pit_scouting_data.csv"
)
Expand Down
39 changes: 37 additions & 2 deletions src/utils/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
from .constants import EventSpecificConstants, GeneralConstants, Queries

__all__ = [
"note_scouting_data_for_team",
"populate_missing_data",
"retrieve_match_schedule",
"retrieve_match_data",
"retrieve_note_scouting_data",
"retrieve_pit_scouting_data",
"retrieve_team_list",
"retrieve_scouting_data",
Expand Down Expand Up @@ -58,6 +60,22 @@ def retrieve_scouting_data() -> DataFrame:
return scouting_data.sort_values(by=Queries.MATCH_NUMBER).reset_index(drop=True)


@st.cache_data(ttl=GeneralConstants.SECONDS_TO_CACHE)
def retrieve_note_scouting_data() -> DataFrame:
    """Retrieves the latest note scouting data from team4099/ScoutingAppData on GitHub based on the current event.
    The rows are sorted by match number and re-indexed before being returned.
    :return: A dataframe containing the note scouting data from an event.
    """
    # Fetch the qualitative (note scouting) JSON rather than the match data served at `EventSpecificConstants.URL`.
    scouting_data = DataFrame.from_dict(
        loads(get(EventSpecificConstants.NOTE_SCOUTING_URL).text)
    )
    # The match number is the first run of digits found in the match key.
    scouting_data[Queries.MATCH_NUMBER] = scouting_data[Queries.MATCH_KEY].apply(
        lambda match_key: int(search(r"\d+", match_key).group(0))
    )

    return scouting_data.sort_values(by=Queries.MATCH_NUMBER).reset_index(drop=True)


@st.cache_data(ttl=GeneralConstants.SECONDS_TO_CACHE)
def retrieve_pit_scouting_data() -> DataFrame | None:
"""Retrieves the latest pit scouting data from team4099/ScoutingAppData on GitHub based on the current event.
Expand Down Expand Up @@ -154,12 +172,29 @@ def scouting_data_for_team(team_number: int, scouting_data: DataFrame | None = N
]


def retrieve_team_list() -> list:
def note_scouting_data_for_team(team_number: int, scouting_data: DataFrame | None = None) -> DataFrame:
"""Retrieves the submissions within the note scouting data for a certain team.
:param team_number: The number of the team to retrieve the submissions for.
:param scouting_data: An optional argument allowing the user to pass in the scouting data if already retrieved.
:return: A dataframe containing th submissions within the scouting data for the team passed in.
"""
if scouting_data is None:
scouting_data = retrieve_note_scouting_data()

return scouting_data[
scouting_data["TeamNumber"] == team_number
]


def retrieve_team_list(scouting_data: DataFrame = None) -> list:
"""Retrieves the team list at the current event via the scouting data.
:return: A list containing the teams at the current event.
"""
scouting_data = retrieve_scouting_data()
if scouting_data is None:
scouting_data = retrieve_scouting_data()

# Filter out empty team numbers
scouting_data = scouting_data[scouting_data["TeamNumber"] != ""]

Expand Down

0 comments on commit 73a34e3

Please sign in to comment.