-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactoring stats, graph and interfaces into independent files
- Loading branch information
1 parent
f525d15
commit 7118660
Showing
4 changed files
with
151 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import annotations | ||
from typing import Protocol | ||
import pandas as pd | ||
|
||
class TimeSerie(Protocol):
    """Structural interface for time-series objects.

    Any object that exposes the attributes and methods below satisfies
    this protocol; explicit inheritance is not required.
    """

    # pandas DataFrame holding the series data.
    df: pd.DataFrame
    # Layout tag of ``df`` (for example 'wide').
    format: str
    # NOTE(review): the exact meaning of this field is not visible in this
    # file -- confirm against the concrete implementation.
    features: str

    def copy(self) -> TimeSerie:
        """Return a copy of this time series."""

    def to_wide(self) -> None:
        """Convert this time series to the wide layout, modifying it in place."""
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import annotations | ||
from typing import Protocol | ||
import pandas as pd | ||
import matplotlib.pyplot as plt # type: ignore | ||
from t8s.interfaces import TimeSerie | ||
|
||
class TSPlotting:
    """Plotting helper bound to a single time series."""

    def __init__(self, ts: TimeSerie, **kwargs):
        """Remember the series and any options given at construction time.

        Args:
            ts: Time series whose data will be plotted.
            **kwargs: Extra options; they are only stored for now.
        """
        self.__kwargs = kwargs
        self.ts = ts

    def line(self, **kwargs):
        """Show a line chart of every feature column against the time column.

        The first column of the wide-format frame is used as the x axis and
        all remaining columns are drawn as y series. The keyword arguments
        are currently only echoed to stdout; they do not affect the plot.
        """
        frame = self.to_new_df()
        x_column = str(frame.columns[0])

        # Echo the received options (name, value and type of each one).
        for chave, valor in kwargs.items():
            print(f'chave = {chave}: valor = {valor} -> tipo = {type(valor)}')

        y_columns = list(frame.columns[1:])
        frame.plot(kind='line', x=x_column, y=y_columns, figsize=(12, 5), grid=True)

        plt.show()

    def scatter(self, **kwargs):
        """Placeholder: scatter plots are not implemented yet."""

    def bar(self, **kwargs):
        """Placeholder: bar plots are not implemented yet."""

    def hist(self, **kwargs):
        """Placeholder: histograms are not implemented yet."""

    def box(self, **kwargs):
        """Placeholder: box plots are not implemented yet."""

    def stackplot(self, **kwargs):
        """Placeholder: stack plots are not implemented yet."""

    def to_new_df(self) -> pd.DataFrame:
        """Return a wide-format DataFrame that is safe to use for plotting.

        The returned frame never aliases the DataFrame held by the
        original time series, so plotting cannot affect the source object.
        """
        if self.ts.format != 'wide':
            # to_wide() mutates the object it is called on, so the
            # conversion is done on a copy of the series instead.
            clone = self.ts.copy()
            clone.to_wide()
            return clone.df
        return self.ts.df.copy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import annotations | ||
from typing import Protocol | ||
from pathlib import Path | ||
from datetime import datetime | ||
import numpy as np | ||
import pandas as pd | ||
from pandas.core.series import Series | ||
from t8s.log_config import LogConfig | ||
|
||
logger = LogConfig().getLogger() | ||
|
||
class TSStats:
    """Descriptive statistics of a DataFrame, with per-column accessors."""

    def __init__(self, df: pd.DataFrame):
        """Compute the statistical summary of ``df`` and log it.

        Two summaries are kept: ``summary_en_us`` (the result of
        ``df.describe()``) and ``summary_pt_br`` (the same table with the
        row labels renamed to Portuguese).
        """
        assert isinstance(df, pd.DataFrame), "df must be a Pandas DataFrame"
        # Statistical summary with the default (English) row labels.
        self.summary_en_us: pd.DataFrame = df.describe()
        # Same summary with the row labels renamed to PT_BR.
        pt_br_labels = {
            'count': 'Contagem',
            'mean': 'Média',
            'std': 'Desvio padrão',
            'min': 'Mínimo',
            '25%': 'Primeiro quartil',
            '50%': 'Mediana',
            '75%': 'Terceiro quartil',
            'max': 'Máximo',
        }
        self.summary_pt_br: pd.DataFrame = self.summary_en_us.rename(index=pt_br_labels)
        logger.info(f'summary_pt_br =\n{self.summary_pt_br}\n')

    def __str__(self) -> str:
        """Return the PT_BR summary rendered as text."""
        return str(self.summary_pt_br)

    def _lookup(self, statistic: str, column_name: str) -> float:
        """Read one cell of the English summary and return it as a float."""
        return float(self.summary_en_us.loc[statistic, column_name])  # type: ignore

    def count(self, column_name:str) -> float:
        """Return the number of elements counted in column ``column_name``."""
        return self._lookup('count', column_name)

    def mean(self, column_name:str) -> float:
        """Return the mean of column ``column_name``."""
        return self._lookup('mean', column_name)

    def std(self, column_name:str) -> float:
        """Return the standard deviation of column ``column_name``."""
        return self._lookup('std', column_name)

    def min(self, column_name:str) -> float:
        """Return the minimum value of column ``column_name``."""
        return self._lookup('min', column_name)

    def q1(self, column_name:str) -> float:
        """Return the first quartile (25%) of column ``column_name``."""
        return self._lookup('25%', column_name)

    def median(self, column_name:str) -> float:
        """Return the median (50%) of column ``column_name``."""
        return self._lookup('50%', column_name)

    def q2(self, column_name:str) -> float:
        """Return the second quartile of ``column_name`` (same as the median)."""
        return self.median(column_name)

    def q3(self, column_name:str) -> float:
        """Return the third quartile (75%) of column ``column_name``."""
        return self._lookup('75%', column_name)

    def max(self, column_name:str) -> float:
        """Return the maximum value of column ``column_name``."""
        return self._lookup('max', column_name)

    def amplitude(self, column_name:str) -> float:
        """Return the range (maximum minus minimum) of column ``column_name``."""
        highest = self.max(column_name)
        lowest = self.min(column_name)
        return highest - lowest
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters