Skip to content

Commit

Permalink
reorg tests (#141)
Browse files Browse the repository at this point in the history
* reorg tests

* add seed
  • Loading branch information
david26694 authored Jan 15, 2024
1 parent ee33c1f commit 6c22bff
Show file tree
Hide file tree
Showing 15 changed files with 206 additions and 144 deletions.
14 changes: 14 additions & 0 deletions tests/analysis/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pandas as pd
import pytest


@pytest.fixture
def analysis_df():
    """Minimal 4-row experiment frame: binary target, A/B treatment,
    and a single cluster/date pair shared by every row."""
    rows = {
        "target": [0, 1, 0, 1],
        "treatment": ["A", "B", "B", "A"],
        "cluster": ["Cluster 1"] * 4,
        "date": ["2022-01-01"] * 4,
    }
    return pd.DataFrame(rows)
8 changes: 4 additions & 4 deletions tests/analysis/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
PairedTTestClusteredAnalysis,
TTestClusteredAnalysis,
)
from tests.examples import analysis_df, generate_clustered_data, generate_random_data
from tests.utils import generate_clustered_data, generate_random_data


@pytest.fixture
Expand All @@ -27,14 +27,14 @@ def analysis_df_diff():
return analysis_df_full


def test_cluster_column():
def test_cluster_column(analysis_df):
    """The composite cluster column concatenates every column listed in
    ``cluster_cols``, in order (here: cluster + date)."""
    analysis = GeeExperimentAnalysis(cluster_cols=["cluster", "date"])
    combined = analysis._get_cluster_column(analysis_df)
    assert (combined == "Cluster 12022-01-01").all()


def test_binary_treatment():
def test_binary_treatment(analysis_df):
analyser = GeeExperimentAnalysis(
cluster_cols=["cluster", "date"],
)
Expand All @@ -44,7 +44,7 @@ def test_binary_treatment():
).all()


def test_get_pvalue():
def test_get_pvalue(analysis_df):
analysis_df_full = pd.concat([analysis_df for _ in range(100)])
analyser = GeeExperimentAnalysis(
cluster_cols=["cluster", "date"],
Expand Down
10 changes: 5 additions & 5 deletions tests/analysis/test_hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
OLSAnalysis,
TTestClusteredAnalysis,
)
from tests.examples import analysis_df, generate_clustered_data
from tests.utils import generate_clustered_data


@pytest.mark.parametrize("hypothesis", ["less", "greater", "two-sided"])
@pytest.mark.parametrize("analysis_class", [OLSAnalysis])
def test_get_pvalue_hypothesis(analysis_class, hypothesis):
def test_get_pvalue_hypothesis(analysis_class, hypothesis, analysis_df):
analysis_df_full = pd.concat([analysis_df for _ in range(100)])
analyser = analysis_class(hypothesis=hypothesis)
assert analyser.get_pvalue(analysis_df_full) >= 0
Expand All @@ -37,14 +37,14 @@ def test_get_pvalue_hypothesis_clustered(analysis_class, hypothesis):


@pytest.mark.parametrize("analysis_class", [OLSAnalysis])
def test_get_pvalue_hypothesis_default(analysis_class):
def test_get_pvalue_hypothesis_default(analysis_class, analysis_df):
analysis_df_full = pd.concat([analysis_df for _ in range(100)])
analyser = analysis_class()
assert analyser.get_pvalue(analysis_df_full) >= 0


@pytest.mark.parametrize("analysis_class", [OLSAnalysis])
def test_get_pvalue_hypothesis_wrong_input(analysis_class):
def test_get_pvalue_hypothesis_wrong_input(analysis_class, analysis_df):
analysis_df_full = pd.concat([analysis_df for _ in range(100)])

# Use pytest.raises to check for ValueError
Expand All @@ -57,7 +57,7 @@ def test_get_pvalue_hypothesis_wrong_input(analysis_class):


@pytest.mark.parametrize("analysis_class", [OLSAnalysis])
def test_several_hypothesis(analysis_class):
def test_several_hypothesis(analysis_class, analysis_df):
analysis_df_full = pd.concat([analysis_df for _ in range(100)])
analysis_less = analysis_class(hypothesis="less")
analysis_greater = analysis_class(hypothesis="greater")
Expand Down
5 changes: 2 additions & 3 deletions tests/analysis/test_ols_analysis.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
import pandas as pd

from cluster_experiments.experiment_analysis import OLSAnalysis
from tests.examples import analysis_df


def test_binary_treatment():
def test_binary_treatment(analysis_df):
    """Treatment labels are binarized: "B" maps to 1, "A" maps to 0."""
    analysis = OLSAnalysis()
    binarized = analysis._create_binary_treatment(analysis_df)["treatment"]
    expected = pd.Series([0, 1, 1, 0])
    assert (binarized == expected).all()


def test_get_pvalue():
def test_get_pvalue(analysis_df):
    """OLS p-value on the replicated frame is well-defined (>= 0)."""
    replicated = pd.concat(100 * [analysis_df])
    assert OLSAnalysis().get_pvalue(replicated) >= 0
12 changes: 12 additions & 0 deletions tests/cupac/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pandas as pd
import pytest


@pytest.fixture
def binary_df():
    """Four observations with a 0/1 target and A/B/B/A treatment labels."""
    return pd.DataFrame(
        {"target": [0, 1, 0, 1], "treatment": list("ABBA")}
    )
13 changes: 6 additions & 7 deletions tests/cupac/test_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
import pandas as pd

from cluster_experiments.cupac import TargetAggregation
from tests.examples import binary_df


def split_x_y(binary_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.Series]:
return binary_df.drop("target", axis=1), binary_df["target"]
def split_x_y(binary_df_agg: pd.DataFrame) -> Tuple[pd.DataFrame, pd.Series]:
    """Split a frame into features (every column except ``target``) and
    the ``target`` series."""
    features = binary_df_agg.drop(columns=["target"])
    labels = binary_df_agg["target"]
    return features, labels


def test_set_target_aggs():
def test_set_target_aggs(binary_df):
binary_df["user"] = [1, 1, 1, 1]
ta = TargetAggregation(agg_col="user")
X, y = split_x_y(binary_df)
Expand All @@ -20,7 +19,7 @@ def test_set_target_aggs():
assert ta.pre_experiment_mean == 0.5


def test_smoothing_0():
def test_smoothing_0(binary_df):
binary_df["user"] = binary_df["target"]
ta = TargetAggregation(agg_col="user", smoothing_factor=0)
X, y = split_x_y(binary_df)
Expand All @@ -31,7 +30,7 @@ def test_smoothing_0():
).all()


def test_smoothing_non_0():
def test_smoothing_non_0(binary_df):
binary_df["user"] = binary_df["target"]
ta = TargetAggregation(agg_col="user", smoothing_factor=2)
X, y = split_x_y(binary_df)
Expand All @@ -45,7 +44,7 @@ def test_smoothing_non_0():
).all()


def test_add_aggs():
def test_add_aggs(binary_df):
binary_df["user"] = binary_df["target"]
ta = TargetAggregation(agg_col="user", smoothing_factor=2)
X, y = split_x_y(binary_df)
Expand Down
2 changes: 1 addition & 1 deletion tests/cupac/test_cupac_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.ensemble import HistGradientBoostingRegressor

from cluster_experiments.cupac import CupacHandler, TargetAggregation
from tests.examples import generate_random_data
from tests.utils import generate_random_data

N = 1_000

Expand Down
93 changes: 0 additions & 93 deletions tests/examples.py

This file was deleted.

57 changes: 57 additions & 0 deletions tests/perturbator/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pandas as pd
import pytest


@pytest.fixture
def binary_df():
    """Binary-outcome frame used by the perturbator tests."""
    data = {
        "target": [0, 1, 0, 1],
        "treatment": ["A", "B", "B", "A"],
    }
    return pd.DataFrame.from_dict(data)


@pytest.fixture
def continuous_df():
    """Constant continuous target (0.5) across four A/B observations."""
    data = {
        "target": [0.5] * 4,
        "treatment": ["A", "B", "B", "A"],
    }
    return pd.DataFrame.from_dict(data)


@pytest.fixture
def generate_clustered_data() -> pd.DataFrame:
    """16 rows spanning 4 countries x 4 dates with user-level
    randomization: every row of a given user carries the same treatment."""
    countries = ["ES"] * 4 + ["IT"] * 4 + ["PL"] * 4 + ["RO"] * 4
    cities = (
        ["BCN", "BCN", "MAD", "BCN"]
        + ["NAP"] * 4
        + ["WAW"] * 4
        + ["BUC"] * 4
    )
    users = [1, 1, 2, 1, 3, 4, 5, 6, 7, 8, 8, 8, 9, 9, 9, 10]
    dates = ["2022-01-01", "2022-01-02", "2022-01-03", "2022-01-04"] * 4
    # Same user always receives the same arm (user-level randomization).
    treatments = list("AABABBABBAAABBBA")
    # One outlier target in the last row.
    targets = [0.01] * 15 + [0.1]
    return pd.DataFrame(
        {
            "country_code": countries,
            "city_code": cities,
            "user_id": users,
            "date": dates,
            "treatment": treatments,
            "target": targets,
        }
    )
Loading

0 comments on commit 6c22bff

Please sign in to comment.