Skip to content

Commit

Permalink
wip: Cenários de BDD usando o modulo behave
Browse files Browse the repository at this point in the history
  • Loading branch information
joao-parana committed Jul 26, 2023
1 parent 992bbd3 commit 1725615
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 27 deletions.
13 changes: 12 additions & 1 deletion features/01.create_timeserie.feature
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Value Statement:
Then I have a time series with the `correct` number of rows and columns, schema and time interval
And I have a CSV file in T8S_WORKSPACE/data/csv correctelly formated
And I have a Parquet file in T8S_WORKSPACE/data/parquet correctelly formated with metadata annotations
And I have a text representation for the first time serie like this:
And I have a text representation for the first time serie like this below
"""
TimeSerie(format=wide, features=3, df=
timestamp temperatura velocidade
Expand All @@ -25,3 +25,14 @@ Value Statement:
types: [<class 'pandas._libs.tslibs.timestamps.Timestamp'>, <class 'numpy.float32'>, <class 'numpy.float32'>]
"""
# Constraint: The first Dataframe doesn't have nulls or invalid values, but the second does

Scenario: Second, I create 1 time series at T8S_WORKSPACE/data directory using a literal sample data
Given a the table below as input
| datetime | float | float |
| timestamp | temperatura | velocidade |
| 2022-01-01 00:00:00 | 3.0 | 30.0 |
| 2022-01-01 01:00:00 | 4.1 | 1124 |
| 2022-01-01 02:00:00 | 5.2 | 3276 |
When converted to a data frame using 1 row as column names and 1 column as index
And printed using data_frame_to_table function
Then I build a time series with the `correct` number of rows and columns
4 changes: 4 additions & 0 deletions features/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
from behave import given, when, then, model, step
from behave.model import Feature, Scenario
from logging import INFO, DEBUG, WARNING, ERROR, CRITICAL, log
from behave import use_step_matcher

# use_step_matcher("re")
use_step_matcher("parse")

LogConfig().initialize_logger(DEBUG)
logger = LogConfig().getLogger()
Expand Down
107 changes: 82 additions & 25 deletions features/steps/01.create_timeserie_steps.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,3 @@
"""
Feature: Create a time series set using Dataframe, CSV and Parquet
Value Statement:
As a data analyst
I want the hability to create a timeseries set using Dataframe, CSV and Parquet
So that I can start studying the data right away and propose solutions for the business.
Background:
Given that I have a T8S_WORKSPACE_DIR and a bunch of CSV and Parquet files to analise
Create 2 time series with sample data using Dataframes and save in CSV files at T8S_WORKSPACE/data/csv directory
Given a start timestamp, a number of records and a time interval
When I create 2 time series using Dataframes with sample data
Then I have a time series with the `correct` number of rows and columns, schema and time interval
And I have a CSV file in T8S_WORKSPACE/data/csv correctelly formated
# Constraint: The first Dataframe doesn't have nulls or invalid values, but the second does
"""

import os
from pathlib import Path
from datetime import datetime
Expand All @@ -28,25 +9,49 @@
from t8s.io import IO
from t8s.ts import TimeSerie
from t8s.ts_writer import TSWriter, WriteParquetFile
from behave import given, when, then, step
from behave import given, when, then, use_step_matcher, step
from behave.model import Table
from behave_pandas import table_to_dataframe, dataframe_to_table
from logging import INFO, DEBUG, WARNING, ERROR, CRITICAL

LogConfig().initialize_logger(DEBUG)
logger = LogConfig().getLogger()

def clean_data_dir():
logger.info(f'clean_data_dir() called ...')
pass
"""
Feature: Create a time series set using Dataframe, CSV and Parquet
Value Statement:
As a data analyst
I want the hability to create a timeseries set using Dataframe, CSV and Parquet
So that I can start studying the data right away and propose solutions for the business.
Background:
Given that I have a T8S_WORKSPACE_DIR and a bunch of CSV and Parquet files to analise
"""

@given(u'that I have a T8S_WORKSPACE_DIR and a bunch of CSV and Parquet files to analyze')
def setup(context):
def clean_data_dir():
logger.info(f'clean_data_dir() called ...')
pass

clean_data_dir()
logger.info(f'-------------------------------------------------')
logger.info(f'Background @given: T8S_WORKSPACE_DIR = {context.T8S_WORKSPACE_DIR}')
logger.info(f'Background@given: CSV_PATH = {context.CSV_PATH}')
logger.info(f'Background@given: PARQUET_PATH = {context.PARQUET_PATH}')
logger.info(f'-------------------------------------------------')
# A forma de passar estes dados para os steps seguintes é usando o objeto context

"""
Scenario: Create 2 time series with sample data using Dataframes and save in CSV files at T8S_WORKSPACE/data/csv directory
Given a start timestamp, a number of records and a time interval
When I create 2 time series using Dataframes with sample data
Then I have a time series with the `correct` number of rows and columns, schema and time interval
And I have a CSV file in T8S_WORKSPACE/data/csv correctelly formated
# Constraint: The first Dataframe doesn't have nulls or invalid values, but the second does
"""

@given(u'a start timestamp, a number of records and a time interval')
def create_dataframe(context):
start_timestamp = datetime(2022, 1, 1, 0, 0, 0)
Expand Down Expand Up @@ -79,7 +84,7 @@ def create_time_series(context):
df1 = context.dataframe1
context.ts1 = TimeSerie(df1, format='wide', features_qty=len(df1.columns))
df2 = context.dataframe2
context.ts2 = TimeSerie(df2, format='wide', features_qty=len(df1.columns))
context.ts2 = TimeSerie(df2, format='wide', features_qty=len(df2.columns))

@then(u'I have a time series with the `correct` number of rows and columns, schema and time interval')
def check_schema(context):
Expand Down Expand Up @@ -135,7 +140,7 @@ def write_ts_to_parquet_file(ts, parquet_path, filename: str):
# Grava a série temporal ts2 em parquet
# write_ts_to_parquet_file(context.ts2, context.PARQUET_PATH, 'ts_02.parquet')

@then(u'I have a text representation for the first time serie like this:')
@then(u'I have a text representation for the first time serie like this below')
def check_text(context):
logger.debug(f'check_text : context.text -> \n{str(context.text)}')
logger.debug(f'\ncheck_text : context.ts1 -> \n{str(context.ts1)}')
Expand All @@ -148,3 +153,55 @@ def assert_line_by_line(context):
assert_line_by_line(context)
logger.info('\ncheck_text passed !')

# -----------------------------------------------------------------------------------------------

"""
Scenario: Second, I create 1 time series at T8S_WORKSPACE/data directory using a literal sample data
Given a the table below as input
| datetime | float | float |
| timestamp | temperatura | velocidade |
| 2022-01-01 00:00:00 | 3.0 | 30.0 |
| 2022-01-01 01:00:00 | 4.1 | 1124 |
| 2022-01-01 02:00:00 | 5.2 | 3276 |
When converted to a data frame using 1 row as column names and 1 column as index
And printed using data_frame_to_table function
Then I build a time series with the `correct` number of rows and columns
"""

use_step_matcher("parse")

@given(u'a the table below as input')
def the_table(context):
table: Table = context.table
context.input = table
table.headings
logger.info('the_table() called ...\n' + str(context.input) + ' ->' + str(type(context.input)))
logger.info(f'{table.headings}')
for row in table.rows:
logger.info(f'{row}')
logger.info(f'the_table() passed !')

@when(u'converted to a data frame using {column_levels:d} row as column names and {index_levels:d} column as index')
def to_dataframe(context, column_levels, index_levels):
parsed: pd.DataFrame = table_to_dataframe(context.input, column_levels=column_levels)
for idx, col in enumerate(parsed.columns):
logger.info(f'{idx}\t-> {col}, {type(parsed[col])}, {type(parsed[col][0])}')
context.parsed = parsed
logger.info(f'context.parsed =\n{context.parsed}\nType of context.parsed =\n{type(context.parsed)}')
logger.info(f'context.parsed.columns = {context.parsed.columns}')
logger.info(f'Test of to_dataframe passed !')

@when(u'printed using data_frame_to_table function')
def print_to_table(context):
table = dataframe_to_table(context.parsed)
assert isinstance(table, str)
logger.info(f'table =\n{table}\nType of table = {type(table)}')
logger.info(f'Test of print_to_table passed !')

@then(u'I build a time series with the `correct` number of rows and columns')
def table_to_ts(context):
df3: pd.DataFrame = context.parsed
logger.info(f'parsed =\n{df3}\nType of parsed = {type(df3)}')
context.ts3 = TimeSerie(df3, format='wide', features_qty=len(df3.columns))
logger.info(f'context.ts3 =\n{context.ts3}')
logger.info(f'Test of table_to_ts passed !')
2 changes: 1 addition & 1 deletion src/t8s/__about__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2023-present João Antonio Ferreira <[email protected]>
#
# SPDX-License-Identifier: MIT
__version__ = "0.1.11"
__version__ = "0.1.12"

0 comments on commit 1725615

Please sign in to comment.