diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 7bdf16dd0a..eb9638a7f6 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -735,25 +735,11 @@ def ts_to_table(idx, time_series: np.array, window_size: int, is_lag: bool = Fal ``updated_idx`` -> clipped indices of time series\n ``features_columns`` -> lagged time series feature table """ - # Convert data to lagged form - lagged_dataframe = pd.DataFrame({'t_id': time_series}) - vals = lagged_dataframe['t_id'] - for i in range(1, window_size): - frames = [lagged_dataframe, vals.shift(i)] - lagged_dataframe = pd.concat(frames, axis=1) - - # Remove incomplete rows - lagged_dataframe.dropna(inplace=True) - - transformed = np.array(lagged_dataframe) - - # Generate dataset with features - features_columns = np.fliplr(transformed) + _temp = [time_series[i:-(window_size - i - 1)] for i in range(window_size - 1)] + [time_series[window_size - 1:]] + features_columns = np.array(_temp).T if is_lag: - updated_idx = list(idx[window_size:]) - updated_idx.append(idx[-1]) - updated_idx = np.array(updated_idx) + updated_idx = np.concatenate([idx[window_size:], idx[-1:]]) else: updated_idx = idx[:len(idx) - window_size + 1] @@ -848,16 +834,9 @@ def prepare_target(all_idx, idx, features_columns: np.array, target, forecast_le # Multi-target transformation if forecast_length > 1: - # Target transformation - df = pd.DataFrame({'t_id': ts_target}) - vals = df['t_id'] - for i in range(1, forecast_length): - frames = [df, vals.shift(-i)] - df = pd.concat(frames, axis=1) - - # Remove incomplete rows - df.dropna(inplace=True) - updated_target = np.array(df) + _temp = ([ts_target[i:-(forecast_length - i - 1)] for i in range(forecast_length - 1)] + + [ts_target[forecast_length - 1:]]) + updated_target = np.array(_temp).T updated_idx = idx[: -forecast_length + 1] updated_features = features_columns[: -forecast_length] diff --git a/test/unit/data_operations/test_time_series_operations.py b/test/unit/data_operations/test_time_series_operations.py index aa27213e30..3c7b071fe1 100644 --- a/test/unit/data_operations/test_time_series_operations.py +++ b/test/unit/data_operations/test_time_series_operations.py @@ -1,4 +1,7 @@ import numpy as np +import pytest + +from fedot.core.data.data_split import train_test_data_setup from golem.core.log import default_log from fedot.core.data.data import InputData @@ -42,6 +45,27 @@ def synthetic_univariate_ts(): return train_input, predict_input, ts_test +def get_timeseries(length=10, features_count=1, + target_count=1, forecast_length=_FORECAST_LENGTH): + task = Task(TaskTypesEnum.ts_forecasting, + TsForecastingParams(forecast_length=forecast_length)) + features = np.arange(0, length * features_count) * 10 + if features_count > 1: + features = np.reshape(features, (features_count, length)).T + for i in range(features_count): + features[:, i] += i + target = np.arange(0, length * target_count) * 100 + if target_count > 1: + target = np.reshape(target, (target_count, length)).T + + train_input = InputData(idx=np.arange(0, length), + features=features, + target=target, + task=task, + data_type=DataTypesEnum.ts) + return train_input + + def synthetic_with_exogenous_ts(): """ Method returns InputData for time series forecasting task with exogenous variable """ @@ -190,3 +214,27 @@ def test_forecast_with_exog(): prediction = np.ravel(np.array(forecast.predict)) assert tuple(prediction) == tuple(ts_test) + + +@pytest.mark.parametrize(('length', 'features_count', 'target_count', 'window_size'), + [(10 + _FORECAST_LENGTH * 2, 1, 1, 5), + (10 + _FORECAST_LENGTH * 2, 2, 1, 5), + ]) +def test_lagged_node(length, features_count, target_count, window_size): + data = get_timeseries(length=length, features_count=features_count, target_count=target_count) + train, test = train_test_data_setup(data, split_ratio=0.5) + forecast_length = data.task.task_params.forecast_length + node = PipelineNode('lagged') + node.parameters = {'window_size': window_size} + fit_res = node.fit(train) + + assert np.all(fit_res.idx == train.idx[window_size:-forecast_length + 1]) + assert np.all(np.ravel(fit_res.features[0, :]) == + np.reshape(train.features[:window_size].T, (-1, ))) + assert np.all(np.ravel(fit_res.features[-1, :]) == + np.reshape(train.features[:-forecast_length][-window_size:].T, (-1, ))) + assert np.all(fit_res.target[0, :] == train.target[window_size:window_size + forecast_length]) + assert np.all(fit_res.target[-1, :] == train.target[-forecast_length:]) + + predict = node.predict(test) + assert np.all(predict.predict[-1, :] == np.reshape(test.features[-window_size:].T, (-1, )))