Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace exogenous example #1108

Merged
merged 2 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 56 additions & 98 deletions examples/advanced/time_series_forecasting/exogenous.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,23 @@
import timeit
import os
import warnings

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TsForecastingParams, TaskTypesEnum
from fedot.core.utils import fedot_project_root
from fedot.core.utils import set_random_seed

warnings.filterwarnings('ignore')
np.random.seed(2020)


def make_forecast(pipeline, train: InputData, predict: InputData,
                  train_exog: InputData, predict_exog: InputData):
    """
    Fit a pipeline on a two-source multimodal dataset and return its forecast.

    The first data source is always registered under 'data_source_ts/1'.
    If ``train_exog`` is None, the main series is reused as the second source
    under 'data_source_ts/2' (and ``predict_exog`` is ignored); otherwise the
    exogenous series is registered under 'exog_ts'.

    :param pipeline: object providing ``fit_from_scratch`` and ``predict``
    :param train: main series used for fitting
    :param predict: main series used for forecasting
    :param train_exog: exogenous series used for fitting, or None
    :param predict_exog: exogenous series used for forecasting
    :return predicted_values: the ``predict`` attribute of the pipeline output
        (presumably a numpy array with the forecast -- confirm against FEDOT)
    """
    # The timer starts before the datasets are assembled, so the reported
    # duration covers dataset construction as well as the fit itself.
    start_time = timeit.default_timer()

    if train_exog is None:
        second_key, second_train, second_predict = 'data_source_ts/2', train, predict
    else:
        second_key, second_train, second_predict = 'exog_ts', train_exog, predict_exog

    train_dataset = MultiModalData({'data_source_ts/1': train,
                                    second_key: second_train})
    predict_dataset = MultiModalData({'data_source_ts/1': predict,
                                      second_key: second_predict})

    pipeline.fit_from_scratch(train_dataset)
    amount_of_seconds = timeit.default_timer() - start_time
    print(f'\nIt takes {amount_of_seconds:.2f} seconds to train pipeline\n')

    # The pipeline returns a container; only its ``predict`` field is handed back.
    return pipeline.predict(predict_dataset).predict


def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True,
visualization=False) -> None:
def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True, visualization=False) -> np.array:
""" Function with example how time series forecasting can be made with using
exogenous features

Expand All @@ -68,72 +27,71 @@ def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True,
:param visualization: is it needed to make visualizations
"""

# Read the file
df = pd.read_csv(path_to_file)
time_series = np.array(df['Level'])
exog_variable = np.array(df['Neighboring level'])

task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=len_forecast))
# Source time series
train_input, predict_input = train_test_data_setup(InputData(idx=range(len(time_series)),
features=time_series,
target=time_series,
task=task,
data_type=DataTypesEnum.ts))

# Exogenous time series
predict_input_exog = InputData(idx=np.arange(len(exog_variable)),
features=exog_variable, target=time_series,
task=task, data_type=DataTypesEnum.ts)

train_input_exog, predict_input_exog = train_test_data_setup(predict_input_exog)
valiadion_blocks = 2

if with_exog is True:
# Example with exogenous time series
node_source = PipelineNode('data_source_ts/1')
node_lagged = PipelineNode('lagged', nodes_from=[node_source])
# Target time series for lagged transformation
train_lagged, predict_lagged = train_test_data_setup(InputData(idx=np.arange(len(time_series)),
features=time_series,
target=time_series,
task=task,
data_type=DataTypesEnum.ts),
validation_blocks=valiadion_blocks)

node_exog = PipelineNode('exog_ts')

node_final = PipelineNode('ridge', nodes_from=[node_lagged, node_exog])
pipeline = Pipeline(node_final)
# Exogenous time series
train_exog, predict_exog = train_test_data_setup(InputData(idx=np.arange(len(exog_variable)),
features=exog_variable,
target=time_series,
task=task,
data_type=DataTypesEnum.ts),
validation_blocks=valiadion_blocks)

if with_exog:
train_dataset = MultiModalData({
'lagged': train_lagged,
'exog_ts': train_exog
})

predict_dataset = MultiModalData({
'lagged': predict_lagged,
'exog_ts': predict_exog
})

# Create a pipeline with different data sources in the nodes
pipeline = PipelineBuilder().add_node('lagged', 0).add_node('exog_ts', 1).join_branches('ridge').build()
else:
# Simple example without exogenous time series
node_source_1 = PipelineNode('data_source_ts/1')
node_source_2 = PipelineNode('data_source_ts/2')

node_lagged_1 = PipelineNode('lagged', nodes_from=[node_source_1])
node_lagged_2 = PipelineNode('lagged', nodes_from=[node_source_2])
train_dataset = train_lagged
predict_dataset = predict_lagged

node_ridge_1 = PipelineNode('ridge', nodes_from=[node_lagged_1])
node_ridge_2 = PipelineNode('ridge', nodes_from=[node_lagged_2])
node_final = PipelineNode('ridge', nodes_from=[node_ridge_1, node_ridge_2])
train_input_exog = None
predict_input_exog = None
pipeline = Pipeline(node_final)

predicted = make_forecast(pipeline, train_input, predict_input, train_input_exog, predict_input_exog)

predicted = np.ravel(np.array(predicted))
test_data = np.ravel(predict_input.target)
# Simple example without exogenous time series
pipeline = PipelineBuilder().add_sequence('lagged', 'ridge').build()

print(f'Predicted values: {predicted[:5]}')
print(f'Actual values: {test_data[:5]}')
# Fit it
fedot = Fedot(problem='ts_forecasting',
task_params=task.task_params,
timeout=10,
initial_assumption=pipeline,
max_pipeline_fit_time=1,
n_jobs=-1)
fedot.fit(train_dataset)

mse_before = mean_squared_error(test_data, predicted, squared=False)
mae_before = mean_absolute_error(test_data, predicted)
print(f'RMSE - {mse_before:.4f}')
print(f'MAE - {mae_before:.4f}\n')
# Predict
predicted = fedot.predict(predict_dataset, validation_blocks=valiadion_blocks)
print(fedot.get_metrics(metric_names='mae', validation_blocks=valiadion_blocks))

if visualization:
plt.plot(range(0, len(time_series)), time_series, label='Actual time series')
plt.plot(range(len(train_input.target), len(time_series)), predicted, label='Forecast')
plt.legend()
plt.grid()
plt.show()
fedot.current_pipeline.show()
# Plot predictions and true values
fedot.plot_prediction(target='lagged')

return predicted

if __name__ == '__main__':
set_random_seed(2020)

data_path = fedot_project_root().joinpath('examples/data/ts', 'ts_sea_level.csv')
if __name__ == '__main__':
data_path = os.path.join(f'{fedot_project_root()}', 'examples/data/ts', 'ts_sea_level.csv')
run_exogenous_experiment(path_to_file=data_path, len_forecast=250, with_exog=True, visualization=True)
2 changes: 1 addition & 1 deletion fedot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def get_metrics(self,

if target is not None:
if self.test_data is None:
self.test_data = InputData(idx=range(len(self.prediction.predict)),
self.test_data = InputData(idx=np.arange(len(self.prediction.predict)),
features=None,
target=target[:len(self.prediction.predict)],
task=self.train_data.task,
Expand Down
5 changes: 0 additions & 5 deletions fedot/core/pipelines/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,11 +380,6 @@ def _assign_data_to_nodes(self, input_data: Union[InputData, MultiModalData]) ->
node.node_data = input_data[node.operation.operation_type]
node.direct_set = True
else:
print(f'Node info: operation={node.operation}, operation_type{node.operation.operation_type},'
f' input_data{input_data}, all_nodes={self.nodes},'
f' pipeline_nodes={[node for node in self.nodes if isinstance(node, PipelineNode)]},',
f' true_node_types={[type(node) for node in self.nodes]},',
f' primary_nodes={[node for node in self.nodes if node.is_primary]}')
raise ValueError(f'No data for primary node {node}')
return None
return input_data
Expand Down
Loading