diff --git a/botorch/models/gp_regression_fidelity.py b/botorch/models/gp_regression_fidelity.py index 7b300f7a21..2ff0a64cd5 100644 --- a/botorch/models/gp_regression_fidelity.py +++ b/botorch/models/gp_regression_fidelity.py @@ -25,7 +25,9 @@ from __future__ import annotations -from typing import Any, Dict, List, Optional, Tuple +import warnings + +from typing import Any, Dict, List, Optional, Tuple, Union import torch from botorch.exceptions.errors import UnsupportedError @@ -58,7 +60,7 @@ class SingleTaskMultiFidelityGP(SingleTaskGP): Example: >>> train_X = torch.rand(20, 4) >>> train_Y = train_X.pow(2).sum(dim=-1, keepdim=True) - >>> model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelity=3) + >>> model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelities=[3]) """ def __init__( @@ -66,6 +68,7 @@ def __init__( train_X: Tensor, train_Y: Tensor, iteration_fidelity: Optional[int] = None, + data_fidelities: Optional[Union[List[int], Tuple[int]]] = None, data_fidelity: Optional[int] = None, linear_truncated: bool = True, nu: float = 2.5, @@ -81,8 +84,11 @@ def __init__( train_Y: A `batch_shape x n x m` tensor of training observations. iteration_fidelity: The column index for the training iteration fidelity parameter (optional). + data_fidelities: The column indices for the downsampling fidelity parameter. + If a list/tuple of indices is provided, a kernel will be constructed for + each index (optional). data_fidelity: The column index for the downsampling fidelity parameter - (optional). + (optional). Deprecated in favor of `data_fidelities`. linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead of the default kernel. nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or @@ -96,14 +102,26 @@ def __init__( input_transform: An input transform that is applied in the model's forward pass. """ + if data_fidelity is not None: + warnings.warn( + "The `data_fidelity` argument is deprecated and will be removed in " + "a future release. Please use `data_fidelities` instead.", + DeprecationWarning, + ) + if data_fidelities is not None: + raise ValueError( + "Cannot specify both `data_fidelity` and `data_fidelities`." + ) + data_fidelities = [data_fidelity] + self._init_args = { "iteration_fidelity": iteration_fidelity, - "data_fidelity": data_fidelity, + "data_fidelities": data_fidelities, "linear_truncated": linear_truncated, "nu": nu, "outcome_transform": outcome_transform, } - if iteration_fidelity is None and data_fidelity is None: + if iteration_fidelity is None and data_fidelities is None: raise UnsupportedError( "SingleTaskMultiFidelityGP requires at least one fidelity parameter." ) @@ -117,7 +135,7 @@ def __init__( dim=transformed_X.size(-1), aug_batch_shape=self._aug_batch_shape, iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, linear_truncated=linear_truncated, nu=nu, ) @@ -150,11 +168,8 @@ def construct_inputs( training_data: Dictionary of `SupervisedDataset`. fidelity_features: Index of fidelity parameter as input columns. 
""" - if len(fidelity_features) != 1: - raise UnsupportedError("Multiple fidelity features not supported.") - inputs = super().construct_inputs(training_data=training_data, **kwargs) - inputs["data_fidelity"] = fidelity_features[0] + inputs["data_fidelities"] = fidelity_features return inputs @@ -175,7 +190,7 @@ class FixedNoiseMultiFidelityGP(FixedNoiseGP): >>> train_X, >>> train_Y, >>> train_Yvar, - >>> data_fidelity=3, + >>> data_fidelities=[3], >>> ) """ @@ -185,6 +200,7 @@ def __init__( train_Y: Tensor, train_Yvar: Tensor, iteration_fidelity: Optional[int] = None, + data_fidelities: Optional[Union[List[int], Tuple[int]]] = None, data_fidelity: Optional[int] = None, linear_truncated: bool = True, nu: float = 2.5, @@ -200,8 +216,11 @@ def __init__( train_Yvar: A `batch_shape x n x m` tensor of observed measurement noise. iteration_fidelity: The column index for the training iteration fidelity parameter (optional). + data_fidelities: The column indices for the downsampling fidelity parameter. + If a list of indices is provided, a kernel will be constructed for + each index (optional). data_fidelity: The column index for the downsampling fidelity parameter - (optional). + (optional). Deprecated in favor of `data_fidelities`. linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead of the default kernel. nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or @@ -213,7 +232,26 @@ def __init__( input_transform: An input transform that is applied in the model's forward pass. """ - if iteration_fidelity is None and data_fidelity is None: + if data_fidelity is not None: + warnings.warn( + "The `data_fidelity` argument is deprecated and will be removed in " + "a future release. Please use `data_fidelities` instead.", + DeprecationWarning, + ) + if data_fidelities is not None: + raise ValueError( + "Cannot specify both `data_fidelity` and `data_fidelities`." + ) + data_fidelities = [data_fidelity] + + self._init_args = { + "iteration_fidelity": iteration_fidelity, + "data_fidelities": data_fidelities, + "linear_truncated": linear_truncated, + "nu": nu, + "outcome_transform": outcome_transform, + } + if iteration_fidelity is None and data_fidelities is None: raise UnsupportedError( "FixedNoiseMultiFidelityGP requires at least one fidelity parameter." ) @@ -226,7 +264,7 @@ def __init__( dim=transformed_X.size(-1), aug_batch_shape=self._aug_batch_shape, iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, linear_truncated=linear_truncated, nu=nu, ) @@ -259,11 +297,8 @@ def construct_inputs( training_data: Dictionary of `SupervisedDataset`. fidelity_features: Column indices of fidelity features. """ - if len(fidelity_features) != 1: - raise UnsupportedError("Multiple fidelity features not supported.") - inputs = super().construct_inputs(training_data=training_data, **kwargs) - inputs["data_fidelity"] = fidelity_features[0] + inputs["data_fidelities"] = fidelity_features return inputs @@ -271,7 +306,7 @@ def _setup_multifidelity_covar_module( dim: int, aug_batch_shape: torch.Size, iteration_fidelity: Optional[int], - data_fidelity: Optional[int], + data_fidelities: Optional[List[int]], linear_truncated: bool, nu: float, ) -> Tuple[ScaleKernel, Dict]: @@ -284,7 +319,7 @@ def _setup_multifidelity_covar_module( `BatchedMultiOutputGPyTorchModel`. iteration_fidelity: The column index for the training iteration fidelity parameter (optional). 
- data_fidelity: The column index for the downsampling fidelity parameter + data_fidelities: The column indices for the downsampling fidelity parameters (optional). linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead of the default kernel. @@ -297,76 +332,100 @@ def _setup_multifidelity_covar_module( if iteration_fidelity is not None and iteration_fidelity < 0: iteration_fidelity = dim + iteration_fidelity - if data_fidelity is not None and data_fidelity < 0: - data_fidelity = dim + data_fidelity + if data_fidelities is not None: + for i in range(len(data_fidelities)): + if data_fidelities[i] < 0: + data_fidelities[i] = dim + data_fidelities[i] + + kernels = [] if linear_truncated: - fidelity_dims = [ - i for i in (iteration_fidelity, data_fidelity) if i is not None - ] - kernel = LinearTruncatedFidelityKernel( - fidelity_dims=fidelity_dims, - dimension=dim, - nu=nu, - batch_shape=aug_batch_shape, - power_prior=GammaPrior(3.0, 3.0), + leading_dims = [iteration_fidelity] if iteration_fidelity is not None else [] + trailing_dims = ( + [[i] for i in data_fidelities] if data_fidelities is not None else [[]] ) + for tdims in trailing_dims: + kernels.append( + LinearTruncatedFidelityKernel( + fidelity_dims=leading_dims + tdims, + dimension=dim, + nu=nu, + batch_shape=aug_batch_shape, + power_prior=GammaPrior(3.0, 3.0), + ) + ) else: - active_dimsX = [ - i for i in range(dim) if i not in {iteration_fidelity, data_fidelity} - ] - kernel = RBFKernel( - ard_num_dims=len(active_dimsX), - batch_shape=aug_batch_shape, - lengthscale_prior=GammaPrior(3.0, 6.0), - active_dims=active_dimsX, - ) - additional_kernels = [] + non_active_dims = set(data_fidelities or []) if iteration_fidelity is not None: - exp_kernel = ExponentialDecayKernel( + non_active_dims.add(iteration_fidelity) + active_dimsX = sorted(set(range(dim)) - non_active_dims) + kernels.append( + RBFKernel( + ard_num_dims=len(active_dimsX), batch_shape=aug_batch_shape, lengthscale_prior=GammaPrior(3.0, 6.0), - offset_prior=GammaPrior(3.0, 6.0), - power_prior=GammaPrior(3.0, 6.0), - active_dims=[iteration_fidelity], + active_dims=active_dimsX, ) - additional_kernels.append(exp_kernel) - if data_fidelity is not None: - ds_kernel = DownsamplingKernel( - batch_shape=aug_batch_shape, - offset_prior=GammaPrior(3.0, 6.0), - power_prior=GammaPrior(3.0, 6.0), - active_dims=[data_fidelity], + ) + if iteration_fidelity is not None: + kernels.append( + ExponentialDecayKernel( + batch_shape=aug_batch_shape, + lengthscale_prior=GammaPrior(3.0, 6.0), + offset_prior=GammaPrior(3.0, 6.0), + power_prior=GammaPrior(3.0, 6.0), + active_dims=[iteration_fidelity], + ) ) - additional_kernels.append(ds_kernel) - kernel = ProductKernel(kernel, *additional_kernels) + if data_fidelities is not None: + for data_fidelity in data_fidelities: + kernels.append( + DownsamplingKernel( + batch_shape=aug_batch_shape, + offset_prior=GammaPrior(3.0, 6.0), + power_prior=GammaPrior(3.0, 6.0), + active_dims=[data_fidelity], + ) + ) + + kernel = ProductKernel(*kernels) covar_module = ScaleKernel( kernel, batch_shape=aug_batch_shape, outputscale_prior=GammaPrior(2.0, 0.15) ) + key_prefix = "covar_module.base_kernel.kernels" if linear_truncated: - subset_batch_dict = { - "covar_module.base_kernel.raw_power": -2, - "covar_module.base_kernel.covar_module_unbiased.raw_lengthscale": -3, - "covar_module.base_kernel.covar_module_biased.raw_lengthscale": -3, - } + subset_batch_dict = {} + for i in range(len(kernels)): + subset_batch_dict.update( + { + 
f"{key_prefix}.{i}.raw_power": -2, + f"{key_prefix}.{i}.covar_module_unbiased.raw_lengthscale": -3, + f"{key_prefix}.{i}.covar_module_biased.raw_lengthscale": -3, + } + ) else: subset_batch_dict = { - "covar_module.base_kernel.kernels.0.raw_lengthscale": -3, - "covar_module.base_kernel.kernels.1.raw_power": -2, - "covar_module.base_kernel.kernels.1.raw_offset": -2, + f"{key_prefix}.0.raw_lengthscale": -3, } + if iteration_fidelity is not None: - subset_batch_dict = { - "covar_module.base_kernel.kernels.1.raw_lengthscale": -3, - **subset_batch_dict, - } - if data_fidelity is not None: - subset_batch_dict = { - "covar_module.base_kernel.kernels.2.raw_power": -2, - "covar_module.base_kernel.kernels.2.raw_offset": -2, - **subset_batch_dict, + subset_batch_dict.update( + { + f"{key_prefix}.1.raw_power": -2, + f"{key_prefix}.1.raw_offset": -2, + f"{key_prefix}.1.raw_lengthscale": -3, } + ) + if data_fidelities is not None: + start_idx = 2 if iteration_fidelity is not None else 1 + for i in range(start_idx, len(data_fidelities) + start_idx): + subset_batch_dict.update( + { + f"{key_prefix}.{i}.raw_power": -2, + f"{key_prefix}.{i}.raw_offset": -2, + } + ) return covar_module, subset_batch_dict diff --git a/test/models/test_gp_regression_fidelity.py b/test/models/test_gp_regression_fidelity.py index ad5f748dd3..778a829b5b 100644 --- a/test/models/test_gp_regression_fidelity.py +++ b/test/models/test_gp_regression_fidelity.py @@ -33,7 +33,7 @@ def _get_random_data_with_fidelity( batch_shape: torch.Size, m: int, n_fidelity: int, d: int = 1, n: int = 10, **tkwargs ) -> Tuple[Tensor, Tensor]: r"""Construct test data. - For this test, by convention the trailing dimesions are the fidelity dimensions + For this test, by convention the trailing dimensions are the fidelity dimensions """ train_x, train_y = _get_random_data( batch_shape=batch_shape, m=m, d=d, n=n, **tkwargs @@ -46,12 +46,21 @@ def _get_random_data_with_fidelity( class TestSingleTaskMultiFidelityGP(BotorchTestCase): - FIDELITY_TEST_PAIRS = ((None, 1), (1, None), (None, -1), (-1, None), (1, 2)) + FIDELITY_TEST_PAIRS = ( + (None, [1]), + (1, None), + (None, [-1]), + (-1, None), + (1, [2]), + (1, [2, 3]), + (None, [1, 2]), + (-1, [1, -2]), + ) def _get_model_and_data( self, iteration_fidelity, - data_fidelity, + data_fidelities, batch_shape, m, lin_truncated, @@ -59,17 +68,23 @@ def _get_model_and_data( input_transform=None, **tkwargs, ): - n_fidelity = (iteration_fidelity is not None) + (data_fidelity is not None) + model_kwargs = {} + n_fidelity = iteration_fidelity is not None + if data_fidelities is not None: + n_fidelity += len(data_fidelities) + model_kwargs["data_fidelities"] = data_fidelities train_X, train_Y = _get_random_data_with_fidelity( batch_shape=batch_shape, m=m, n_fidelity=n_fidelity, **tkwargs ) - model_kwargs = { - "train_X": train_X, - "train_Y": train_Y, - "iteration_fidelity": iteration_fidelity, - "data_fidelity": data_fidelity, - "linear_truncated": lin_truncated, - } + model_kwargs.update( + { + "train_X": train_X, + "train_Y": train_Y, + "iteration_fidelity": iteration_fidelity, + "linear_truncated": lin_truncated, + } + ) + if outcome_transform is not None: model_kwargs["outcome_transform"] = outcome_transform if input_transform is not None: @@ -85,10 +100,20 @@ def test_init_error(self): SingleTaskMultiFidelityGP( train_X, train_Y, linear_truncated=lin_truncated ) + with self.assertRaises(ValueError): + SingleTaskMultiFidelityGP( + train_X, train_Y, data_fidelities=[1], data_fidelity=2 + ) + with 
self.assertWarnsRegex(DeprecationWarning, "data_fidelity"): + SingleTaskMultiFidelityGP( + train_X, train_Y, data_fidelity=1, linear_truncated=False + ) def test_gp(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: - num_dim = 1 + (iteration_fidelity is not None) + (data_fidelity is not None) + for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: + num_dim = 1 + (iteration_fidelity is not None) + if data_fidelities is not None: + num_dim += len(data_fidelities) bounds = torch.zeros(2, num_dim) bounds[1] = 1 for ( @@ -111,7 +136,7 @@ def test_gp(self): intf = Normalize(d=num_dim, bounds=bounds) if use_intf else None model, model_kwargs = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=m, lin_truncated=lin_trunc, @@ -146,10 +171,13 @@ def test_gp(self): # test param sizes params = dict(model.named_parameters()) - for p in params: - self.assertEqual( - params[p].numel(), m * torch.tensor(batch_shape).prod().item() - ) + + if data_fidelities is not None and len(data_fidelities) == 1: + for p in params: + self.assertEqual( + params[p].numel(), + m * torch.tensor(batch_shape).prod().item(), + ) # test posterior # test non batch evaluation @@ -185,8 +213,10 @@ def test_gp(self): self.assertAllClose(posterior.variance, expected_var) def test_condition_on_observations(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: - n_fidelity = (iteration_fidelity is not None) + (data_fidelity is not None) + for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: + n_fidelity = iteration_fidelity is not None + if data_fidelities is not None: + n_fidelity += len(data_fidelities) num_dim = 1 + n_fidelity for batch_shape, m, dtype, lin_trunc in itertools.product( (torch.Size(), torch.Size([2])), @@ -197,7 +227,7 @@ def test_condition_on_observations(self): tkwargs = {"device": self.device, "dtype": dtype} model, model_kwargs = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=m, lin_truncated=lin_trunc, @@ -262,7 +292,7 @@ def test_condition_on_observations(self): for k, v in model_kwargs.items(): if k in ( "iteration_fidelity", - "data_fidelity", + "data_fidelities", "linear_truncated", "input_transform", ): @@ -305,8 +335,10 @@ def test_condition_on_observations(self): ) def test_fantasize(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: - n_fidelity = (iteration_fidelity is not None) + (data_fidelity is not None) + for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: + n_fidelity = iteration_fidelity is not None + if data_fidelities is not None: + n_fidelity += len(data_fidelities) num_dim = 1 + n_fidelity for batch_shape, m, dtype, lin_trunc in itertools.product( (torch.Size(), torch.Size([2])), @@ -317,7 +349,7 @@ def test_fantasize(self): tkwargs = {"device": self.device, "dtype": dtype} model, model_kwargs = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=m, lin_truncated=lin_trunc, @@ -334,8 +366,10 @@ def test_fantasize(self): self.assertIsInstance(fm, model.__class__) def test_subset_model(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: - num_dim = 1 + (iteration_fidelity is not None) + (data_fidelity is not None) + for 
(iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: + num_dim = 1 + (iteration_fidelity is not None) + if data_fidelities is not None: + num_dim += len(data_fidelities) for batch_shape, dtype, lin_trunc in itertools.product( (torch.Size(), torch.Size([2])), (torch.float, torch.double), @@ -344,7 +378,7 @@ def test_subset_model(self): tkwargs = {"device": self.device, "dtype": dtype} model, _ = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=2, lin_truncated=lin_trunc, @@ -367,7 +401,7 @@ def test_subset_model(self): ) def test_construct_inputs(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: + for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: for batch_shape, dtype, lin_trunc in itertools.product( (torch.Size(), torch.Size([2])), (torch.float, torch.double), @@ -376,7 +410,7 @@ def test_construct_inputs(self): tkwargs = {"device": self.device, "dtype": dtype} model, kwargs = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=1, lin_truncated=lin_trunc, @@ -388,13 +422,9 @@ def test_construct_inputs(self): with self.assertRaisesRegex(TypeError, "argument: 'fidelity_features'"): model.construct_inputs(training_data) - # multiple fidelity features - with self.assertRaisesRegex(UnsupportedError, "Multiple fidelity f"): - model.construct_inputs(training_data, fidelity_features=[0, 1]) - data_dict = model.construct_inputs(training_data, fidelity_features=[1]) - self.assertTrue("data_fidelity" in data_dict) - self.assertEqual(data_dict["data_fidelity"], 1) + self.assertTrue("data_fidelities" in data_dict) + self.assertEqual(data_dict["data_fidelities"], [1]) self.assertTrue(kwargs["train_X"].equal(data_dict["train_X"])) self.assertTrue(kwargs["train_Y"].equal(data_dict["train_Y"])) @@ -403,7 +433,7 @@ class TestFixedNoiseMultiFidelityGP(TestSingleTaskMultiFidelityGP): def _get_model_and_data( self, iteration_fidelity, - data_fidelity, + data_fidelities, batch_shape, m, lin_truncated, @@ -411,19 +441,24 @@ def _get_model_and_data( input_transform=None, **tkwargs, ): - n_fidelity = (iteration_fidelity is not None) + (data_fidelity is not None) + model_kwargs = {} + n_fidelity = iteration_fidelity is not None + if data_fidelities is not None: + n_fidelity += len(data_fidelities) + model_kwargs["data_fidelities"] = data_fidelities train_X, train_Y = _get_random_data_with_fidelity( batch_shape=batch_shape, m=m, n_fidelity=n_fidelity, **tkwargs ) train_Yvar = torch.full_like(train_Y, 0.01) - model_kwargs = { - "train_X": train_X, - "train_Y": train_Y, - "train_Yvar": train_Yvar, - "iteration_fidelity": iteration_fidelity, - "data_fidelity": data_fidelity, - "linear_truncated": lin_truncated, - } + model_kwargs.update( + { + "train_X": train_X, + "train_Y": train_Y, + "train_Yvar": train_Yvar, + "iteration_fidelity": iteration_fidelity, + "linear_truncated": lin_truncated, + } + ) if outcome_transform is not None: model_kwargs["outcome_transform"] = outcome_transform if input_transform is not None: @@ -440,9 +475,17 @@ def test_init_error(self): FixedNoiseMultiFidelityGP( train_X, train_Y, train_Yvar, linear_truncated=lin_truncated ) + with self.assertRaises(ValueError): + FixedNoiseMultiFidelityGP( + train_X, train_Y, train_Yvar, data_fidelities=[1], data_fidelity=2 + ) + with self.assertWarnsRegex(DeprecationWarning, 
"data_fidelity"): + FixedNoiseMultiFidelityGP( + train_X, train_Y, train_Yvar, data_fidelity=1, linear_truncated=False + ) def test_fixed_noise_likelihood(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: + for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: for batch_shape, m, dtype, lin_trunc in itertools.product( (torch.Size(), torch.Size([2])), (1, 2), @@ -452,7 +495,7 @@ def test_fixed_noise_likelihood(self): tkwargs = {"device": self.device, "dtype": dtype} model, model_kwargs = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=m, lin_truncated=lin_trunc, @@ -467,7 +510,7 @@ def test_fixed_noise_likelihood(self): ) def test_construct_inputs(self): - for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS: + for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS: for batch_shape, dtype, lin_trunc in itertools.product( (torch.Size(), torch.Size([2])), (torch.float, torch.double), @@ -476,7 +519,7 @@ def test_construct_inputs(self): tkwargs = {"device": self.device, "dtype": dtype} model, kwargs = self._get_model_and_data( iteration_fidelity=iteration_fidelity, - data_fidelity=data_fidelity, + data_fidelities=data_fidelities, batch_shape=batch_shape, m=1, lin_truncated=lin_trunc, @@ -497,12 +540,8 @@ def test_construct_inputs(self): with self.assertRaisesRegex(TypeError, "argument: 'fidelity_features'"): model.construct_inputs(training_data) - # multiple fidelity features - with self.assertRaisesRegex(UnsupportedError, "Multiple fidelity f"): - model.construct_inputs(training_data, fidelity_features=[0, 1]) - data_dict = model.construct_inputs(training_data, fidelity_features=[1]) self.assertTrue("train_Yvar" in data_dict) - self.assertEqual(data_dict.get("data_fidelity", None), 1) + self.assertEqual(data_dict.get("data_fidelities", None), [1]) self.assertTrue(kwargs["train_X"].equal(data_dict["train_X"])) self.assertTrue(kwargs["train_Y"].equal(data_dict["train_Y"])) diff --git a/tutorials/discrete_multi_fidelity_bo.ipynb b/tutorials/discrete_multi_fidelity_bo.ipynb index 81261b87c4..936e5a1f5c 100644 --- a/tutorials/discrete_multi_fidelity_bo.ipynb +++ b/tutorials/discrete_multi_fidelity_bo.ipynb @@ -1,12 +1,4 @@ { - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2, "cells": [ { "cell_type": "markdown", @@ -32,7 +24,9 @@ }, { "cell_type": "code", + "execution_count": 1, "metadata": {}, + "outputs": [], "source": [ "import os\n", "import torch\n", @@ -43,9 +37,7 @@ " \"device\": torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\"),\n", "}\n", "SMOKE_TEST = os.environ.get(\"SMOKE_TEST\")" - ], - "execution_count": 1, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -58,16 +50,16 @@ }, { "cell_type": "code", + "execution_count": 2, "metadata": {}, + "outputs": [], "source": [ "from botorch.test_functions.multi_fidelity import AugmentedHartmann\n", "\n", "\n", "problem = AugmentedHartmann(negate=True).to(**tkwargs)\n", "fidelities = torch.tensor([0.5, 0.75, 1.0], **tkwargs)" - ], - "execution_count": 2, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -80,7 +72,9 @@ }, { "cell_type": "code", + "execution_count": 3, "metadata": {}, + "outputs": [], "source": [ "from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP\n", "from botorch.models.transforms.outcome import Standardize\n", @@ -100,13 
+94,11 @@ " # define a surrogate model suited for a \"training data\"-like fidelity parameter\n", " # in dimension 6, as in [2]\n", " model = SingleTaskMultiFidelityGP(\n", - " train_x, train_obj, outcome_transform=Standardize(m=1), data_fidelity=6\n", + " train_x, train_obj, outcome_transform=Standardize(m=1), data_fidelities=[6]\n", " )\n", " mll = ExactMarginalLogLikelihood(model.likelihood, model)\n", " return mll, model" - ], - "execution_count": 3, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -125,7 +117,9 @@ }, { "cell_type": "code", + "execution_count": 4, "metadata": {}, + "outputs": [], "source": [ "from botorch import fit_gpytorch_mll\n", "from botorch.models.cost import AffineFidelityCostModel\n", @@ -172,9 +166,7 @@ " cost_aware_utility=cost_aware_utility,\n", " project=project,\n", " )" - ], - "execution_count": 4, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -186,7 +178,9 @@ }, { "cell_type": "code", + "execution_count": 5, "metadata": {}, + "outputs": [], "source": [ "from botorch.optim.optimize import optimize_acqf_mixed\n", "\n", @@ -220,9 +214,7 @@ " print(f\"candidates:\\n{new_x}\\n\")\n", " print(f\"observations:\\n{new_obj}\\n\\n\")\n", " return new_x, new_obj, cost" - ], - "execution_count": 5, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -234,12 +226,12 @@ }, { "cell_type": "code", + "execution_count": 6, "metadata": {}, + "outputs": [], "source": [ "train_x, train_obj = generate_initial_data(n=16)" - ], - "execution_count": 6, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -250,43 +242,82 @@ }, { "cell_type": "code", - "metadata": {}, - "source": [ - "cumulative_cost = 0.0\n", - "N_ITER = 3 if not SMOKE_TEST else 1\n", - "\n", - "for i in range(N_ITER):\n", - " mll, model = initialize_model(train_x, train_obj)\n", - " fit_gpytorch_mll(mll)\n", - " mfkg_acqf = get_mfkg(model)\n", - " new_x, new_obj, cost = optimize_mfkg_and_get_observation(mfkg_acqf)\n", - " train_x = torch.cat([train_x, new_x])\n", - " train_obj = torch.cat([train_obj, new_obj])\n", - " cumulative_cost += cost" - ], "execution_count": 7, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "candidates:\ntensor([[0.199, 0.101, 0.436, 0.433, 0.197, 0.421, 0.750],\n [0.142, 0.274, 0.308, 0.413, 0.298, 0.570, 0.750],\n [0.097, 0.141, 0.417, 0.453, 0.477, 0.536, 0.500],\n [0.123, 0.022, 0.328, 0.430, 0.270, 0.689, 0.500]], device='cuda:0',\n dtype=torch.float64)\n\nobservations:\ntensor([[1.369],\n [2.308],\n [1.404],\n [2.297]], device='cuda:0', dtype=torch.float64)\n\n\n" + "candidates:\n", + "tensor([[0.199, 0.101, 0.436, 0.433, 0.197, 0.421, 0.750],\n", + " [0.142, 0.274, 0.308, 0.413, 0.298, 0.570, 0.750],\n", + " [0.097, 0.141, 0.417, 0.453, 0.477, 0.536, 0.500],\n", + " [0.123, 0.022, 0.328, 0.430, 0.270, 0.689, 0.500]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "observations:\n", + "tensor([[1.369],\n", + " [2.308],\n", + " [1.404],\n", + " [2.297]], device='cuda:0', dtype=torch.float64)\n", + "\n", + "\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "candidates:\ntensor([[0.276, 0.159, 0.231, 0.462, 0.295, 0.633, 1.000],\n [0.213, 0.163, 0.297, 0.336, 0.276, 0.671, 0.750],\n [0.029, 0.235, 0.236, 0.405, 0.290, 0.709, 0.500],\n [0.159, 0.205, 0.360, 0.397, 0.361, 0.717, 1.000]], device='cuda:0',\n dtype=torch.float64)\n\nobservations:\ntensor([[2.170],\n [2.984],\n [2.197],\n [2.588]], device='cuda:0', dtype=torch.float64)\n\n\n" + 
"candidates:\n", + "tensor([[0.276, 0.159, 0.231, 0.462, 0.295, 0.633, 1.000],\n", + " [0.213, 0.163, 0.297, 0.336, 0.276, 0.671, 0.750],\n", + " [0.029, 0.235, 0.236, 0.405, 0.290, 0.709, 0.500],\n", + " [0.159, 0.205, 0.360, 0.397, 0.361, 0.717, 1.000]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "observations:\n", + "tensor([[2.170],\n", + " [2.984],\n", + " [2.197],\n", + " [2.588]], device='cuda:0', dtype=torch.float64)\n", + "\n", + "\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "candidates:\ntensor([[0.268, 0.224, 0.340, 0.334, 0.230, 0.751, 0.500],\n [0.263, 0.181, 0.242, 0.307, 0.335, 0.735, 0.500],\n [0.166, 0.163, 0.345, 0.260, 0.278, 0.711, 0.500],\n [0.257, 0.238, 0.337, 0.311, 0.316, 0.639, 0.750]], device='cuda:0',\n dtype=torch.float64)\n\nobservations:\ntensor([[2.565],\n [2.818],\n [3.036],\n [3.036]], device='cuda:0', dtype=torch.float64)\n\n\n" + "candidates:\n", + "tensor([[0.268, 0.224, 0.340, 0.334, 0.230, 0.751, 0.500],\n", + " [0.263, 0.181, 0.242, 0.307, 0.335, 0.735, 0.500],\n", + " [0.166, 0.163, 0.345, 0.260, 0.278, 0.711, 0.500],\n", + " [0.257, 0.238, 0.337, 0.311, 0.316, 0.639, 0.750]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "observations:\n", + "tensor([[2.565],\n", + " [2.818],\n", + " [3.036],\n", + " [3.036]], device='cuda:0', dtype=torch.float64)\n", + "\n", + "\n" ] } + ], + "source": [ + "cumulative_cost = 0.0\n", + "N_ITER = 3 if not SMOKE_TEST else 1\n", + "\n", + "for i in range(N_ITER):\n", + " mll, model = initialize_model(train_x, train_obj)\n", + " fit_gpytorch_mll(mll)\n", + " mfkg_acqf = get_mfkg(model)\n", + " new_x, new_obj, cost = optimize_mfkg_and_get_observation(mfkg_acqf)\n", + " train_x = torch.cat([train_x, new_x])\n", + " train_obj = torch.cat([train_obj, new_obj])\n", + " cumulative_cost += cost" ] }, { @@ -299,7 +330,9 @@ }, { "cell_type": "code", + "execution_count": 8, "metadata": {}, + "outputs": [], "source": [ "def get_recommendation(model):\n", " rec_acqf = FixedFeatureAcquisitionFunction(\n", @@ -323,26 +356,32 @@ " objective_value = problem(final_rec)\n", " print(f\"recommended point:\\n{final_rec}\\n\\nobjective value:\\n{objective_value}\")\n", " return final_rec" - ], - "execution_count": 8, - "outputs": [] + ] }, { "cell_type": "code", - "metadata": {}, - "source": [ - "final_rec = get_recommendation(model)\n", - "print(f\"\\ntotal cost: {cumulative_cost}\\n\")" - ], "execution_count": 9, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "recommended point:\ntensor([[0.213, 0.164, 0.302, 0.327, 0.283, 0.689, 1.000]], device='cuda:0',\n dtype=torch.float64)\n\nobjective value:\ntensor([3.021], device='cuda:0', dtype=torch.float64)\n\ntotal cost: 68.0\n\n" + "recommended point:\n", + "tensor([[0.213, 0.164, 0.302, 0.327, 0.283, 0.689, 1.000]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "objective value:\n", + "tensor([3.021], device='cuda:0', dtype=torch.float64)\n", + "\n", + "total cost: 68.0\n", + "\n" ] } + ], + "source": [ + "final_rec = get_recommendation(model)\n", + "print(f\"\\ntotal cost: {cumulative_cost}\\n\")" ] }, { @@ -355,7 +394,9 @@ }, { "cell_type": "code", + "execution_count": 10, "metadata": {}, + "outputs": [], "source": [ "from botorch.acquisition import qExpectedImprovement\n", "\n", @@ -392,85 +433,139 @@ " print(f\"candidates:\\n{new_x}\\n\")\n", " print(f\"observations:\\n{new_obj}\\n\\n\")\n", " return new_x, new_obj, cost" - 
], - "execution_count": 10, - "outputs": [] + ] }, { "cell_type": "code", - "metadata": {}, - "source": [ - "cumulative_cost = 0.0\n", - "\n", - "train_x, train_obj = generate_initial_data(n=16)\n", - "\n", - "for _ in range(N_ITER):\n", - " mll, model = initialize_model(train_x, train_obj)\n", - " fit_gpytorch_mll(mll)\n", - " ei_acqf = get_ei(model, best_f=train_obj.max())\n", - " new_x, new_obj, cost = optimize_ei_and_get_observation(ei_acqf)\n", - " train_x = torch.cat([train_x, new_x])\n", - " train_obj = torch.cat([train_obj, new_obj])\n", - " cumulative_cost += cost" - ], "execution_count": 11, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "candidates:\ntensor([[0.247, 0.687, 0.581, 0.760, 0.093, 0.132, 1.000],\n [0.319, 0.850, 0.639, 0.865, 0.000, 0.120, 1.000],\n [0.349, 0.666, 0.555, 0.986, 0.000, 0.126, 1.000],\n [0.297, 0.792, 0.450, 0.889, 0.034, 0.028, 1.000]], device='cuda:0',\n dtype=torch.float64)\n\nobservations:\ntensor([[0.973],\n [1.091],\n [0.340],\n [0.902]], device='cuda:0', dtype=torch.float64)\n\n\n" + "candidates:\n", + "tensor([[0.247, 0.687, 0.581, 0.760, 0.093, 0.132, 1.000],\n", + " [0.319, 0.850, 0.639, 0.865, 0.000, 0.120, 1.000],\n", + " [0.349, 0.666, 0.555, 0.986, 0.000, 0.126, 1.000],\n", + " [0.297, 0.792, 0.450, 0.889, 0.034, 0.028, 1.000]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "observations:\n", + "tensor([[0.973],\n", + " [1.091],\n", + " [0.340],\n", + " [0.902]], device='cuda:0', dtype=torch.float64)\n", + "\n", + "\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "candidates:\ntensor([[0.194, 0.858, 0.622, 0.799, 0.000, 0.095, 1.000],\n [0.341, 0.854, 0.590, 0.767, 0.000, 0.085, 1.000],\n [0.999, 0.439, 0.828, 0.975, 0.633, 0.176, 1.000],\n [0.296, 0.859, 0.677, 0.806, 0.119, 0.054, 1.000]], device='cuda:0',\n dtype=torch.float64)\n\nobservations:\ntensor([[ 0.862],\n [ 1.975],\n [ 0.000],\n [ 1.514]], device='cuda:0', dtype=torch.float64)\n\n\n" + "candidates:\n", + "tensor([[0.194, 0.858, 0.622, 0.799, 0.000, 0.095, 1.000],\n", + " [0.341, 0.854, 0.590, 0.767, 0.000, 0.085, 1.000],\n", + " [0.999, 0.439, 0.828, 0.975, 0.633, 0.176, 1.000],\n", + " [0.296, 0.859, 0.677, 0.806, 0.119, 0.054, 1.000]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "observations:\n", + "tensor([[ 0.862],\n", + " [ 1.975],\n", + " [ 0.000],\n", + " [ 1.514]], device='cuda:0', dtype=torch.float64)\n", + "\n", + "\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "/data/sandcastle/boxes/fbsource/fbcode/buck-out/opt/gen/bento/kernels/bento_kernel_ae#link-tree/gpytorch/utils/cholesky.py:40: NumericalWarning:\n\nA not p.d., added jitter of 1.0e-08 to the diagonal\n\n" + "/data/sandcastle/boxes/fbsource/fbcode/buck-out/opt/gen/bento/kernels/bento_kernel_ae#link-tree/gpytorch/utils/cholesky.py:40: NumericalWarning:\n", + "\n", + "A not p.d., added jitter of 1.0e-08 to the diagonal\n", + "\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "candidates:\ntensor([[0.360, 0.891, 0.588, 0.749, 0.019, 0.036, 1.000],\n [0.049, 0.894, 0.345, 0.210, 0.482, 0.463, 1.000],\n [0.398, 0.970, 0.504, 0.213, 0.814, 0.724, 1.000],\n [0.817, 0.879, 0.691, 0.842, 0.455, 0.937, 1.000]], device='cuda:0',\n dtype=torch.float64)\n\nobservations:\ntensor([[2.271],\n [0.216],\n [0.055],\n [0.036]], device='cuda:0', dtype=torch.float64)\n\n\n" + "candidates:\n", + 
"tensor([[0.360, 0.891, 0.588, 0.749, 0.019, 0.036, 1.000],\n", + " [0.049, 0.894, 0.345, 0.210, 0.482, 0.463, 1.000],\n", + " [0.398, 0.970, 0.504, 0.213, 0.814, 0.724, 1.000],\n", + " [0.817, 0.879, 0.691, 0.842, 0.455, 0.937, 1.000]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "observations:\n", + "tensor([[2.271],\n", + " [0.216],\n", + " [0.055],\n", + " [0.036]], device='cuda:0', dtype=torch.float64)\n", + "\n", + "\n" ] } + ], + "source": [ + "cumulative_cost = 0.0\n", + "\n", + "train_x, train_obj = generate_initial_data(n=16)\n", + "\n", + "for _ in range(N_ITER):\n", + " mll, model = initialize_model(train_x, train_obj)\n", + " fit_gpytorch_mll(mll)\n", + " ei_acqf = get_ei(model, best_f=train_obj.max())\n", + " new_x, new_obj, cost = optimize_ei_and_get_observation(ei_acqf)\n", + " train_x = torch.cat([train_x, new_x])\n", + " train_obj = torch.cat([train_obj, new_obj])\n", + " cumulative_cost += cost" ] }, { "cell_type": "code", - "metadata": {}, - "source": [ - "final_rec = get_recommendation(model)\n", - "print(f\"\\ntotal cost: {cumulative_cost}\\n\")" - ], "execution_count": 12, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "recommended point:\ntensor([[0.352, 0.874, 0.589, 0.756, 0.008, 0.060, 1.000]], device='cuda:0',\n dtype=torch.float64)\n\nobjective value:\ntensor([2.166], device='cuda:0', dtype=torch.float64)\n\ntotal cost: 72.0\n\n" + "recommended point:\n", + "tensor([[0.352, 0.874, 0.589, 0.756, 0.008, 0.060, 1.000]], device='cuda:0',\n", + " dtype=torch.float64)\n", + "\n", + "objective value:\n", + "tensor([2.166], device='cuda:0', dtype=torch.float64)\n", + "\n", + "total cost: 72.0\n", + "\n" ] } + ], + "source": [ + "final_rec = get_recommendation(model)\n", + "print(f\"\\ntotal cost: {cumulative_cost}\\n\")" ] }, { "cell_type": "code", - "metadata": {}, - "source": [ - "" - ], "execution_count": 12, - "outputs": [] + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "name": "python3" } - ] + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/tutorials/multi_fidelity_bo.ipynb b/tutorials/multi_fidelity_bo.ipynb index aadaeba6d0..6ea3ddc480 100644 --- a/tutorials/multi_fidelity_bo.ipynb +++ b/tutorials/multi_fidelity_bo.ipynb @@ -8,7 +8,8 @@ "\n", "In this tutorial, we show how to perform continuous multi-fidelity Bayesian optimization (BO) in BoTorch using the multi-fidelity Knowledge Gradient (qMFKG) acquisition function [1, 2].\n", "\n", - "[1] [J. Wu, P.I. Frazier. Continuous-Fidelity Bayesian Optimization with Knowledge Gradient. NIPS Workshop on Bayesian Optimization, 2017.](https://bayesopt.github.io/papers/2017/20.pdf)\n\n", + "[1] [J. Wu, P.I. Frazier. Continuous-Fidelity Bayesian Optimization with Knowledge Gradient. NIPS Workshop on Bayesian Optimization, 2017.](https://bayesopt.github.io/papers/2017/20.pdf)\n", + "\n", "[2] [J. Wu, S. Toscano-Palmerin, P.I. Frazier, A.G. Wilson. Practical Multi-fidelity Bayesian Optimization for Hyperparameter Tuning. 
Conference on Uncertainty in Artificial Intelligence (UAI), 2019](https://arxiv.org/pdf/1903.04703.pdf)" ] }, @@ -90,7 +91,7 @@ " # define a surrogate model suited for a \"training data\"-like fidelity parameter\n", " # in dimension 6, as in [2]\n", " model = SingleTaskMultiFidelityGP(\n", - " train_x, train_obj, outcome_transform=Standardize(m=1), data_fidelity=6\n", + " train_x, train_obj, outcome_transform=Standardize(m=1), data_fidelities=[6]\n", " )\n", " mll = ExactMarginalLogLikelihood(model.likelihood, model)\n", " return mll, model"
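Reviewer note: below is a minimal usage sketch of the behavior this patch introduces; it is not part of the patch itself. The toy training data and the fidelity column indices (4 and 5) are illustrative assumptions, but the calls mirror the new `data_fidelities` argument, the deprecation path for `data_fidelity`, and the check that the two arguments cannot be combined.

```python
import warnings

import torch
from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP

# Toy training data: 4 design dimensions plus two trailing fidelity columns
# (column indices 4 and 5 are assumed here purely for illustration).
train_X = torch.rand(20, 6)
train_Y = train_X.pow(2).sum(dim=-1, keepdim=True)

# New-style call: one fidelity kernel is constructed per listed column.
model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelities=[4, 5])

# Old-style call still works, but now emits a DeprecationWarning and is
# rewritten internally to data_fidelities=[5].
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy_model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelity=5)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Passing both arguments is rejected with a ValueError.
try:
    SingleTaskMultiFidelityGP(
        train_X, train_Y, data_fidelity=5, data_fidelities=[4, 5]
    )
except ValueError:
    pass
```

As a design note, with `linear_truncated=False` the covariance module becomes a `ScaleKernel` over a `ProductKernel` containing one `RBFKernel` on the non-fidelity columns, an optional `ExponentialDecayKernel` for the iteration fidelity, and one `DownsamplingKernel` per entry of `data_fidelities`, which is why the `subset_batch_dict` keys are now generated per kernel index rather than hard-coded.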