From 00cc54755ae814238dcd0825d9d7ab01af8b2258 Mon Sep 17 00:00:00 2001
From: szcf-weiya
Date: Sat, 17 Feb 2024 23:39:23 -0500
Subject: [PATCH] trace loss of beta

---
 src/boot.jl |  7 ++++++-
 src/boot.py | 29 ++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/boot.jl b/src/boot.jl
index fa4de86..f08f27c 100755
--- a/src/boot.jl
+++ b/src/boot.jl
@@ -518,6 +518,8 @@ function py_train_G_lambda(y::AbstractVector, B::AbstractMatrix, L::AbstractMatr
                            gpu_id = 0,
                            niter_per_epoch = 100,
                            disable_tqdm = false,
+                           λs_opt_train = nothing, λs_opt_val = nothing,
+                           βs_opt_train = nothing, βs_opt_val = nothing,
                            kw...
     )
     Ghat, LOSS = _py_boot."train_G_lambda"(Float32.(y), Float32.(B), Float32.(L), eta = η, K = K,
@@ -533,7 +535,10 @@ function py_train_G_lambda(y::AbstractVector, B::AbstractMatrix, L::AbstractMatr
                                            patience0 = patience0, patience=patience,
                                            disable_early_stopping = disable_early_stopping,
                                            niter_per_epoch = niter_per_epoch, nhidden = nhidden, depth = depth,
-                                           disable_tqdm = disable_tqdm)#::Tuple{PyObject, PyArray}
+                                           disable_tqdm = disable_tqdm,
+                                           lams_opt_train = λs_opt_train, lams_opt_val = λs_opt_val,
+                                           betas_opt_train = βs_opt_train, betas_opt_val = βs_opt_val
+                                           )#::Tuple{PyObject, PyArray}
     #println(typeof(py_ret)) #Tuple{PyCall.PyObject, Matrix{Float32}}
     # ....................... # Tuple{PyCall.PyObject, PyCall.PyArray{Float32, 2}}
     #LOSS = Matrix(py_ret[2]) # NB: not necessarily a matrix, but possibly a matrix
diff --git a/src/boot.py b/src/boot.py
index 5e1f283..c78fd06 100755
--- a/src/boot.py
+++ b/src/boot.py
@@ -43,13 +43,19 @@ def train_G_lambda(y, B, L, K = 10, K0 = 10,
                    patience = 100, patience0 = 100,
                    disable_early_stopping = True, # TODO: early stopping
                    eval_sigma_adaptive = False, # if False, use `model0` to evaluate sigma
                    model_file = "model_G.pt",
-                   step2_use_tensor = True, amsgrad = True, # no need to modified
+                   step2_use_tensor = True, amsgrad = True, # no need to be modified
+                   lams_opt_train = None, lams_opt_val = None, # each lam corresponds to a beta (dim: N)
+                   betas_opt_train = None, betas_opt_val = None, # evaluate the loss between the OPT solution and the GpBS solution (dim: N x J)
                    disable_tqdm = False): #
     device = f"cuda:{gpu_id}" if torch.cuda.is_available() and gpu_id != -1 else "cpu"
     y = torch.from_numpy(y[None, :]).to(device, non_blocking=True)
     B = torch.from_numpy(B).to(device, non_blocking=True)
     L = torch.from_numpy(L).to(device, non_blocking=True)
+    if lams_opt_train is not None:
+        betas_opt_train = torch.from_numpy(betas_opt_train).to(device, non_blocking = True)
+        betas_opt_val = torch.from_numpy(betas_opt_val).to(device, non_blocking = True)
+        LOSS_betas = torch.zeros(nepoch0, 2).to(device)
     n, J = B.size()
     dim_lam = 8
     model = Model(n+dim_lam, J, nhidden, depth, use_torchsort, sort_reg_strength).to(device)
@@ -100,6 +106,23 @@ def aug(lam):
             ypred = torch.matmul(beta, B.t())
             LOSS0[epoch, i+1] = loss_fn(ypred, y) + lam * torch.square(torch.matmul(beta, L)).mean() * J / n
         print(f"epoch = {epoch}, L(lam) = {LOSS0[epoch, 0]:.6f}, L(lam_lo) = {LOSS0[epoch, 1]:.6f}, L(lam_up) = {LOSS0[epoch, 2]:.6f}")
+        if lams_opt_train is not None:
+            loss_betas = []
+            for i, lam in enumerate(lams_opt_train):
+                aug_lam = torch.tensor(aug(lam), dtype=torch.float32, device = device)
+                ylam = torch.cat((y, aug_lam.repeat((1, 1))), dim=1)
+                beta = model(ylam)
+                loss_betas.append(loss_fn(betas_opt_train[i, :], beta[0]).item()) # beta is 1xJ
+            LOSS_betas[epoch, 0] = np.mean(loss_betas)
+
+            loss_betas = []
+            for i, lam in enumerate(lams_opt_val):
+                aug_lam = torch.tensor(aug(lam), dtype=torch.float32, device = device)
+                ylam = torch.cat((y, aug_lam.repeat((1, 1))), dim=1)
+                beta = model(ylam)
+                loss_betas.append(loss_fn(betas_opt_val[i, :], beta[0]).item())
+            LOSS_betas[epoch, 1] = np.mean(loss_betas)
+
         # sch1.step()
         if not disable_early_stopping:
             early_stopping0(LOSS0[epoch, 1:].mean(), model)
@@ -201,6 +224,10 @@ def aug(lam):
         ret_loss = loss_warmup
     else:
         ret_loss = np.r_[loss_warmup, loss_boot]
+    if lams_opt_train is not None:
+        beta_loss = LOSS_betas.cpu().detach().numpy()
+        #return G, train_loss, ret_loss, beta_loss
+        return G, beta_loss, ret_loss # keep the same number of return values; on the Julia side, LOSS is then the beta_loss
     return G, train_loss, ret_loss
 
 def load_model(n, dim_lam, J, nhidden, model_file, gpu_id = 3):
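
For reference, a minimal usage sketch of the new keywords from the Julia caller.
All data below are hypothetical placeholders; it assumes the definitions in
src/boot.jl are in scope, that `y`, `B`, `L` are the only positional arguments of
`py_train_G_lambda`, and that the remaining keywords keep their defaults:

    n, J = 100, 10
    y  = randn(n)                  # observations
    B  = randn(n, J)               # basis matrix (n x J)
    L  = randn(J, J)               # penalty matrix
    λs = [1e-8, 1e-6, 1e-4]        # each λ has a corresponding OPT solution β
    βs = randn(length(λs), J)      # OPT coefficients to track (N x J)
    Ghat, LOSS = py_train_G_lambda(y, B, L;
                                   λs_opt_train = λs, βs_opt_train = βs,
                                   λs_opt_val = λs, βs_opt_val = βs)
    # With these keywords supplied, the returned LOSS is the per-epoch beta loss
    # (first column: train, second column: val) rather than the usual train_loss.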