diff --git a/datasets/ijcnn1.py b/datasets/ijcnn1.py
new file mode 100644
index 0000000..6c468ce
--- /dev/null
+++ b/datasets/ijcnn1.py
@@ -0,0 +1,30 @@
+from benchopt import BaseDataset, safe_import_context
+
+
+with safe_import_context() as import_ctx:
+    # libsvmdata's dependencies are scikit-learn, download and tqdm
+    from scipy import sparse
+    from libsvmdata import fetch_libsvm
+
+
+class Dataset(BaseDataset):
+    name = "ijcnn1"
+    # the data is standardized in get_data, which makes it dense
+    is_sparse = False
+
+    install_cmd = "conda"
+    requirements = ["pip:libsvmdata"]
+
+    def __init__(self):
+        self.X, self.y = None, None
+
+    def get_data(self):
+        if self.X is None:
+            self.X, self.y = fetch_libsvm("ijcnn1")
+            if sparse.issparse(self.X):
+                # ijcnn1 only has 22 features: densify it, since centering
+                # would break sparsity and sparse matrices have no `std`
+                self.X = self.X.toarray()
+            # column-wise standardization
+            self.X = (self.X - self.X.mean(axis=0)) / self.X.std(axis=0)
+        return dict(X=self.X, y=self.y)
diff --git a/datasets/madelon.py b/datasets/madelon.py
index c304d94..dc37862 100644
--- a/datasets/madelon.py
+++ b/datasets/madelon.py
@@ -14,5 +14,4 @@ class Dataset(BaseDataset):
     def get_data(self):
         X, y = fetch_libsvm("madelon")
         X_test, y_test = fetch_libsvm("madelon_test")
-        data = dict(X=X, y=y, X_test=X_test, y_test=y_test)
-        return data
+        return dict(X=X, y=y, X_test=X_test, y_test=y_test)
diff --git a/datasets/news20.py b/datasets/news20.py
index 36f0af0..83bd5f3 100644
--- a/datasets/news20.py
+++ b/datasets/news20.py
@@ -21,6 +21,4 @@ def get_data(self):
 
         if self.X is None:
             self.X, self.y = fetch_libsvm("news20.binary")
-        data = dict(X=self.X, y=self.y)
-
-        return data
+        return dict(X=self.X, y=self.y)
diff --git a/datasets/rcv1.py b/datasets/rcv1.py
index 24bdb1a..ec1eca9 100644
--- a/datasets/rcv1.py
+++ b/datasets/rcv1.py
@@ -24,6 +24,4 @@ def get_data(self):
             'rcv1.binary_test', min_nnz=0
         )
 
-        data = dict(X=self.X, y=self.y, X_test=self.X_test, y_test=self.y_test)
-
-        return data
+        return dict(X=self.X, y=self.y, X_test=self.X_test, y_test=self.y_test)
diff --git a/objective.py b/objective.py
index 203cb74..f4955f5 100644
--- a/objective.py
+++ b/objective.py
@@ -14,7 +14,7 @@ class Objective(BaseObjective):
     name = "L2 Logistic Regression"
 
     parameters = {
-        'lmbd': [1., 0.01]
+        'lmbd': [0.1, 0.001]
     }
 
     def __init__(self, lmbd=.1):
@@ -23,8 +23,15 @@ def __init__(self, lmbd=.1):
     def set_data(self, X, y, X_test=None, y_test=None):
         self.X, self.y = X, y
         self.X_test, self.y_test = X_test, y_test
-        msg = "Logistic loss is implemented with y in [-1, 1]"
-        assert set(self.y) == {-1, 1}, msg
+
+        msg = "Logistic loss is implemented with binary y"
+        assert len(np.unique(y)) == 2, msg
+        # relabel the two classes to {-1, 1} with a precomputed mask:
+        # mapping y.max() and then y.min() in turn can collapse labels
+        # like {1, 2} to all -1
+        is_pos = y == y.max()
+        y[is_pos] = 1
+        y[~is_pos] = -1
 
     def get_one_solution(self):
         return np.zeros((self.X.shape[1]))
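Note on the `set_data` change above: the relabeling cannot be done with two successive in-place assignments, because after the first write the values matched by the second one may include freshly written labels. A minimal standalone sketch (the label set {1, 2} is made up for illustration) of the failure mode and of the mask-based fix used in the hunk:

    import numpy as np

    y = np.array([1., 2., 2., 1.])

    # sequential relabeling: after the first write every entry equals 1,
    # so the second write maps the whole vector to -1
    y_bad = y.copy()
    y_bad[y_bad == y_bad.max()] = 1
    y_bad[y_bad == y_bad.min()] = -1
    print(y_bad)  # [-1. -1. -1. -1.]

    # computing the mask before writing keeps the two classes apart
    y_ok = y.copy()
    is_pos = y_ok == y_ok.max()
    y_ok[is_pos] = 1
    y_ok[~is_pos] = -1
    print(y_ok)  # [-1.  1.  1. -1.]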
diff --git a/solvers/julia_stochopt.jl b/solvers/julia_stochopt.jl
new file mode 100644
index 0000000..b9b4601
--- /dev/null
+++ b/solvers/julia_stochopt.jl
@@ -0,0 +1,132 @@
+using StochOpt
+using Match
+using PyCall, SparseArrays
+
+# Convert a scipy CSC matrix, passed through PyCall, to a Julia
+# SparseMatrixCSC, shifting its 0-based indices to 1-based ones.
+function scipyCSC_to_julia(A)
+    m, n = A.shape
+    colPtr = Int[i+1 for i in PyArray(A."indptr")]
+    rowVal = Int[i+1 for i in PyArray(A."indices")]
+    nzVal = Vector{Float64}(PyArray(A."data"))
+    B = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal)
+    return B
+end
+
+
+function solve_logreg(X, y::Vector{Float64}, lambda::Float64, n_iter::Int64,
+                      method_name::AbstractString = "SVRG", batch_size::Int64 = 100,
+                      numinneriters::Int64 = 1)
+
+    # Set options for the StochOpt solvers
+    options = set_options(
+        max_iter=n_iter,  # run n_iter epochs of the considered method
+        batchsize=batch_size,
+        embeddim=0,
+        printiters=false,
+        stepsize_multiplier=1.0,
+        exacterror=false,
+    );
+
+    # In StochOpt, the objective is a mean of logistic losses, compared to
+    # a sum in benchopt, so lambda must be divided by the number of
+    # samples. We cannot use regularizor_parameter, as this only works
+    # when lambda == -1.
+    n_samples = size(X, 2);
+    lambda /= n_samples;
+
+    # Load the logistic problem with the given matrices
+    prob = load_logistic_from_matrices(
+        X, y, "benchopt", options, lambda=lambda, scaling="none"
+    );
+
+    if method_name in ["SVRG_bubeck", "Free_SVRG", "Leap_SVRG", "L_SVRG_D",
+                       "Free_SVRG_2n", "Free_SVRG_lmax", "Free_SVRG_mstar"]
+        # Use the theoretical step size guaranteeing convergence of these
+        # methods, which is only available for batchsize == 1
+        options.batchsize = 1;
+        options.stepsize_multiplier = -1.0;
+        # sampling strategy for the stochastic estimates
+        sampling = StochOpt.build_sampling("nice", prob.numdata, options);
+    end
+
+    n = prob.numdata
+    mu = prob.mu
+    Lmax = prob.Lmax
+
+    # Theoretical optimal inner loop size for Free-SVRG with 1-nice sampling
+    m_star = round(Int64, (3 * Lmax) / mu)
+    # Reference update probability for L-SVRG-D, kept as a Float64:
+    # rounding a value in (0, 1) to an integer would give 0
+    m_star_inv = mu / (3 * Lmax)
+
+    # numinneriters is the number of inner loop iterations; the Free-SVRG
+    # variants use n, 2n, Lmax/mu and m_star, cf. Table 1 in [2]
+    @match method_name begin
+        "SVRG_bubeck" => (method = StochOpt.initiate_SVRG_bubeck(
+            prob, options, sampling, numinneriters=-1
+        ))
+        "Free_SVRG" => (method = StochOpt.initiate_Free_SVRG(
+            prob, options, sampling, numinneriters=n,
+            averaged_reference_point=true
+        ))
+        "Free_SVRG_2n" => (method = StochOpt.initiate_Free_SVRG(
+            prob, options, sampling, numinneriters=2*n,
+            averaged_reference_point=true
+        ))
+        "Free_SVRG_lmax" => (method = StochOpt.initiate_Free_SVRG(
+            prob, options, sampling, numinneriters=round(Int64, Lmax/mu),
+            averaged_reference_point=true
+        ))
+        "Free_SVRG_mstar" => (method = StochOpt.initiate_Free_SVRG(
+            prob, options, sampling, numinneriters=m_star,
+            averaged_reference_point=true
+        ))
+        "Leap_SVRG" => (method = StochOpt.initiate_Leap_SVRG(
+            prob, options, sampling, 1/prob.numdata
+        ))
+        "L_SVRG_D" => (method = StochOpt.initiate_L_SVRG_D(
+            prob, options, sampling, m_star_inv
+        ))
+        "SAGA_nice" => (method = StochOpt.initiate_SAGA_nice(prob, options))
+        _ => (method = method_name)
+    end
+
+    prob.fsol = 0.0  # disable loading of a precomputed solution
+    return minimize_func(prob, method, options);
+end
+
+
+# Named differently from minimizeFunc, which already exists in StochOpt
+function minimize_func(prob::Prob, method_input, options::MyOptions)
+    # Set the initial point
+    if options.initial_point == "randn"
+        x = randn(prob.numfeatures);
+    elseif options.initial_point == "rand"
+        x = rand(prob.numfeatures);
+    elseif options.initial_point == "ones"
+        x = ones(prob.numfeatures);
+    else
+        x = zeros(prob.numfeatures);
+    end
+
+    if typeof(method_input) == String
+        method = StochOpt.boot_method(method_input, prob, options);
+        if method == "METHOD DOES NOT EXIST"
+            error("unknown method name: ", method_input);
+        end
+    else
+        method = method_input;
+        method.bootmethod(prob, method, options);
+    end
+
+    d = zeros(prob.numfeatures);  # search direction vector
+
+    for iter = 1:options.max_iter
+        # Take one step; stepmethod updates the direction d in place
+        method.stepmethod(x, prob, options, method, iter, d);
+        x[:] = x + method.stepsize * d;
+    end
+
+    return x
+end
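For reference, scipy and Julia use the same compressed sparse column layout, but scipy's `indptr`/`indices` are 0-based while Julia's `SparseMatrixCSC` is 1-based; that shift is the only translation `scipyCSC_to_julia` performs. A small standalone sketch (the example matrix is made up):

    import numpy as np
    from scipy import sparse

    A = sparse.csc_matrix(np.array([[1., 0., 2.],
                                    [0., 3., 0.]]))
    print(A.indptr)   # [0 1 2 3], start of each column in `data`
    print(A.indices)  # [0 1 0], row index of each stored entry
    print(A.data)     # [1. 3. 2.]

    # the 1-based arrays handed to SparseMatrixCSC{Float64, Int}
    colPtr = A.indptr + 1   # [1 2 3 4]
    rowVal = A.indices + 1  # [1 2 1]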
diff --git a/solvers/julia_stochopt.py b/solvers/julia_stochopt.py
new file mode 100644
index 0000000..827bef7
--- /dev/null
+++ b/solvers/julia_stochopt.py
@@ -0,0 +1,102 @@
+from pathlib import Path
+
+from benchopt import safe_import_context
+from benchopt.stopping_criterion import SufficientProgressCriterion
+
+from benchopt.helpers.julia import JuliaSolver
+from benchopt.helpers.julia import get_jl_interpreter
+from benchopt.helpers.julia import assert_julia_installed
+
+with safe_import_context() as import_ctx:
+    assert_julia_installed()
+    from scipy import sparse
+
+
+# File containing the functions to be called from julia
+JULIA_SOLVER_FILE = Path(__file__).with_suffix('.jl')
+
+
+class Solver(JuliaSolver):
+
+    name = 'julia-stochopt'
+    references = [
+        '[1] Gazagnadou, Gower and Salmon, "Optimal mini-batch and step '
+        'sizes for SAGA", International Conference on Machine Learning, '
+        'PMLR, 2019',
+        '[2] Sebbouh et al., "Towards closing the gap between the theory '
+        'and practice of SVRG", Advances in Neural Information Processing '
+        'Systems, 2019',
+        'https://github.com/gowerrobert/StochOpt.jl'
+    ]
+
+    # The list of dependencies can be found on the package github
+    julia_requirements = [
+        'StochOpt::https://github.com/tommoral/StochOpt.jl#FIX_linear_algebra_deps',  # noqa: E501
+        'PyCall',
+    ]
+
+    parameters = {
+        'method': [
+            "SVRG",
+            "Free_SVRG",  # SVRG whose inner loop is never averaged [2]
+            "BFGS",
+            "AMgauss",  # variant of SVRG with an embedded Hessian matrix
+            "SAGA_nice",  # SAGA with closed-form optimal mini-batch [1]
+            "SVRG_bubeck",
+            "Leap_SVRG",  # SVRG without outer loop [2]
+            "L_SVRG_D",  # Loopless-SVRG-Decreasing [2]
+            "Free_SVRG_2n",
+            "Free_SVRG_lmax",
+            "Free_SVRG_mstar",
+        ],
+        'batch_size': [
+            1,  # allows the theoretical step size for some methods
+            128
+        ],
+        'numinneriters': [
+            1,
+            -1
+        ]
+    }
+
+    stopping_criterion = SufficientProgressCriterion(
+        strategy='iteration', patience=10
+    )
+
+    def set_objective(self, X, y, lmbd):
+        self.X, self.y, self.lmbd = X, y, lmbd
+
+        jl = get_jl_interpreter()
+        jl.include(str(JULIA_SOLVER_FILE))
+        self.solve_logreg = jl.solve_logreg
+
+        # StochOpt expects the design matrix with shape
+        # (n_features, n_samples), so transpose X once here; scipy
+        # sparse matrices are converted to Julia sparse matrices
+        if sparse.issparse(X):
+            scipyCSC_to_julia = jl.pyfunctionret(
+                jl.Main.scipyCSC_to_julia, jl.Any, jl.PyObject
+            )
+            self.X = scipyCSC_to_julia(sparse.csc_matrix(X.T))
+        else:
+            self.X = X.T
+
+        # set the optimal number of inner iterations to n_samples,
+        # cf. Table 1 in [2]
+        if self.method == 'Free_SVRG' and self.numinneriters != -1:
+            self.numinneriters = X.shape[0]
+
+        # run the solver once to cache Julia's JIT compilation
+        self.solve_logreg(
+            self.X, self.y, self.lmbd, 2,
+            self.method, self.batch_size, self.numinneriters
+        )
+
+    def run(self, n_iter):
+        self.coef_ = self.solve_logreg(
+            self.X, self.y, self.lmbd, n_iter,
+            self.method, self.batch_size, self.numinneriters
+        )
+
+    def get_result(self):
+        return self.coef_.ravel()
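To exercise the new solver locally, something like the following should work (a sketch: the flags follow standard benchopt CLI usage, and `.` is assumed to be the root of this benchmark):

    benchopt install . -s julia-stochopt -d ijcnn1
    benchopt run . -s 'julia-stochopt[method=Free_SVRG,batch_size=1]' -d ijcnn1

The bracketed suffix selects one point of the `parameters` grid defined in solvers/julia_stochopt.py; omitting it runs every method/batch-size combination.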