[WIP] Mini-batch solvers from StochOpt.jl #29

Open
wants to merge 13 commits into base: main
25 changes: 25 additions & 0 deletions datasets/ijcnn1.py
@@ -0,0 +1,25 @@
from benchopt import BaseDataset, safe_import_context


with safe_import_context() as import_ctx:
# Dependencies of fetch_libsvm include scikit-learn, download and tqdm
from libsvmdata import fetch_libsvm


class Dataset(BaseDataset):
name = "ijcnn1"
is_sparse = True

install_cmd = "conda"
requirements = ["pip:libsvmdata"]

def __init__(self):
self.X, self.y = None, None

def get_data(self):

if self.X is None:
self.X, self.y = fetch_libsvm("ijcnn1")
# standardize columns to zero mean and unit variance
self.X = (self.X - self.X.mean(0)) / self.X.std(0)
return dict(X=self.X, y=self.y)
3 changes: 1 addition & 2 deletions datasets/madelon.py
@@ -14,5 +14,4 @@ class Dataset(BaseDataset):
def get_data(self):
X, y = fetch_libsvm("madelon")
X_test, y_test = fetch_libsvm("madelon_test")
data = dict(X=X, y=y, X_test=X_test, y_test=y_test)
return data
return dict(X=X, y=y, X_test=X_test, y_test=y_test)
4 changes: 1 addition & 3 deletions datasets/news20.py
@@ -21,6 +21,4 @@ def get_data(self):
if self.X is None:
self.X, self.y = fetch_libsvm("news20.binary")

data = dict(X=self.X, y=self.y)

return data
return dict(X=self.X, y=self.y)
4 changes: 1 addition & 3 deletions datasets/rcv1.py
@@ -24,6 +24,4 @@ def get_data(self):
'rcv1.binary_test', min_nnz=0
)

data = dict(X=self.X, y=self.y, X_test=self.X_test, y_test=self.y_test)

return data
return dict(X=self.X, y=self.y, X_test=self.X_test, y_test=self.y_test)
9 changes: 6 additions & 3 deletions objective.py
@@ -14,7 +14,7 @@ class Objective(BaseObjective):
name = "L2 Logistic Regression"

parameters = {
'lmbd': [1., 0.01]
'lmbd': [0.1, 0.001]
}

def __init__(self, lmbd=.1):
@@ -23,8 +23,11 @@ def __init__(self, lmbd=.1):
def set_data(self, X, y, X_test=None, y_test=None):
self.X, self.y = X, y
self.X_test, self.y_test = X_test, y_test
msg = "Logistic loss is implemented with y in [-1, 1]"
assert set(self.y) == {-1, 1}, msg

msg = "Logistic loss is implemented with binary y"
assert len(np.unique(y)) == 2, msg
y[y == y.max()] = 1
y[y == y.min()] = -1

def get_one_solution(self):
return np.zeros((self.X.shape[1]))
129 changes: 129 additions & 0 deletions solvers/julia_stochopt.jl
@@ -0,0 +1,129 @@
using StochOpt
using Match
using PyCall, SparseArrays

function scipyCSC_to_julia(A)
m, n = A.shape
colPtr = Int[i+1 for i in PyArray(A."indptr")]
rowVal = Int[i+1 for i in PyArray(A."indices")]
nzVal = Vector{Float64}(PyArray(A."data"))
B = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal)
return B
end
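
# Illustrative usage (a sketch, assuming PyCall's pyimport is available):
#   sp = pyimport("scipy.sparse")
#   A = sp.random(5, 3, density=0.5, format="csc")
#   B = scipyCSC_to_julia(A)  # SparseMatrixCSC{Float64,Int} of size (5, 3)
# The `i + 1` shifts above convert scipy's 0-based indptr/indices to
# Julia's 1-based convention.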


function solve_logreg(X, y::Vector{Float64}, lambda::Float64, n_iter::Int64,
method_name::AbstractString = "SVRG", batch_size::Int64 = 100,
numinneriters::Int64 = 1)

println("type X", typeof(X));

# Set option for StochOpt solvers
options = set_options(
max_iter=n_iter, # run n_iter epochs of the considered function
batchsize=batch_size,
embeddim=0,
printiters=false,
stepsize_multiplier=1.0,
exacterror=false,
);

# In StochOpt, the objective is the mean logistic loss, whereas benchopt
# uses the sum, so lambda must be divided by the number of samples. We
# cannot use regularizor_parameter since it only works when lambda == -1.
n_samples = size(X, 2);
lambda /= n_samples;
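
# Concretely (a sketch of the correspondence, with x_i the i-th sample):
#   benchopt:  sum_i log(1 + exp(-y_i * x_i' * w)) + (lambda / 2) * ||w||^2
#   StochOpt:  (1/n) sum_i log(1 + exp(-y_i * x_i' * w)) + (lambda' / 2) * ||w||^2
# Dividing the benchopt objective by n shows both minimizers coincide
# when lambda' = lambda / n.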

# Load logistic problem with the given matrices
prob = load_logistic_from_matrices(
X, y, "benchopt", options, lambda=lambda, scaling="none"
);

if method_name in ["SVRG_bubeck", "Free_SVRG", "Leap_SVRG", "L_SVRG_D",
"Free_SVRG_2n", "Free_SVRG_lmax", "Free_SVRG_mstar"]

# Use the theoretical step size that guarantees convergence of these
# methods; it is only available for batchsize == 1.
options.batchsize = 1;
options.stepsize_multiplier = -1.0;
# sampling strategy for the stochastic estimates
sampling = StochOpt.build_sampling("nice", prob.numdata, options);
end

n = prob.numdata
d = prob.numfeatures
mu = prob.mu
Lmax = prob.Lmax
L = prob.L

m_star = round(Int64, (3 * Lmax) / mu)  # theoretical optimal inner-loop size for Free-SVRG with 1-nice sampling
m_star_inv = round(Int64, mu / (3 * Lmax))
## List of candidate inner-loop sizes
numinneriters_list = [n, 2*n, round(Int64, Lmax/mu), m_star]
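
# These candidates match the Free_SVRG variants dispatched below:
#   n -> Free_SVRG, 2n -> Free_SVRG_2n, Lmax/mu -> Free_SVRG_lmax,
#   m_star -> Free_SVRG_mstar.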

# numinneriters is the number of inner-loop iterations; see the references
@match method_name begin
"SVRG_bubeck" => (method = StochOpt.initiate_SVRG_bubeck(
prob, options, sampling, numinneriters=-1
))
"Free_SVRG" => (method = StochOpt.initiate_Free_SVRG(
prob, options, sampling, numinneriters=n, averaged_reference_point=true
))
"Free_SVRG_2n" => (method = StochOpt.initiate_Free_SVRG(
prob, options, sampling, numinneriters=2*n, averaged_reference_point=true
))
"Free_SVRG_lmax" => (method = StochOpt.initiate_Free_SVRG(
prob, options, sampling, numinneriters=round(Int64, Lmax/mu), averaged_reference_point=true
))
"Free_SVRG_mstar" => (method = StochOpt.initiate_Free_SVRG(
prob, options, sampling, numinneriters=m_star, averaged_reference_point=true
))
"Leap_SVRG" => (method = StochOpt.initiate_Leap_SVRG(
prob, options, sampling, 1/prob.numdata
))
"L_SVRG_D" => (method = StochOpt.initiate_L_SVRG_D(
prob, options, sampling, m_star_inv
))
"SAGA_nice" => (method = StochOpt.initiate_SAGA_nice(prob, options))
_ => (method = method_name)
end

prob.fsol = 0.0 # disable loading of the solution
return minimize_func(prob, method, options);

end


# Named minimize_func to avoid clashing with minimizeFunc, which already
# exists inside StochOpt
function minimize_func(prob::Prob, method_input, options::MyOptions)
if options.initial_point == "randn"  # set the initial point
x = randn(prob.numfeatures);
elseif options.initial_point == "rand"
x = rand(prob.numfeatures);
elseif options.initial_point == "ones"
x = ones(prob.numfeatures);
else
x = zeros(prob.numfeatures);
end

if typeof(method_input) == String
method = StochOpt.boot_method(method_input, prob, options);
if method == "METHOD DOES NOT EXIST"
println("FAIL: unknown method name: ", method_input);
return
end
else
method = method_input;
method.bootmethod(prob, method, options);
end

d = zeros(prob.numfeatures); # search direction vector

for iter = 1:options.max_iter
## Take one step; stepmethod mutates the direction d in place
method.stepmethod(x, prob, options, method, iter, d)
x[:] = x + method.stepsize * d
end

return x
end
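
# Example call (a sketch mirroring how the Python wrapper below invokes
# solve_logreg, with X laid out as features x samples):
#   X = sprandn(50, 200, 0.1)
#   y = rand([-1.0, 1.0], 200)
#   w = solve_logreg(X, y, 0.1, 10, "Free_SVRG", 1, -1)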
96 changes: 96 additions & 0 deletions solvers/julia_stochopt.py
@@ -0,0 +1,96 @@
from pathlib import Path
from benchopt import safe_import_context
from benchopt.stopping_criterion import SufficientProgressCriterion

from benchopt.helpers.julia import JuliaSolver
from benchopt.helpers.julia import get_jl_interpreter
from benchopt.helpers.julia import assert_julia_installed

with safe_import_context() as import_ctx:
assert_julia_installed()
from scipy import sparse


# File containing the function to be called from julia
JULIA_SOLVER_FILE = Path(__file__).with_suffix('.jl')


class Solver(JuliaSolver):

name = 'julia-stochopt'
references = [
'[1] Gazagnadou, Gower and Salmon, "Optimal mini-batch and step sizes'
' for SAGA", International Conference on Machine Learning, PMLR, 2019.',
'[2] Sebbouh et al., "Towards closing the gap between the theory and'
' practice of SVRG", Advances in Neural Information Processing Systems,'
' 2019.',
'https://github.com/gowerrobert/StochOpt.jl',
]

# The list of dependencies can be found on the package's GitHub page
julia_requirements = [
'StochOpt::https://github.com/tommoral/StochOpt.jl#FIX_linear_algebra_deps', # noqa: E501
'PyCall',
]

parameters = {
'method': [
"SVRG",
"Free_SVRG", # SVRG with inner loop not averaged at any point [2]
"BFGS",
"AMgauss", # A variant of SVRG that uses a embed Hessian matrix
"SAGA_nice", # SAGA sampling with closed-form optimal mini-batch [1]
"SVRG_bubeck",
"Leap_SVRG", # SVRG without outer loop [2]
"L_SVRG_D", # Loopless-SVRG-Decreasing [2]
"Free_SVRG_2n",
"Free_SVRG_lmax",
"Free_SVRG_mstar",
],
'batch_size': [
1, # allows computing the theoretical step size for some methods
128
],
'numinneriters': [
1,
-1
]
}

stopping_criterion = SufficientProgressCriterion(
strategy='iteration', patience=10
)

def set_objective(self, X, y, lmbd):

self.X, self.y, self.lmbd = X, y, lmbd

jl = get_jl_interpreter()
jl.include(str(JULIA_SOLVER_FILE))
self.solve_logreg = jl.solve_logreg

# transform scipy sparse array to julia sparse array
if isinstance(X, sparse.csc_array):
scipyCSC_to_julia = jl.pyfunctionret(
jl.Main.scipyCSC_to_julia, jl.Any, jl.PyObject
)
self.X = scipyCSC_to_julia(X)
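
# Note: pyfunctionret wraps the Julia function so its return value
# stays a raw Julia object (jl.Any) instead of being copied back to
# Python; the matrix is therefore converted once and the resulting
# Julia sparse matrix is reused by every subsequent run.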

# set the optimal number of inner iterations to n_samples (see Table 1 in [2])
if self.method == 'Free_SVRG' and self.numinneriters != -1:
self.numinneriters = X.shape[0]

# run a couple of iterations once so the JIT compilation is cached
self.solve_logreg(
self.X.T, self.y, self.lmbd, 2,
self.method, self.batch_size, self.numinneriters
)

def run(self, n_iter):
self.coef_ = self.solve_logreg(
self.X.T, self.y, self.lmbd, n_iter,
self.method, self.batch_size, self.numinneriters
)

def get_result(self):
return self.coef_.ravel()
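
With the solver and datasets in place, the benchmark can be run through
benchopt's CLI, e.g. (assuming the benchmark directory is the current one):

    benchopt run . -s julia-stochopt -d ijcnn1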