diff --git a/Corrfunc/tests.py b/Corrfunc/tests.py
index 200f2124..98875fb3 100644
--- a/Corrfunc/tests.py
+++ b/Corrfunc/tests.py
@@ -65,38 +65,33 @@ def test_linear_binning_mocks(isa='fallback'):
     autocorr = 1
     numbins_to_print = 5
     cosmology = 1
+    bin_types = ['custom', 'lin']
 
-    def allclose(a, b):
-        return all(np.allclose(a[name], b[name]) for name in a.dtype.names)
+    def allclose(a, *others):
+        toret = True
+        for b in others:
+            toret &= all(np.allclose(a[name], b[name]) for name in ['npairs'])
+        return toret
+
+    def test_bin_type(pair_counter, *args, **kwargs):
+        res = (pair_counter(*args, bin_type=bin_type, weight_type=weight_type, **kwargs) for bin_type in bin_types for weight_type in [None, "pair_product"])
+        assert allclose(*res)
 
     print("\nRunning 2-D correlation function xi(rp,pi)")
-    results_DDrppi_ref = DDrppi_mocks(autocorr, cosmology, nthreads,
-                                      pimax, binfile,
-                                      ra, dec, cz,
-                                      weights1=w, weight_type='pair_product',
-                                      output_rpavg=True, verbose=True, isa=isa, bin_type='custom')
-    results_DDrppi = DDrppi_mocks(autocorr, cosmology, nthreads,
-                                  pimax, binfile,
-                                  ra, dec, cz,
-                                  weights1=w, weight_type='pair_product',
-                                  output_rpavg=True, verbose=False, isa=isa, bin_type='lin')
-    assert allclose(results_DDrppi, results_DDrppi_ref)
+    test_bin_type(DDrppi_mocks, autocorr, cosmology, nthreads,
+                  pimax, binfile,
+                  ra, dec, cz, weights1=w,
+                  output_rpavg=True, verbose=True,
+                  isa=isa)
 
     nmu_bins = 10
     mu_max = 1.0
-
     print("\nRunning 2-D correlation function xi(s,mu)")
-    results_DDsmu_ref = DDsmu_mocks(autocorr, cosmology, nthreads,
-                                    mu_max, nmu_bins, binfile,
-                                    ra, dec, cz, weights1=w,
-                                    output_savg=True, verbose=True,
-                                    weight_type='pair_product', isa=isa, bin_type='custom')
-    results_DDsmu = DDsmu_mocks(autocorr, cosmology, nthreads,
-                                mu_max, nmu_bins, binfile,
-                                ra, dec, cz, weights1=w,
-                                output_savg=True, verbose=False,
-                                weight_type='pair_product', isa=isa, bin_type='lin')
-    assert allclose(results_DDsmu, results_DDsmu_ref)
+    test_bin_type(DDsmu_mocks, autocorr, cosmology, nthreads,
+                  mu_max, nmu_bins, binfile,
+                  ra, dec, cz, weights1=w,
+                  output_savg=True, verbose=True,
+                  isa=isa)
 
 
 def test_linear_binning_theory(isa='fallback'):
@@ -113,7 +108,7 @@ def test_linear_binning_theory(isa='fallback'):
 
     tstart = time.time()
     t0 = tstart
-    x, y, z = read_catalog()
+    x, y, z = np.array(read_catalog())[:,:1000]
     w = np.ones((1, len(x)), dtype=x.dtype)
     boxsize = 420.0
     t1 = time.time()
@@ -133,80 +128,54 @@ def test_linear_binning_theory(isa='fallback'):
             file.write("{0} {1}\n".format(low, hi))
     autocorr = 1
     periodic = 1
+    bin_types = ['custom', 'lin']
+
+    def allclose(a, *others):
+        toret = True
+        for b in others:
+            toret &= all(np.allclose(a[name], b[name]) for name in ['npairs'])
+        return toret
 
-    def allclose(a, b):
-        return all(np.allclose(a[name], b[name]) for name in a.dtype.names)
+    def test_bin_type(pair_counter, *args, **kwargs):
+        res = (pair_counter(*args, bin_type=bin_type, weight_type=weight_type, **kwargs) for bin_type in bin_types for weight_type in [None, "pair_product"])
+        assert allclose(*res)
 
     print("Running 3-D correlation function DD(r)")
-    results_DD_ref = DD(autocorr, nthreads, binfile, x, y, z,
-                        weights1=w, weight_type='pair_product',
-                        verbose=True, periodic=periodic, boxsize=boxsize,
-                        isa=isa, bin_type='custom')
-    results_DD = DD(autocorr, nthreads, binfile, x, y, z,
-                    weights1=w, weight_type='pair_product',
-                    verbose=False, periodic=periodic, boxsize=boxsize,
-                    isa=isa, bin_type='auto')
-    allclose(results_DD, results_DD_ref)
+
+    test_bin_type(DD, autocorr, nthreads, binfile, x, y, z, weights1=w,
+                  verbose=True, periodic=periodic, boxsize=boxsize, isa=isa)
+
 
     print("\nRunning 2-D correlation function DD(rp,pi)")
-    results_DDrppi_ref = DDrppi(autocorr, nthreads, pimax,
-                                binfile, x, y, z,
-                                weights1=w, weight_type='pair_product',
-                                verbose=False, periodic=periodic,
-                                boxsize=boxsize, isa=isa, bin_type='custom')
-    results_DDrppi = DDrppi(autocorr, nthreads, pimax,
-                            binfile, x, y, z,
-                            weights1=w, weight_type='pair_product',
-                            verbose=False, periodic=periodic,
-                            boxsize=boxsize, isa=isa, bin_type='lin')
-    allclose(results_DDrppi, results_DDrppi_ref)
+    test_bin_type(DDrppi, autocorr, nthreads, pimax,
+                  binfile, x, y, z, weights1=w,
+                  verbose=True, periodic=periodic,
+                  boxsize=boxsize, isa=isa)
 
     mu_max = 0.5
     nmu_bins = 10
 
     print("\nRunning 2-D correlation function DD(s,mu)")
-    results_DDsmu_ref = DDsmu(autocorr, nthreads, binfile,
-                              mu_max, nmu_bins,
-                              x, y, z,
-                              weights1=w, weight_type='pair_product',
-                              verbose=True, periodic=periodic,
-                              boxsize=boxsize, output_savg=True,
-                              isa=isa, bin_type='custom')
-    results_DDsmu = DDsmu(autocorr, nthreads, binfile,
-                          mu_max, nmu_bins,
-                          x, y, z,
-                          weights1=w, weight_type='pair_product',
-                          verbose=False, periodic=periodic,
-                          boxsize=boxsize, output_savg=True,
-                          isa=isa, bin_type='lin')
-    allclose(results_DDsmu, results_DDsmu_ref)
+    test_bin_type(DDsmu, autocorr, nthreads, binfile,
+                  mu_max, nmu_bins,
+                  x, y, z, weights1=w,
+                  verbose=True, periodic=periodic,
+                  boxsize=boxsize, output_savg=True, isa=isa)
 
     print("\nRunning 2-D projected correlation function wp(rp)")
-    results_wp_ref = wp(boxsize, pimax, nthreads,
-                        binfile, x, y, z,
-                        weights=w, weight_type='pair_product',
-                        verbose=True, isa=isa, bin_type='custom')
-    results_wp = wp(boxsize, pimax, nthreads,
-                    binfile, x, y, z,
-                    weights=w, weight_type='pair_product',
-                    verbose=False, isa=isa, bin_type='custom')
-    allclose(results_wp, results_wp_ref)
+    test_bin_type(wp, boxsize, pimax, nthreads,
+                  binfile, x, y, z, weights=w,
+                  verbose=True, isa=isa)
 
     print("\nRunning 3-D auto-correlation function xi(r)")
-    results_xi_ref = xi(boxsize, nthreads, binfile,
-                        x, y, z,
-                        weights=w, weight_type='pair_product',
-                        verbose=True, isa=isa, bin_type='custom')
-    results_xi = xi(boxsize, nthreads, binfile,
-                    x, y, z,
-                    weights=w, weight_type='pair_product',
-                    verbose=False, isa=isa, bin_type='lin')
-    allclose(results_xi, results_xi_ref)
+    test_bin_type(xi, boxsize, nthreads, binfile,
+                  x, y, z, weights=w,
+                  verbose=True, isa=isa)
 
 
 if __name__ == '__main__':
 
-    #tests()
+    tests()
     for isa in ['fallback','sse42','avx','avx512f']:
         test_linear_binning_mocks(isa=isa)
         test_linear_binning_theory(isa=isa)
diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py
index 6d9e4971..bd5e6051 100644
--- a/Corrfunc/utils.py
+++ b/Corrfunc/utils.py
@@ -548,11 +548,11 @@ def translate_bin_type_string_to_enum(bin_type):
     except NameError:
         if not isinstance(bin_type, str):
             raise TypeError(msg)
-    valid_bin_type = ['AUTO', 'CUSTOM', 'LIN']
+    valid_bin_types = ['AUTO', 'CUSTOM', 'LIN']
     bin_type_upper = bin_type.upper()
-    if bin_type_upper not in valid_bin_type:
+    if bin_type_upper not in valid_bin_types:
         msg = "Desired bin type = {0} is not in the list of valid "\
-              "bin types = {1}".format(bin_type, valid_bin_type)
+              "bin types = {1}".format(bin_type, valid_bin_types)
         raise ValueError(msg)
 
     enums = {'AUTO': 0,
diff --git a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src
index 9f898f21..04760e83 100644
--- a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src
+++ b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src
@@ -297,12 +297,13 @@ int countpairs_mocks_DOUBLE(const int64_t ND1, DOUBLE *ra1, DOUBLE *dec1, DOUBLE
     double *rupp;
     int nrpbin ;
     double rpmin,rpmax;
-    setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp,&(options->bin_type));
+    setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp);
     if( ! (rpmin > 0.0 && rpmax > 0.0 && rpmin < rpmax && nrpbin > 0)) {
         fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n",
                 rpmin, rpmax, nrpbin);
         return EXIT_FAILURE;
     }
+    detect_bin_type(rupp, nrpbin, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n");
         else fprintf(stderr,"Custom R binning\n");
diff --git a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src
index ba3d0638..3e195bfc 100644
--- a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src
+++ b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src
@@ -53,12 +53,11 @@ static inline int countpairs_rp_pi_mocks_avx512_intrinsics_DOUBLE(const int64_t
     const int32_t need_rpavg = src_rpavg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax;
-    DOUBLE inv_rpstep=0., rpmin_invstep=0.;
     AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_rpstep = (nbin - 1)/(rpmax - rpmin);
-        rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin);
+        const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep);
         m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep);
     }
diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src
index 2045de9e..fb7528f1 100644
--- a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src
+++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src
@@ -297,12 +297,13 @@ int countpairs_mocks_s_mu_DOUBLE(const int64_t ND1, DOUBLE *ra1, DOUBLE *dec1, D
     double *supp;
     int nsbin;
     double smin,smax;
-    setup_bins(sbinfile,&smin,&smax,&nsbin,&supp,&(options->bin_type));
+    setup_bins(sbinfile,&smin,&smax,&nsbin,&supp);
     if( ! (smin > 0.0 && smax > 0.0 && smin < smax && nsbin > 0)) {
         fprintf(stderr,"Error: Could not setup with S bins correctly. (smin = %lf, smax = %lf, with nbins = %d). Expected non-zero smin/smax with smax > smin and nbins >=1 \n",
                 smin, smax, nsbin);
         return EXIT_FAILURE;
     }
+    detect_bin_type(supp, nsbin, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear S binning\n");
         else fprintf(stderr,"Custom S binning\n");
diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src
index e2d2f377..a82574a2 100644
--- a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src
+++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src
@@ -48,12 +48,11 @@ static inline int countpairs_s_mu_mocks_avx512_intrinsics_DOUBLE(const int64_t N
     const int32_t need_savg = src_savg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_smin=smin*smin, sqr_smax=smax*smax;
-    DOUBLE inv_sstep=0., smin_invstep=0.;
     AVX512_FLOATS m_inv_sstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_smin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_sstep = (nsbin - 1)/(smax - smin);
-        smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_sstep = (nsbin - 1)/(smax - smin);
+        const DOUBLE smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_sstep = AVX512_SET_FLOAT(inv_sstep);
         m_smin_invstep = AVX512_SET_FLOAT(smin_invstep);
     }
diff --git a/theory/DD/countpairs_impl.c.src b/theory/DD/countpairs_impl.c.src
index 42ed5d30..c66f6414 100644
--- a/theory/DD/countpairs_impl.c.src
+++ b/theory/DD/countpairs_impl.c.src
@@ -198,13 +198,14 @@ int countpairs_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z1,
     double *rupp=NULL;
     int nrpbin ;
     double rpmin,rpmax;
-    setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp,&(options->bin_type));
+    setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp);
     if( ! (rpmin >=0.0 && rpmax > 0.0 && rpmin < rpmax && nrpbin > 0)) {
         fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). "
                 "Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n",
                 rpmin, rpmax, nrpbin);
         return EXIT_FAILURE;
     }
+    detect_bin_type(rupp, nrpbin, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n");
         else fprintf(stderr,"Custom R binning\n");
diff --git a/theory/DD/countpairs_kernels.c.src b/theory/DD/countpairs_kernels.c.src
index d6b7b3a0..884ed55a 100644
--- a/theory/DD/countpairs_kernels.c.src
+++ b/theory/DD/countpairs_kernels.c.src
@@ -51,12 +51,11 @@ static inline int countpairs_avx512_intrinsics_DOUBLE(const int64_t N0, DOUBLE *
     const int32_t need_rpavg = src_rpavg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax;
-    DOUBLE inv_rpstep=0., rpmin_invstep=0.;
     AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_rpstep = (nbin - 1)/(rpmax - rpmin);
-        rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin);
+        const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep);
         m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep);
     }
@@ -249,7 +248,7 @@ static inline int countpairs_avx512_intrinsics_DOUBLE(const int64_t N0, DOUBLE *
                     }
                 }//backwards loop over the bins
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 //Do I need this step of going via the union? accessing int[] -> AVX* vector might
                 //cause alignment problems but accessing the ints from an AVX*
                 //register should always be fine
@@ -260,6 +259,9 @@ static inline int countpairs_avx512_intrinsics_DOUBLE(const int64_t N0, DOUBLE *
 #endif
                 for(int jj=0;jj<AVX512_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
@@ -525,7 +527,8 @@ static inline int countpairs_avx_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0,
                 if (!need_rpavg) {
                     union_mDperp.m_Dperp = AVX_SQRT_FLOAT(r2);
                 }
-                m_rpbin = AVX_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                m_rpbin = AVX_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                union_rpbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_rpbin);
             }
             else {
                 //Loop backwards through nbins. m_mask_left contains all the points that are less than rpmax
@@ -544,7 +547,7 @@ static inline int countpairs_avx_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0,
                     }
                 }
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 union_rpbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_rpbin);
                 //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMPILER
@@ -552,6 +555,9 @@ static inline int countpairs_avx_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0,
 #endif
                 for(int jj=0;jj<AVX_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
@@ -839,7 +845,8 @@ static inline int countpairs_sse_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0,
               if (!need_rpavg) {
                   union_mDperp.m_Dperp = SSE_SQRT_FLOAT(r2);
               }
-              m_rpbin = SSE_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+              m_rpbin = SSE_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+              union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin);
           }
           else {
               for(int kbin=nbin-1;kbin>=1;kbin--) {
@@ -850,6 +857,7 @@ static inline int countpairs_sse_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0,
                   npairs[kbin] += SSE_BIT_COUNT_INT(test2);
                   if(need_rpavg || need_weightavg){
                       m_rpbin = SSE_BLEND_FLOATS_WITH_MASK(m_rpbin,m_kbin[kbin], m_bin_mask);
+                      union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin);
                   }
                   int test3 = SSE_TEST_COMPARISON(m_mask_left);
                   if(test3 == 0) {
@@ -857,14 +865,16 @@ static inline int countpairs_sse_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0,
                   }
               }
           }
-          if(need_rpavg || need_weightavg) {
-              union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin);
+          if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
               //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMPILER
 #pragma unroll(SSE_NVEC)
 #endif
               for(int jj=0;jj<SSE_NVEC;jj++) {
                   const int kbin = union_rpbin.ibin[jj];
+                  if(bin_type == BIN_LIN){
+                      npairs[kbin]++;
+                  }
                   if(need_rpavg){
                       const DOUBLE r = union_mDperp.Dperp[jj];
                       rpavg[kbin] += r;
diff --git a/theory/DDrppi/countpairs_rp_pi_impl.c.src b/theory/DDrppi/countpairs_rp_pi_impl.c.src
index 48367bf4..b0231bcc 100644
--- a/theory/DDrppi/countpairs_rp_pi_impl.c.src
+++ b/theory/DDrppi/countpairs_rp_pi_impl.c.src
@@ -197,12 +197,13 @@ int countpairs_rp_pi_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z
     double *rupp;
     int nrpbin ;
     double rpmin,rpmax;
-    setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp,&(options->bin_type));
+    setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp);
     if( ! (rpmin >= 0.0 && rpmax > 0.0 && rpmin < rpmax && nrpbin > 0)) {
         fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n",
                 rpmin, rpmax, nrpbin);
         return EXIT_FAILURE;
     }
+    detect_bin_type(rupp, nrpbin, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n");
         else fprintf(stderr,"Custom R binning\n");
diff --git a/theory/DDrppi/countpairs_rp_pi_kernels.c.src b/theory/DDrppi/countpairs_rp_pi_kernels.c.src
index 85a7259e..d31e763b 100644
--- a/theory/DDrppi/countpairs_rp_pi_kernels.c.src
+++ b/theory/DDrppi/countpairs_rp_pi_kernels.c.src
@@ -44,12 +44,11 @@ static inline int countpairs_rp_pi_avx512_intrinsics_DOUBLE(const int64_t N0, DO
     const int32_t need_rpavg = src_rpavg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax;
-    DOUBLE inv_rpstep=0., rpmin_invstep=0.;
     AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_rpstep = (nbin - 1)/(rpmax - rpmin);
-        rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin);
+        const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep);
         m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep);
     }
diff --git a/theory/DDsmu/countpairs_s_mu_impl.c.src b/theory/DDsmu/countpairs_s_mu_impl.c.src
index 5a04c54b..61e3d488 100644
--- a/theory/DDsmu/countpairs_s_mu_impl.c.src
+++ b/theory/DDsmu/countpairs_s_mu_impl.c.src
@@ -204,12 +204,13 @@ int countpairs_s_mu_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z1
     double *supp;
     int nsbin;
     double smin,smax;
-    setup_bins(sbinfile,&smin,&smax,&nsbin,&supp,&(options->bin_type));
+    setup_bins(sbinfile,&smin,&smax,&nsbin,&supp);
     if( ! (smin >= 0.0 && smax > 0.0 && smin < smax && nsbin > 0)) {
         fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n",
                 smin, smax, nsbin);
         return EXIT_FAILURE;
     }
+    detect_bin_type(supp, nsbin, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n");
         else fprintf(stderr,"Custom R binning\n");
diff --git a/theory/DDsmu/countpairs_s_mu_kernels.c.src b/theory/DDsmu/countpairs_s_mu_kernels.c.src
index 1786b9f1..8be4870e 100644
--- a/theory/DDsmu/countpairs_s_mu_kernels.c.src
+++ b/theory/DDsmu/countpairs_s_mu_kernels.c.src
@@ -45,12 +45,11 @@ static inline int countpairs_s_mu_avx512_intrinsics_DOUBLE(const int64_t N0, DOU
     const int32_t need_savg = src_savg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_smin=smin*smin, sqr_smax=smax*smax;
-    DOUBLE inv_sstep=0., smin_invstep=0.;
     AVX512_FLOATS m_inv_sstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_smin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_sstep = (nsbin - 1)/(smax - smin);
-        smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_sstep = (nsbin - 1)/(smax - smin);
+        const DOUBLE smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_sstep = AVX512_SET_FLOAT(inv_sstep);
         m_smin_invstep = AVX512_SET_FLOAT(smin_invstep);
     }
diff --git a/theory/wp/countpairs_wp_impl.c.src b/theory/wp/countpairs_wp_impl.c.src
index 5f2658a6..690dd9f1 100644
--- a/theory/wp/countpairs_wp_impl.c.src
+++ b/theory/wp/countpairs_wp_impl.c.src
@@ -218,12 +218,13 @@ int countpairs_wp_DOUBLE(const int64_t ND, DOUBLE * restrict X, DOUBLE * restric
     double *rupp;
     double rpmin,rpmax;
     int nrpbins;
-    setup_bins(binfile,&rpmin,&rpmax,&nrpbins,&rupp,&(options->bin_type));
+    setup_bins(binfile,&rpmin,&rpmax,&nrpbins,&rupp);
     if( ! (rpmin >=0 && rpmax > 0.0 && rpmin < rpmax && nrpbins > 0)) {
         fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n",
                 rpmin, rpmax, nrpbins);
         return EXIT_FAILURE;
     }
+    detect_bin_type(rupp, nrpbins, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n");
         else fprintf(stderr,"Custom R binning\n");
diff --git a/theory/wp/wp_kernels.c.src b/theory/wp/wp_kernels.c.src
index 633a7162..685c61dc 100644
--- a/theory/wp/wp_kernels.c.src
+++ b/theory/wp/wp_kernels.c.src
@@ -49,12 +49,11 @@ static inline int wp_avx512_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0
     const int32_t need_rpavg = src_rpavg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax;
-    DOUBLE inv_rpstep=0., rpmin_invstep=0.;
     AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_rpstep = (nbin - 1)/(rpmax - rpmin);
-        rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin);
+        const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep);
         m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep);
     }
@@ -271,7 +270,7 @@ static inline int wp_avx512_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0
                     }
                 }//backwards loop over the bins
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 //Do I need this step of going via the union? accessing int[] -> AVX* vector might
                 //cause alignment problems but accessing the ints from an AVX*
                 //register should always be fine
@@ -282,6 +281,9 @@ static inline int wp_avx512_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0
 #endif
                 for(int jj=0;jj<AVX512_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
@@ -565,7 +567,8 @@ static inline int wp_avx2_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0,
                 if (!need_rpavg) {
                     union_mDperp.m_Dperp = AVX2_SQRT_FLOAT(r2);
                 }
-                m_rpbin = AVX2_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                m_rpbin = AVX2_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                union_rpbin.m_ibin = AVX2_TRUNCATE_FLOAT_TO_INT(m_rpbin);
             }
             else {
                 //Loop backwards through nbins. m_mask_left contains all the points that are less than rpmax
@@ -585,7 +588,7 @@ static inline int wp_avx2_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0,
                     }
                 }
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 union_rpbin.m_ibin = AVX2_TRUNCATE_FLOAT_TO_INT(m_rpbin);
                 //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMPILER
@@ -593,6 +596,9 @@ static inline int wp_avx2_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0,
 #endif
                 for(int jj=0;jj<AVX2_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
@@ -945,7 +951,8 @@ static inline int wp_avx_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
                 if (!need_rpavg) {
                     union_mDperp.m_Dperp = AVX_SQRT_FLOAT(r2);
                 }
-                m_rpbin = AVX_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                m_rpbin = AVX_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                union_rpbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_rpbin);
             }
             else {
                 //Loop backwards through nbins. m_mask_left contains all the points that are less than rpmax
@@ -964,7 +971,7 @@ static inline int wp_avx_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
                     }
                 }
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 union_rpbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_rpbin);
                 //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMPILER
@@ -972,6 +979,9 @@ static inline int wp_avx_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
 #endif
                 for(int jj=0;jj<AVX_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
@@ -1313,7 +1323,8 @@ static inline int wp_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
                 if (!need_rpavg) {
                     union_mDperp.m_Dperp = SSE_SQRT_FLOAT(r2);
                 }
-                m_rpbin = SSE_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                m_rpbin = SSE_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin);
             }
             else {
                 for(int kbin=nbin-1;kbin>=1;kbin--) {
@@ -1328,7 +1339,7 @@ static inline int wp_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
                     if(SSE_TEST_COMPARISON(m_mask_left) == 0) break;
                 }
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin);
                 //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMqPILER
@@ -1336,6 +1347,9 @@ static inline int wp_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
 #endif
                 for(int jj=0;jj<SSE_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
diff --git a/theory/xi/countpairs_xi_impl.c.src b/theory/xi/countpairs_xi_impl.c.src
index 51b73716..83b11663 100644
--- a/theory/xi/countpairs_xi_impl.c.src
+++ b/theory/xi/countpairs_xi_impl.c.src
@@ -203,12 +203,13 @@ int countpairs_xi_DOUBLE(const int64_t ND, DOUBLE * restrict X, DOUBLE * restric
     double *rupp;
     int nbins;
     double rmin,rmax;
-    setup_bins(binfile,&rmin,&rmax,&nbins,&rupp,&(options->bin_type));
+    setup_bins(binfile,&rmin,&rmax,&nbins,&rupp);
     if( ! (rmin >= 0.0 && rmax > 0.0 && rmin < rmax && nbins > 0)) {
         fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n",
                 rmin, rmax, nbins);
         return EXIT_FAILURE;
     }
+    detect_bin_type(rupp, nbins, &(options->bin_type));
     if (options->verbose) {
         if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n");
         else fprintf(stderr,"Custom R binning\n");
diff --git a/theory/xi/xi_kernels.c.src b/theory/xi/xi_kernels.c.src
index 3fb26565..28594a71 100644
--- a/theory/xi/xi_kernels.c.src
+++ b/theory/xi/xi_kernels.c.src
@@ -51,12 +51,11 @@ static inline int xi_avx512_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1
     const int32_t need_rpavg = src_rpavg != NULL;
     const int32_t need_weightavg = src_weightavg != NULL;
     const DOUBLE sqr_rmin=rmin*rmin, sqr_rmax=rmax*rmax;
-    DOUBLE inv_rpstep=0., rpmin_invstep=0.;
     AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT();
     AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT();
     if (bin_type == BIN_LIN) {
-        inv_rpstep = (nbin - 1)/(rmax - rmin);
-        rpmin_invstep = 1 - rmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
+        const DOUBLE inv_rpstep = (nbin - 1)/(rmax - rmin);
+        const DOUBLE rpmin_invstep = 1 - rmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep
         m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep);
         m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep);
     }
@@ -245,7 +244,7 @@ static inline int xi_avx512_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1
                     }
                 }//backwards loop over the bins
             }
-            if(need_rpavg || need_weightavg) {
+            if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) {
                 //Do I need this step of going via the union? accessing int[] -> AVX* vector might
                 //cause alignment problems but accessing the ints from an AVX*
                 //register should always be fine
@@ -256,6 +255,9 @@ static inline int xi_avx512_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1
 #endif
                 for(int jj=0;jj<AVX512_NVEC;jj++) {
                     const int kbin = union_rpbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_rpavg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         rpavg[kbin] += r;
@@ -506,7 +508,8 @@ static inline int xi_avx_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, c
                 if (!need_ravg) {
                     union_mDperp.m_Dperp = AVX_SQRT_FLOAT(r2);
                 }
-                m_rbin = AVX_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                m_rbin = AVX_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                union_rbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_rbin);
             }
             else {
                 //Loop backwards through nbins. m_mask_left contains all the points that are less than rmax
@@ -525,7 +528,7 @@ static inline int xi_avx_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, c
                     }
                 }
             }
-            if(need_ravg || need_weightavg) {
+            if(need_ravg || need_weightavg || (bin_type == BIN_LIN)) {
                 union_rbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_rbin);
                 //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMPILER
@@ -533,6 +536,9 @@ static inline int xi_avx_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, c
 #endif
                 for(int jj=0;jj<AVX_NVEC;jj++) {
                     const int kbin = union_rbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npair[kbin]++;
+                    }
                     if(need_ravg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         ravg[kbin] += r;
@@ -814,7 +820,8 @@ static inline int xi_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
                 if (!need_ravg) {
                     union_mDperp.m_Dperp = SSE_SQRT_FLOAT(r2);
                 }
-                m_rbin = SSE_FMA_ADD_TRUNCATE_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                m_rbin = SSE_FMA_ADD_FLOATS(union_mDperp.m_Dperp,m_inv_rpstep,m_rpmin_invstep);
+                union_rbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rbin);
             }
             else {
           			for(int kbin=nbin-1;kbin>=1;kbin--) {
@@ -832,7 +839,7 @@ static inline int xi_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
             				}
           			}
             }
-            if(need_ravg || need_weightavg) {
+            if(need_ravg || need_weightavg || (bin_type == BIN_LIN)) {
                 union_rbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rbin);
                 //protect the unroll pragma in case compiler is not icc.
 #if  __INTEL_COMPILER
@@ -840,6 +847,9 @@ static inline int xi_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c
 #endif
                 for(int jj=0;jj<SSE_NVEC;jj++) {
                     const int kbin = union_rbin.ibin[jj];
+                    if(bin_type == BIN_LIN){
+                        npairs[kbin]++;
+                    }
                     if(need_ravg){
                         const DOUBLE r = union_mDperp.Dperp[jj];
                         ravg[kbin] += r;
diff --git a/utils/avx2_calls.h b/utils/avx2_calls.h
index 387c5a66..96f97edb 100644
--- a/utils/avx2_calls.h
+++ b/utils/avx2_calls.h
@@ -63,7 +63,6 @@ extern "C" {
 #define AVX2_LOG10_FLOAT(X)                 _mm256_log10_ps(X)
 #define AVX2_LOG2_FLOAT(X)                _mm256_log2_ps(X)
 #define AVX2_RECIPROCAL_FLOATS(X)         _mm256_rcp_ps(X)
-#define AVX2_CONVERT_INT_TO_FLOAT(X)      _mm256_cvtepi32_ps(X)
 
 #define AVX2_BROADCAST_FLOAT(X)           _mm256_broadcast_ss(X)
 #define AVX2_SET_FLOAT(X)                 _mm256_set1_ps(X)
@@ -108,6 +107,7 @@ extern "C" {
 #define AVX2_STREAMING_STORE_INTS(X,Y)     _mm256_stream_si256(X,Y)
 
 /* returns Z + XY*/
+#define AVX2_FMA_ADD_FLOATS(X,Y,Z)          _mm256_fmadd_ps(X,Y,Z)
 #define AVX2_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm256_round_ps(_mm256_fmadd_ps(X,Y,Z),_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
 
 #else //DOUBLE PRECISION CALCULATIONS
@@ -142,7 +142,6 @@ extern "C" {
 #define AVX2_LOG2_FLOAT(X)                _mm256_log2_pd(X)
 #define AVX2_LOG10_FLOAT(X)                _mm256_log10_pd(X)
 #define AVX2_RECIPROCAL_FLOATS(X)         _mm256_rcp_pd(X)
-#define AVX2_CONVERT_INT_TO_FLOAT(X)      _mm256_cvtepi32_pd(X)
 
     // X OP Y
 #define AVX2_COMPARE_FLOATS(X,Y,OP)        _mm256_cmp_pd(X,Y,OP)
@@ -185,6 +184,7 @@ extern "C" {
 #define AVX2_STREAMING_STORE_INTS(X,Y)     _mm_stream_si128(X,Y)
 
 /* returns Z + XY*/
+#define AVX2_FMA_ADD_FLOATS(X,Y,Z)          _mm256_fmadd_pd(X,Y,Z)
 #define AVX2_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm256_round_pd(_mm256_fmadd_pd(X,Y,Z),_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
 
 #endif //DOUBLE_PREC
diff --git a/utils/avx512_calls.h b/utils/avx512_calls.h
index 69b01c2f..1ea3d198 100644
--- a/utils/avx512_calls.h
+++ b/utils/avx512_calls.h
@@ -148,7 +148,6 @@ extern "C" {
 #define AVX512_RECIPROCAL_FLOATS(X)                               _mm512_rcp14_ps(X)
 #define AVX512_MASK_RECIPROCAL_FLOATS(FALSEVALS, MASK, X)         _mm512_mask_rcp14_ps(FALSEVALS, MASK, X)
 #define AVX512_MASKZ_RECIPROCAL_FLOATS(MASK, X)                   _mm512_maskz_rcp14_ps(MASK, X)
-#define AVX512_CONVERT_INT_TO_FLOAT(X)                            _mm512_cvtepi32_ps(X)
 
 #define AVX512_SET_FLOAT(X)                                       _mm512_set1_ps(X)
 
@@ -285,7 +284,6 @@ extern "C" {
 #define AVX512_RECIPROCAL_FLOATS(X)                               _mm512_rcp14_pd(X)
 #define AVX512_MASK_RECIPROCAL_FLOATS(FALSEVALS, MASK, X)         _mm512_mask_rcp14_pd(FALSEVALS, MASK, X)
 #define AVX512_MASKZ_RECIPROCAL_FLOATS(MASK, X)                   _mm512_maskz_rcp14_pd(MASK, X)
-#define AVX512_CONVERT_INT_TO_FLOAT(X)                            _mm512_cvtepi32_pd(X)
 
     // X OP Y
 #define AVX512_COMPARE_FLOATS(X, Y, OP)                           _mm512_cmp_pd_mask(X, Y, OP)
diff --git a/utils/avx_calls.h b/utils/avx_calls.h
index f8c1f3da..830b93f0 100644
--- a/utils/avx_calls.h
+++ b/utils/avx_calls.h
@@ -62,8 +62,8 @@ extern "C" {
 #define AVX_SET_FLOAT(X)                 _mm256_set1_ps(X)
 
 /* returns Z + XY*/
+#define AVX_FMA_ADD_FLOATS(X,Y,Z)          _mm256_fmadd_ps(X,Y,Z)
 #define AVX_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm256_round_ps(_mm256_fmadd_ps(X,Y,Z),_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
-#define AVX_CONVERT_INT_TO_FLOAT(X)        _mm256_cvtepi32_ps(X)
 
 // X OP Y
 #define AVX_COMPARE_FLOATS(X,Y,OP)        _mm256_cmp_ps(X,Y,OP)
@@ -127,8 +127,8 @@ extern "C" {
 #define AVX_RECIPROCAL_FLOATS(X)         _mm256_rcp_pd(X)
 
 /* returns Z + XY*/
+#define AVX_FMA_ADD_FLOATS(X,Y,Z)          _mm256_fmadd_pd(X,Y,Z)
 #define AVX_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm256_round_pd(_mm256_fmadd_pd(X,Y,Z),_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
-#define AVX_CONVERT_INT_TO_FLOAT(X)        _mm256_cvtepi32_pd(X)
 
 // X OP Y
 #define AVX_COMPARE_FLOATS(X,Y,OP)        _mm256_cmp_pd(X,Y,OP)
@@ -152,13 +152,13 @@ extern "C" {
 #define AVX_ARC_COSINE(X, order)                  inv_cosine_avx(X, order)
 #endif
 
-    //Max
+//Max
 #define AVX_MAX_FLOATS(X,Y)               _mm256_max_pd(X,Y)
 
   //Absolute value
 #define AVX_ABS_FLOAT(X)                  _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), X), X)
 
-    //Casting (does not actual convert between types)
+//Casting (does not actual convert between types)
 #define AVX_CAST_FLOAT_TO_INT(X)          _mm256_castpd_si256(X)
 #define AVX_CAST_INT_TO_FLOAT(X)          _mm256_castsi256_pd(X)
 
diff --git a/utils/sse_calls.h b/utils/sse_calls.h
index 41a8f3b0..88924bbd 100644
--- a/utils/sse_calls.h
+++ b/utils/sse_calls.h
@@ -61,11 +61,10 @@ extern "C" {
 #define SSE_ABS_FLOAT(X)                  _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), X), X)
 
 /* returns Z + XY*/
+#define SSE_FMA_ADD_FLOATS(X,Y,Z)          _mm_fmadd_ps(X,Y,Z)
 //#define SSE_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm_fmadd_round_ss(X,Y,Z,_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
 #define SSE_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm_round_ps(_mm_fmadd_ps(X,Y,Z),_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
 
-#define SSE_CONVERT_INT_TO_FLOAT(X)                            _mm_cvtepi32_ps(X)
-
 
 #ifdef  __INTEL_COMPILER
 #define SSE_ARC_COSINE(X, order)                 _mm_acos_ps(X)
@@ -122,11 +121,10 @@ extern "C" {
 #define SSE_ABS_FLOAT(X)                  _mm_max_pd(_mm_sub_pd(_mm_setzero_pd(), X), X)
 
 /* returns Z + XY*/
+#define SSE_FMA_ADD_FLOATS(X,Y,Z)           _mm_fmadd_pd(X,Y,Z)
 //#define SSE_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm_fmadd_round_sd(X,Y,Z,_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
 #define SSE_FMA_ADD_TRUNCATE_FLOATS(X,Y,Z) _mm_round_pd(_mm_fmadd_pd(X,Y,Z),_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC)
 
-#define SSE_CONVERT_INT_TO_FLOAT(X)                            _mm_cvtepi32_pd(X)
-
 #endif
 
 #ifndef  __INTEL_COMPILER
diff --git a/utils/utils.c b/utils/utils.c
index a2ef7645..811582d5 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -59,27 +59,7 @@ void get_max_double(const int64_t ND1, const double *cz1, double *czmax)
     *czmax = max;
 }
 
-static int detect_bin_type_float(const float *rupp, int nbin, bin_type_t *bin_type)
-{
-    if (*bin_type == BIN_AUTO) {
-        // if linear spacing, return BIN_LIN, else BIN_CUSTOM
-        const float atol = 1e-8; // same tol as numpy.allclose
-        const float rtol = 1e-5;
-        float rmin = rupp[0];
-        float rstep = (rupp[nbin-1] - rupp[0])/(nbin - 1);
-        *bin_type = BIN_LIN;
-        for (int ii=1; ii<nbin; ii++) {
-            float pred = rmin + rstep*ii;
-            if ((fabsf(rupp[ii] - pred) > atol)||(fabsf((rupp[ii] - pred)/pred) > rtol)) {
-                *bin_type = BIN_CUSTOM;
-                break;
-            }
-        }
-    }
-    return EXIT_SUCCESS;
-}
-
-static int detect_bin_type_double(const double *rupp, int nbin, bin_type_t *bin_type)
+int detect_bin_type(const double *rupp, int nbin, bin_type_t *bin_type)
 {
     if (*bin_type == BIN_AUTO) {
         // if linear spacing, return BIN_LIN, else BIN_CUSTOM
@@ -99,7 +79,7 @@ static int detect_bin_type_double(const double *rupp, int nbin, bin_type_t *bin_
     return EXIT_SUCCESS;
 }
 
-int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type)
+int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp)
 {
     //set up the bins according to the binned data file
     //the form of the data file should be <rlow  rhigh ....>
@@ -138,7 +118,6 @@ int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double
     }
     *rmax = (*rupp)[index-1];
     fclose(fp);
-    detect_bin_type_double(*rupp, *nbin, bin_type);
 
     (*rupp)[*nbin] = *rmax;
     (*rupp)[*nbin-1] = *rmax;
@@ -146,7 +125,7 @@ int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double
     return EXIT_SUCCESS;
 }
 
-int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type)
+int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp)
 {
     //set up the bins according to the binned data file
     //the form of the data file should be <rlow  rhigh ....>
@@ -183,7 +162,6 @@ int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin,
     }
     *rmax = (*rupp)[index-1];
     fclose(fp);
-    detect_bin_type_double(*rupp, *nbin, bin_type);
 
     (*rupp)[*nbin] = *rmax;
     (*rupp)[*nbin-1] = *rmax;
@@ -191,7 +169,7 @@ int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin,
     return EXIT_SUCCESS;
 }
 
-int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp, bin_type_t *bin_type)
+int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp)
 {
     //set up the bins according to the binned data file
     //the form of the data file should be <rlow  rhigh ....>
@@ -229,7 +207,6 @@ int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, flo
     }
     *rmax = (*rupp)[index-1];
     fclose(fp);
-    detect_bin_type_float(*rupp, *nbin, bin_type);
 
     (*rupp)[*nbin] =* rmax;
     (*rupp)[*nbin-1] =* rmax;
diff --git a/utils/utils.h b/utils/utils.h
index b07ee6c9..2061e594 100644
--- a/utils/utils.h
+++ b/utils/utils.h
@@ -58,9 +58,10 @@ void volume_free(void ***v,int64_t nrow,int64_t ncol);
 
 extern int run_system_call(const char *execstring);
 
-extern int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type);
-extern int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type);
-extern int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp, bin_type_t *bin_type);
+extern int detect_bin_type(const double *rupp, int nbin, bin_type_t *bin_type);
+extern int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp);
+extern int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp);
+extern int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp);
 
 extern int test_all_files_present(const int nfiles, ...);