diff --git a/Corrfunc/tests.py b/Corrfunc/tests.py index 200f2124..98875fb3 100644 --- a/Corrfunc/tests.py +++ b/Corrfunc/tests.py @@ -65,38 +65,33 @@ def test_linear_binning_mocks(isa='fallback'): autocorr = 1 numbins_to_print = 5 cosmology = 1 + bin_types = ['custom', 'lin'] - def allclose(a, b): - return all(np.allclose(a[name], b[name]) for name in a.dtype.names) + def allclose(a, *others): + toret = True + for b in others: + toret &= all(np.allclose(a[name], b[name]) for name in ['npairs']) + return toret + + def test_bin_type(pair_counter, *args, **kwargs): + res = (pair_counter(*args, bin_type=bin_type, weight_type=weight_type, **kwargs) for bin_type in bin_types for weight_type in [None, "pair_product"]) + assert allclose(*res) print("\nRunning 2-D correlation function xi(rp,pi)") - results_DDrppi_ref = DDrppi_mocks(autocorr, cosmology, nthreads, - pimax, binfile, - ra, dec, cz, - weights1=w, weight_type='pair_product', - output_rpavg=True, verbose=True, isa=isa, bin_type='custom') - results_DDrppi = DDrppi_mocks(autocorr, cosmology, nthreads, - pimax, binfile, - ra, dec, cz, - weights1=w, weight_type='pair_product', - output_rpavg=True, verbose=False, isa=isa, bin_type='lin') - assert allclose(results_DDrppi, results_DDrppi_ref) + test_bin_type(DDrppi_mocks, autocorr, cosmology, nthreads, + pimax, binfile, + ra, dec, cz, weights1=w, + output_rpavg=True, verbose=True, + isa=isa) nmu_bins = 10 mu_max = 1.0 - print("\nRunning 2-D correlation function xi(s,mu)") - results_DDsmu_ref = DDsmu_mocks(autocorr, cosmology, nthreads, - mu_max, nmu_bins, binfile, - ra, dec, cz, weights1=w, - output_savg=True, verbose=True, - weight_type='pair_product', isa=isa, bin_type='custom') - results_DDsmu = DDsmu_mocks(autocorr, cosmology, nthreads, - mu_max, nmu_bins, binfile, - ra, dec, cz, weights1=w, - output_savg=True, verbose=False, - weight_type='pair_product', isa=isa, bin_type='lin') - assert allclose(results_DDsmu, results_DDsmu_ref) + test_bin_type(DDsmu_mocks, autocorr, cosmology, nthreads, + mu_max, nmu_bins, binfile, + ra, dec, cz, weights1=w, + output_savg=True, verbose=True, + isa=isa) def test_linear_binning_theory(isa='fallback'): @@ -113,7 +108,7 @@ def test_linear_binning_theory(isa='fallback'): tstart = time.time() t0 = tstart - x, y, z = read_catalog() + x, y, z = np.array(read_catalog())[:,:1000] w = np.ones((1, len(x)), dtype=x.dtype) boxsize = 420.0 t1 = time.time() @@ -133,80 +128,54 @@ def test_linear_binning_theory(isa='fallback'): file.write("{0} {1}\n".format(low, hi)) autocorr = 1 periodic = 1 + bin_types = ['custom', 'lin'] + + def allclose(a, *others): + toret = True + for b in others: + toret &= all(np.allclose(a[name], b[name]) for name in ['npairs']) + return toret - def allclose(a, b): - return all(np.allclose(a[name], b[name]) for name in a.dtype.names) + def test_bin_type(pair_counter, *args, **kwargs): + res = (pair_counter(*args, bin_type=bin_type, weight_type=weight_type, **kwargs) for bin_type in bin_types for weight_type in [None, "pair_product"]) + assert allclose(*res) print("Running 3-D correlation function DD(r)") - results_DD_ref = DD(autocorr, nthreads, binfile, x, y, z, - weights1=w, weight_type='pair_product', - verbose=True, periodic=periodic, boxsize=boxsize, - isa=isa, bin_type='custom') - results_DD = DD(autocorr, nthreads, binfile, x, y, z, - weights1=w, weight_type='pair_product', - verbose=False, periodic=periodic, boxsize=boxsize, - isa=isa, bin_type='auto') - allclose(results_DD, results_DD_ref) + + test_bin_type(DD, autocorr, nthreads, binfile, x, y, z, weights1=w, + verbose=True, periodic=periodic, boxsize=boxsize, isa=isa) + print("\nRunning 2-D correlation function DD(rp,pi)") - results_DDrppi_ref = DDrppi(autocorr, nthreads, pimax, - binfile, x, y, z, - weights1=w, weight_type='pair_product', - verbose=False, periodic=periodic, - boxsize=boxsize, isa=isa, bin_type='custom') - results_DDrppi = DDrppi(autocorr, nthreads, pimax, - binfile, x, y, z, - weights1=w, weight_type='pair_product', - verbose=False, periodic=periodic, - boxsize=boxsize, isa=isa, bin_type='lin') - allclose(results_DDrppi, results_DDrppi_ref) + test_bin_type(DDrppi, autocorr, nthreads, pimax, + binfile, x, y, z, weights1=w, + verbose=True, periodic=periodic, + boxsize=boxsize, isa=isa) mu_max = 0.5 nmu_bins = 10 print("\nRunning 2-D correlation function DD(s,mu)") - results_DDsmu_ref = DDsmu(autocorr, nthreads, binfile, - mu_max, nmu_bins, - x, y, z, - weights1=w, weight_type='pair_product', - verbose=True, periodic=periodic, - boxsize=boxsize, output_savg=True, - isa=isa, bin_type='custom') - results_DDsmu = DDsmu(autocorr, nthreads, binfile, - mu_max, nmu_bins, - x, y, z, - weights1=w, weight_type='pair_product', - verbose=False, periodic=periodic, - boxsize=boxsize, output_savg=True, - isa=isa, bin_type='lin') - allclose(results_DDsmu, results_DDsmu_ref) + test_bin_type(DDsmu, autocorr, nthreads, binfile, + mu_max, nmu_bins, + x, y, z, weights1=w, + verbose=True, periodic=periodic, + boxsize=boxsize, output_savg=True, isa=isa) print("\nRunning 2-D projected correlation function wp(rp)") - results_wp_ref = wp(boxsize, pimax, nthreads, - binfile, x, y, z, - weights=w, weight_type='pair_product', - verbose=True, isa=isa, bin_type='custom') - results_wp = wp(boxsize, pimax, nthreads, - binfile, x, y, z, - weights=w, weight_type='pair_product', - verbose=False, isa=isa, bin_type='custom') - allclose(results_wp, results_wp_ref) + test_bin_type(wp, boxsize, pimax, nthreads, + binfile, x, y, z, weights=w, + verbose=True, isa=isa) print("\nRunning 3-D auto-correlation function xi(r)") - results_xi_ref = xi(boxsize, nthreads, binfile, - x, y, z, - weights=w, weight_type='pair_product', - verbose=True, isa=isa, bin_type='custom') - results_xi = xi(boxsize, nthreads, binfile, - x, y, z, - weights=w, weight_type='pair_product', - verbose=False, isa=isa, bin_type='lin') - allclose(results_xi, results_xi_ref) + test_bin_type(xi, boxsize, nthreads, binfile, + x, y, z, weights=w, + verbose=True, isa=isa) if __name__ == '__main__': - #tests() + tests() for isa in ['fallback','sse42','avx','avx512f']: test_linear_binning_mocks(isa=isa) test_linear_binning_theory(isa=isa) diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index 6d9e4971..bd5e6051 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -548,11 +548,11 @@ def translate_bin_type_string_to_enum(bin_type): except NameError: if not isinstance(bin_type, str): raise TypeError(msg) - valid_bin_type = ['AUTO', 'CUSTOM', 'LIN'] + valid_bin_types = ['AUTO', 'CUSTOM', 'LIN'] bin_type_upper = bin_type.upper() - if bin_type_upper not in valid_bin_type: + if bin_type_upper not in valid_bin_types: msg = "Desired bin type = {0} is not in the list of valid "\ - "bin types = {1}".format(bin_type, valid_bin_type) + "bin types = {1}".format(bin_type, valid_bin_types) raise ValueError(msg) enums = {'AUTO': 0, diff --git a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src index 9f898f21..04760e83 100644 --- a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src +++ b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_impl.c.src @@ -297,12 +297,13 @@ int countpairs_mocks_DOUBLE(const int64_t ND1, DOUBLE *ra1, DOUBLE *dec1, DOUBLE double *rupp; int nrpbin ; double rpmin,rpmax; - setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp,&(options->bin_type)); + setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp); if( ! (rpmin > 0.0 && rpmax > 0.0 && rpmin < rpmax && nrpbin > 0)) { fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", rpmin, rpmax, nrpbin); return EXIT_FAILURE; } + detect_bin_type(rupp, nrpbin, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n"); else fprintf(stderr,"Custom R binning\n"); diff --git a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src index ba3d0638..3e195bfc 100644 --- a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src +++ b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src @@ -53,12 +53,11 @@ static inline int countpairs_rp_pi_mocks_avx512_intrinsics_DOUBLE(const int64_t const int32_t need_rpavg = src_rpavg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax; - DOUBLE inv_rpstep=0., rpmin_invstep=0.; AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_rpstep = (nbin - 1)/(rpmax - rpmin); - rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin); + const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep); m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep); } diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src index 2045de9e..fb7528f1 100644 --- a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src @@ -297,12 +297,13 @@ int countpairs_mocks_s_mu_DOUBLE(const int64_t ND1, DOUBLE *ra1, DOUBLE *dec1, D double *supp; int nsbin; double smin,smax; - setup_bins(sbinfile,&smin,&smax,&nsbin,&supp,&(options->bin_type)); + setup_bins(sbinfile,&smin,&smax,&nsbin,&supp); if( ! (smin > 0.0 && smax > 0.0 && smin < smax && nsbin > 0)) { fprintf(stderr,"Error: Could not setup with S bins correctly. (smin = %lf, smax = %lf, with nbins = %d). Expected non-zero smin/smax with smax > smin and nbins >=1 \n", smin, smax, nsbin); return EXIT_FAILURE; } + detect_bin_type(supp, nsbin, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear S binning\n"); else fprintf(stderr,"Custom S binning\n"); diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src index e2d2f377..a82574a2 100644 --- a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src @@ -48,12 +48,11 @@ static inline int countpairs_s_mu_mocks_avx512_intrinsics_DOUBLE(const int64_t N const int32_t need_savg = src_savg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_smin=smin*smin, sqr_smax=smax*smax; - DOUBLE inv_sstep=0., smin_invstep=0.; AVX512_FLOATS m_inv_sstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_smin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_sstep = (nsbin - 1)/(smax - smin); - smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_sstep = (nsbin - 1)/(smax - smin); + const DOUBLE smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_sstep = AVX512_SET_FLOAT(inv_sstep); m_smin_invstep = AVX512_SET_FLOAT(smin_invstep); } diff --git a/theory/DD/countpairs_impl.c.src b/theory/DD/countpairs_impl.c.src index 42ed5d30..c66f6414 100644 --- a/theory/DD/countpairs_impl.c.src +++ b/theory/DD/countpairs_impl.c.src @@ -198,13 +198,14 @@ int countpairs_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z1, double *rupp=NULL; int nrpbin ; double rpmin,rpmax; - setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp,&(options->bin_type)); + setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp); if( ! (rpmin >=0.0 && rpmax > 0.0 && rpmin < rpmax && nrpbin > 0)) { fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). " "Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", rpmin, rpmax, nrpbin); return EXIT_FAILURE; } + detect_bin_type(rupp, nrpbin, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n"); else fprintf(stderr,"Custom R binning\n"); diff --git a/theory/DD/countpairs_kernels.c.src b/theory/DD/countpairs_kernels.c.src index d6b7b3a0..884ed55a 100644 --- a/theory/DD/countpairs_kernels.c.src +++ b/theory/DD/countpairs_kernels.c.src @@ -51,12 +51,11 @@ static inline int countpairs_avx512_intrinsics_DOUBLE(const int64_t N0, DOUBLE * const int32_t need_rpavg = src_rpavg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax; - DOUBLE inv_rpstep=0., rpmin_invstep=0.; AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_rpstep = (nbin - 1)/(rpmax - rpmin); - rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin); + const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep); m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep); } @@ -249,7 +248,7 @@ static inline int countpairs_avx512_intrinsics_DOUBLE(const int64_t N0, DOUBLE * } }//backwards loop over the bins } - if(need_rpavg || need_weightavg) { + if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) { //Do I need this step of going via the union? accessing int[] -> AVX* vector might //cause alignment problems but accessing the ints from an AVX* //register should always be fine @@ -260,6 +259,9 @@ static inline int countpairs_avx512_intrinsics_DOUBLE(const int64_t N0, DOUBLE * #endif for(int jj=0;jj=1;kbin--) { @@ -850,6 +857,7 @@ static inline int countpairs_sse_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0, npairs[kbin] += SSE_BIT_COUNT_INT(test2); if(need_rpavg || need_weightavg){ m_rpbin = SSE_BLEND_FLOATS_WITH_MASK(m_rpbin,m_kbin[kbin], m_bin_mask); + union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin); } int test3 = SSE_TEST_COMPARISON(m_mask_left); if(test3 == 0) { @@ -857,14 +865,16 @@ static inline int countpairs_sse_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0, } } } - if(need_rpavg || need_weightavg) { - union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin); + if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) { //protect the unroll pragma in case compiler is not icc. #if __INTEL_COMPILER #pragma unroll(SSE_NVEC) #endif for(int jj=0;jjbin_type)); + setup_bins(binfile,&rpmin,&rpmax,&nrpbin,&rupp); if( ! (rpmin >= 0.0 && rpmax > 0.0 && rpmin < rpmax && nrpbin > 0)) { fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", rpmin, rpmax, nrpbin); return EXIT_FAILURE; } + detect_bin_type(rupp, nrpbin, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n"); else fprintf(stderr,"Custom R binning\n"); diff --git a/theory/DDrppi/countpairs_rp_pi_kernels.c.src b/theory/DDrppi/countpairs_rp_pi_kernels.c.src index 85a7259e..d31e763b 100644 --- a/theory/DDrppi/countpairs_rp_pi_kernels.c.src +++ b/theory/DDrppi/countpairs_rp_pi_kernels.c.src @@ -44,12 +44,11 @@ static inline int countpairs_rp_pi_avx512_intrinsics_DOUBLE(const int64_t N0, DO const int32_t need_rpavg = src_rpavg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax; - DOUBLE inv_rpstep=0., rpmin_invstep=0.; AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_rpstep = (nbin - 1)/(rpmax - rpmin); - rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin); + const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep); m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep); } diff --git a/theory/DDsmu/countpairs_s_mu_impl.c.src b/theory/DDsmu/countpairs_s_mu_impl.c.src index 5a04c54b..61e3d488 100644 --- a/theory/DDsmu/countpairs_s_mu_impl.c.src +++ b/theory/DDsmu/countpairs_s_mu_impl.c.src @@ -204,12 +204,13 @@ int countpairs_s_mu_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z1 double *supp; int nsbin; double smin,smax; - setup_bins(sbinfile,&smin,&smax,&nsbin,&supp,&(options->bin_type)); + setup_bins(sbinfile,&smin,&smax,&nsbin,&supp); if( ! (smin >= 0.0 && smax > 0.0 && smin < smax && nsbin > 0)) { fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", smin, smax, nsbin); return EXIT_FAILURE; } + detect_bin_type(supp, nsbin, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n"); else fprintf(stderr,"Custom R binning\n"); diff --git a/theory/DDsmu/countpairs_s_mu_kernels.c.src b/theory/DDsmu/countpairs_s_mu_kernels.c.src index 1786b9f1..8be4870e 100644 --- a/theory/DDsmu/countpairs_s_mu_kernels.c.src +++ b/theory/DDsmu/countpairs_s_mu_kernels.c.src @@ -45,12 +45,11 @@ static inline int countpairs_s_mu_avx512_intrinsics_DOUBLE(const int64_t N0, DOU const int32_t need_savg = src_savg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_smin=smin*smin, sqr_smax=smax*smax; - DOUBLE inv_sstep=0., smin_invstep=0.; AVX512_FLOATS m_inv_sstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_smin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_sstep = (nsbin - 1)/(smax - smin); - smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_sstep = (nsbin - 1)/(smax - smin); + const DOUBLE smin_invstep = 1 - smin*inv_sstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_sstep = AVX512_SET_FLOAT(inv_sstep); m_smin_invstep = AVX512_SET_FLOAT(smin_invstep); } diff --git a/theory/wp/countpairs_wp_impl.c.src b/theory/wp/countpairs_wp_impl.c.src index 5f2658a6..690dd9f1 100644 --- a/theory/wp/countpairs_wp_impl.c.src +++ b/theory/wp/countpairs_wp_impl.c.src @@ -218,12 +218,13 @@ int countpairs_wp_DOUBLE(const int64_t ND, DOUBLE * restrict X, DOUBLE * restric double *rupp; double rpmin,rpmax; int nrpbins; - setup_bins(binfile,&rpmin,&rpmax,&nrpbins,&rupp,&(options->bin_type)); + setup_bins(binfile,&rpmin,&rpmax,&nrpbins,&rupp); if( ! (rpmin >=0 && rpmax > 0.0 && rpmin < rpmax && nrpbins > 0)) { fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", rpmin, rpmax, nrpbins); return EXIT_FAILURE; } + detect_bin_type(rupp, nrpbins, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n"); else fprintf(stderr,"Custom R binning\n"); diff --git a/theory/wp/wp_kernels.c.src b/theory/wp/wp_kernels.c.src index 633a7162..685c61dc 100644 --- a/theory/wp/wp_kernels.c.src +++ b/theory/wp/wp_kernels.c.src @@ -49,12 +49,11 @@ static inline int wp_avx512_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0 const int32_t need_rpavg = src_rpavg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_rpmin=rpmin*rpmin, sqr_rpmax=rpmax*rpmax; - DOUBLE inv_rpstep=0., rpmin_invstep=0.; AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_rpstep = (nbin - 1)/(rpmax - rpmin); - rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_rpstep = (nbin - 1)/(rpmax - rpmin); + const DOUBLE rpmin_invstep = 1 - rpmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep); m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep); } @@ -271,7 +270,7 @@ static inline int wp_avx512_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0 } }//backwards loop over the bins } - if(need_rpavg || need_weightavg) { + if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) { //Do I need this step of going via the union? accessing int[] -> AVX* vector might //cause alignment problems but accessing the ints from an AVX* //register should always be fine @@ -282,6 +281,9 @@ static inline int wp_avx512_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0 #endif for(int jj=0;jj=1;kbin--) { @@ -1328,7 +1339,7 @@ static inline int wp_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c if(SSE_TEST_COMPARISON(m_mask_left) == 0) break; } } - if(need_rpavg || need_weightavg) { + if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) { union_rpbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rpbin); //protect the unroll pragma in case compiler is not icc. #if __INTEL_COMqPILER @@ -1336,6 +1347,9 @@ static inline int wp_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c #endif for(int jj=0;jjbin_type)); + setup_bins(binfile,&rmin,&rmax,&nbins,&rupp); if( ! (rmin >= 0.0 && rmax > 0.0 && rmin < rmax && nbins > 0)) { fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", rmin, rmax, nbins); return EXIT_FAILURE; } + detect_bin_type(rupp, nbins, &(options->bin_type)); if (options->verbose) { if (options->bin_type == BIN_LIN) fprintf(stderr,"Linear R binning\n"); else fprintf(stderr,"Custom R binning\n"); diff --git a/theory/xi/xi_kernels.c.src b/theory/xi/xi_kernels.c.src index 3fb26565..28594a71 100644 --- a/theory/xi/xi_kernels.c.src +++ b/theory/xi/xi_kernels.c.src @@ -51,12 +51,11 @@ static inline int xi_avx512_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1 const int32_t need_rpavg = src_rpavg != NULL; const int32_t need_weightavg = src_weightavg != NULL; const DOUBLE sqr_rmin=rmin*rmin, sqr_rmax=rmax*rmax; - DOUBLE inv_rpstep=0., rpmin_invstep=0.; AVX512_FLOATS m_inv_rpstep = AVX512_SETZERO_FLOAT(); AVX512_FLOATS m_rpmin_invstep = AVX512_SETZERO_FLOAT(); if (bin_type == BIN_LIN) { - inv_rpstep = (nbin - 1)/(rmax - rmin); - rpmin_invstep = 1 - rmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep + const DOUBLE inv_rpstep = (nbin - 1)/(rmax - rmin); + const DOUBLE rpmin_invstep = 1 - rmin*inv_rpstep; //trick to avoid adding one to (r - rmin)/rstep m_inv_rpstep = AVX512_SET_FLOAT(inv_rpstep); m_rpmin_invstep = AVX512_SET_FLOAT(rpmin_invstep); } @@ -245,7 +244,7 @@ static inline int xi_avx512_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1 } }//backwards loop over the bins } - if(need_rpavg || need_weightavg) { + if(need_rpavg || need_weightavg || (bin_type == BIN_LIN)) { //Do I need this step of going via the union? accessing int[] -> AVX* vector might //cause alignment problems but accessing the ints from an AVX* //register should always be fine @@ -256,6 +255,9 @@ static inline int xi_avx512_intrinsics_DOUBLE(DOUBLE *x1, DOUBLE *y1, DOUBLE *z1 #endif for(int jj=0;jj=1;kbin--) { @@ -832,7 +839,7 @@ static inline int xi_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c } } } - if(need_ravg || need_weightavg) { + if(need_ravg || need_weightavg || (bin_type == BIN_LIN)) { union_rbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_rbin); //protect the unroll pragma in case compiler is not icc. #if __INTEL_COMPILER @@ -840,6 +847,9 @@ static inline int xi_sse_intrinsics_DOUBLE(DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, c #endif for(int jj=0;jj atol)||(fabsf((rupp[ii] - pred)/pred) > rtol)) { - *bin_type = BIN_CUSTOM; - break; - } - } - } - return EXIT_SUCCESS; -} - -static int detect_bin_type_double(const double *rupp, int nbin, bin_type_t *bin_type) +int detect_bin_type(const double *rupp, int nbin, bin_type_t *bin_type) { if (*bin_type == BIN_AUTO) { // if linear spacing, return BIN_LIN, else BIN_CUSTOM @@ -99,7 +79,7 @@ static int detect_bin_type_double(const double *rupp, int nbin, bin_type_t *bin_ return EXIT_SUCCESS; } -int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type) +int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp) { //set up the bins according to the binned data file //the form of the data file should be @@ -138,7 +118,6 @@ int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double } *rmax = (*rupp)[index-1]; fclose(fp); - detect_bin_type_double(*rupp, *nbin, bin_type); (*rupp)[*nbin] = *rmax; (*rupp)[*nbin-1] = *rmax; @@ -146,7 +125,7 @@ int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double return EXIT_SUCCESS; } -int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type) +int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp) { //set up the bins according to the binned data file //the form of the data file should be @@ -183,7 +162,6 @@ int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, } *rmax = (*rupp)[index-1]; fclose(fp); - detect_bin_type_double(*rupp, *nbin, bin_type); (*rupp)[*nbin] = *rmax; (*rupp)[*nbin-1] = *rmax; @@ -191,7 +169,7 @@ int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, return EXIT_SUCCESS; } -int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp, bin_type_t *bin_type) +int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp) { //set up the bins according to the binned data file //the form of the data file should be @@ -229,7 +207,6 @@ int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, flo } *rmax = (*rupp)[index-1]; fclose(fp); - detect_bin_type_float(*rupp, *nbin, bin_type); (*rupp)[*nbin] =* rmax; (*rupp)[*nbin-1] =* rmax; diff --git a/utils/utils.h b/utils/utils.h index b07ee6c9..2061e594 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -58,9 +58,10 @@ void volume_free(void ***v,int64_t nrow,int64_t ncol); extern int run_system_call(const char *execstring); -extern int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type); -extern int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp, bin_type_t *bin_type); -extern int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp, bin_type_t *bin_type); +extern int detect_bin_type(const double *rupp, int nbin, bin_type_t *bin_type); +extern int setup_bins(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp); +extern int setup_bins_double(const char *fname, double *rmin, double *rmax, int *nbin, double **rupp); +extern int setup_bins_float(const char *fname, float *rmin, float *rmax, int *nbin, float **rupp); extern int test_all_files_present(const int nfiles, ...);