From 297c121dcf790cafdac4479d48c188d784f1c0aa Mon Sep 17 00:00:00 2001 From: Gavin Hunsche Date: Wed, 24 Jul 2024 10:04:21 -0400 Subject: [PATCH] successful version of track a sextupole; error in sx value for quad --- src/low_level/int_arrays.jl | 13 +++- src/low_level/structures.jl | 45 ++++++++----- src/track_a_quadrupole.jl | 130 ++++++++++++++++++++++++------------ src/track_a_sextupole.jl | 105 +++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 65 deletions(-) create mode 100644 src/track_a_sextupole.jl diff --git a/src/low_level/int_arrays.jl b/src/low_level/int_arrays.jl index 7851e1f..bf8fa31 100644 --- a/src/low_level/int_arrays.jl +++ b/src/low_level/int_arrays.jl @@ -5,9 +5,16 @@ function fill_quadrupole(elements) """filling intermediate calculation arrays for track a quadrupole with zeros to avoid dynamic memory allocation; change element count as needed.""" - x_ele, px_ele, S, C, x_lab, y_lab, px_lab, py_lab, beta, beta0, e_tot, - evaluation, dz, sqrt_k, sk_l, sx, ax11, ax12, ax21, ay11, ay12, ay21, - cx1, cx2, cx3, cy1, cy2, cy3, b1, rel_p = (CUDA.fill(0.0, elements) for item = 1:30) + global x_ele, px_ele, S, C, x_lab, y_lab, px_lab, py_lab, + beta, beta0, e_tot, evaluation, dz, sqrt_k, sk_l, sx, a11, + a12, a21, c1, c2, c3, b1, rel_p = (CUDA.fill(0.0, elements) for item = 1:24) return nothing end + +function fill_sextupole(elements) + global x_ele, px_ele, y_ele, S, C, b2, rel_p, beta, beta0, + e_tot, evaluation, dz = (CUDA.fill(0.0, elements) for item = 1:12) + + return nothing +end \ No newline at end of file diff --git a/src/low_level/structures.jl b/src/low_level/structures.jl index 3d2ebcc..e8a0bc8 100644 --- a/src/low_level/structures.jl +++ b/src/low_level/structures.jl @@ -26,7 +26,7 @@ struct z_correction{T} # z energy correction pz::T p0c::T mass::T - ds::T + ds::Float64 end struct quad_calc_input{T} # quad_mat2_calc @@ -35,9 +35,9 @@ struct quad_calc_input{T} # quad_mat2_calc rel_p::T end -struct quad_input{T} #track_a_quadrupole +struct quad_and_sextupole{T} #track_a_quadrupole L::Float64 - K1::T + K::T NUM_STEPS::Int32 X_OFFSET::T Y_OFFSET::T @@ -79,7 +79,7 @@ struct int_z_correction{T} # z energy correction dz::T end -struct int_quad_elements{T} +struct int_quad{T} x_ele::T px_ele::T S::T @@ -87,25 +87,34 @@ struct int_quad_elements{T} sqrt_k::T sk_l::T sx::T - ax11::T - ax12::T - ax21::T - ay11::T - ay12::T - ay21::T - cx1::T - cx2::T - cx3::T - cy1::T - cy2::T - cy3::T + a11::T + a12::T + a21::T + c1::T + c2::T + c3::T b1::T rel_p::T end +struct int_sextupole{T} + x_ele::T + px_ele::T + y_ele::T + S::T + C::T + b1::T + rel_p::T + beta::T + beta0::T + e_tot::T + evaluation::T + dz::T +end + """adapting structs to bitstype""" Adapt.@adapt_structure particle; Adapt.@adapt_structure drift; Adapt.@adapt_structure offset_and_tilt; -Adapt.@adapt_structure z_correction; Adapt.@adapt_structure quad_calc_input; Adapt.@adapt_structure quad_input; +Adapt.@adapt_structure z_correction; Adapt.@adapt_structure quad_calc_input; Adapt.@adapt_structure quad_and_sextupole; Adapt.@adapt_structure int_drift; Adapt.@adapt_structure int_set; Adapt.@adapt_structure int_unset; -Adapt.@adapt_structure int_z_correction; Adapt.@adapt_structure int_quad_elements; +Adapt.@adapt_structure int_z_correction; Adapt.@adapt_structure int_quad; Adapt.@adapt_structure int_sextupole; diff --git a/src/track_a_quadrupole.jl b/src/track_a_quadrupole.jl index 8da8ffd..3c86fe4 100644 --- a/src/track_a_quadrupole.jl +++ b/src/track_a_quadrupole.jl @@ -1,25 +1,27 @@ using CUDA -include("low_level/structures.jl"); include("low_level/offset_particle.jl"); include("low_level/int_arrays.jl"); +include("low_level/structures.jl"); include("low_level/int_arrays.jl"); function track_a_quadrupole!(p_in, quad, int) """Tracks the incoming Particle p_in though quad element and returns the outgoing particle. See Bmad manual section 24.15 """ - l = quad.L + len = quad.L x_off = quad.X_OFFSET y_off = quad.Y_OFFSET tilt = quad.TILT - k1 = quad.K1 + K1 = quad.K n_step = quad.NUM_STEPS # number of divisions - l /= n_step # length of division + l = len/n_step # length of division - x_ele, px_ele, S, C, sqrt_k, sk_l, sx, ax11, ax12, ax21, cx1, - cx2, cx3, ay11, ay12, ay21, cy1, cy2, cy3, b1, rel_p = int.x_ele, - int.px_ele, int.S, int.C, int.sqrt_k, int.sk_l, int.sx, int.ax11, - int.ax12, int.ax21, int.cx1, int.cx2, int.cx3, int.ay11, int.ay12, - int.ay21, int.cy1, int.cy2, int.cy3, int.b1, int.rel_p + s = p_in.s + p0c = p_in.p0c + mc2 = p_in.mc2 + + x_ele, px_ele, S, C, sqrt_k, sk_l, sx, a11, a12, a21, c1, + c2, c3, b1, rel_p = int.x_ele, int.px_ele, int.S, int.C, int.sqrt_k, int.sk_l, + int.sx, int.a11, int.a12, int.a21, int.c1, int.c2, int.c3, int.b1, int.rel_p # --- TRACKING --- : x, px, y, py, z, pz = p_in.x, p_in.px, p_in.y, p_in.py, p_in.z, p_in.pz @@ -31,11 +33,12 @@ function track_a_quadrupole!(p_in, quad, int) """offset_particle_set""" i = index + j = Int32(1) while i <= length(x) # set to particle coordinates - @inbounds (b1[i] = k1[i] * l; - + @inbounds ( + b1[i] = K1[i] * len; S[i] = sin(tilt[i]); C[i] = cos(tilt[i]); x[i] -= x_off[i]; @@ -46,54 +49,93 @@ function track_a_quadrupole!(p_in, quad, int) py[i] *= C[i]; py[i] -= px[i]*S[i];) - for j in eachindex(n_step) + while j <= n_step # transfer matrix elements and coefficients for x-coord - @inbounds (rel_p[i] = 1 + pz[i]; - k1[i] = b1[i]/(l*rel_p[i]); - - sqrt_k[i] = sqrt(abs(-k1[i])+eps); + @inbounds ( + rel_p[i] = 1.0 + pz[i]; + K1[i] = b1[i]/(rel_p[i]*len); + + sqrt_k[i] = sqrt(abs(K1[i])+eps); sk_l[i] = sqrt_k[i] * l; - ax11[i] = cos(sk_l[i]) * (-k1[i]<=0) + cosh(sk_l[i]) * (-k1[i]>0); - sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(-k1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(-k1[i]>0); - - ax12[i] = sx[i] / rel_p[i]; - ax21[i] = -k1[i] * sx[i] * rel_p[i]; + K1[i] *= -1; + + a11[i] = cos(sk_l[i]) * (K1[i]<=0) + cosh(sk_l[i]) * (K1[i]>0); + sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(K1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(K1[i]>0); + + a12[i] = sx[i] / rel_p[i]; + a21[i] = K1[i] * sx[i] * rel_p[i]; - cx1[i] = -k1[i] * (-ax11[i] * sx[i] + l) / 4; - cx2[i] = k1[i] * sx[i]^2 / (2 * rel_p[i]); - cx3[i] = -(ax11[i] * sx[i] + l) / (4 * rel_p[i]^2); + c1[i] = K1[i] * (-a11[i] * sx[i] + l) / 4; + c2[i] = -K1[i] * sx[i]^2 / (2 * rel_p[i]); + c3[i] = -(a11[i] * sx[i] + l) / (4 * rel_p[i]^2); + + # z (without energy correction) + z[i] += (c1[i] * x_ele[i]^2 + c2[i] * x_ele[i] * px_ele[i] + c3[i] + * px_ele[i]^2); + # next index x-vals + x_ele[i] = a11[i] * x_ele[i] + a12[i] * px_ele[i]; + px_ele[i] = a21[i] * x_ele[i] + a11[i] * px_ele[i]; + + K1[i] *= -1; # transfer matrix elements and coefficients for y-coord - ay11[i] = cos(sk_l[i]) * (k1[i]<=0) + cosh(sk_l[i]) * (k1[i]>0); - sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(k1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(k1[i]>0); - - ay12[i] = sx[i] / rel_p[i]; - ay21[i] = k1[i] * sx[i] * rel_p[i]; + a11[i] = cos(sk_l[i]) * (K1[i]<=0) + cosh(sk_l[i]) * (K1[i]>0); + sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(K1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(K1[i]>0); + + a12[i] = sx[i] / rel_p[i]; + a21[i] = K1[i] * sx[i] * rel_p[i]; - cy1[i] = k1[i] * (-ay11[i] * sx[i] + l) / 4; - cy2[i] = -k1[i] * sx[i]^2 / (2 * rel_p[i]); - cy3[i] = -(ay11[i] * sx[i] + l) / (4 * rel_p[i]^2); + c1[i] = K1[i] * (-a11[i] * sx[i] + l) / 4; + c2[i] = -K1[i] * sx[i]^2 / (2 * rel_p[i]); + c3[i] = -(a11[i] * sx[i] + l) / (4 * rel_p[i]^2); # z (without energy correction) - z[i] += ( cx1[i] * x_ele[i]^2 + cx2[i] * x_ele[i] * px_ele[i] + cx3[i] - * px_ele[i]^2 + cy1[i] * y[i]^2 + cy2[i] * y[i] * py[i] + cy3[i] * py[i]^2 ); + z[i] += c1[i] * y[i]^2 + c2[i] * y[i] * py[i] + c3[i] * py[i]^2; # next index vals - x_ele[i] = ax11[i] * x_ele[i] + ax12[i] * px_ele[i]; - px_ele[i] = ax21[i] * x_ele[i] + ax11[i] * px_ele[i]; - y[i] = ay11[i] * y[i] + ay12[i] * py[i]; - py[i] = ay21[i] * y[i] + ay11[i] * py[i];) - + y[i] = a11[i] * y[i] + a12[i] * py[i]; + py[i] = a21[i] * y[i] + a11[i] * py[i];) + j += 1 + end + i += stride - end - """continue""" - s = p_in.s - p0c = p_in.p0c - mc2 = p_in.mc2 + end + return nothing end +fill_quadrupole(10_000); +x = CUDA.fill(1.0, 10_000); px = CUDA.fill(0.8, 10_000); y = CUDA.fill(1.0, 10_000); +py = CUDA.fill(0.85, 10_000); z = CUDA.fill(0.5, 10_000); pz = CUDA.fill(0.2, 10_000); +s = 1.0; p0c = CUDA.fill(1.184, 10_000); mc2 = 0.511; + +NUM_STEPS = Int32(1000); K1 = CUDA.fill(2.0, 10_000); L = 0.2; +TILT = CUDA.fill(1.0, 10_000); X_OFFSET = CUDA.fill(0.006, 10_000); Y_OFFSET = CUDA.fill(0.01, 10_000); + +p_in = particle(x, px, y, py, z, pz, s, p0c, mc2); +quad = quad_and_sextupole(L, K1, NUM_STEPS, X_OFFSET, Y_OFFSET, TILT); +int = int_quad(x_ele, px_ele, S, C, sqrt_k, sk_l, sx, a11, a12, + a21, c1, c2, c3, b1, rel_p); + +kernel = @cuda launch=false track_a_quadrupole!(p_in, quad, int); +config = launch_configuration(kernel.fun) +threads = config.threads +blocks= cld(length(x), threads) + +@cuda threads=threads blocks=blocks track_a_quadrupole!(p_in, quad, int) + +print(c1[1:3]) +print(c2[1:3]) +print(c3[1:3]) +print(sqrt_k[1:3]) +print(sk_l[1:3]) +print(a11[1:3]) +print(sx[1:2]) +print(a12[1:2]) +print(K1[1:3]) +print(b1[1:3]) +print(rel_p[1:3]) diff --git a/src/track_a_sextupole.jl b/src/track_a_sextupole.jl new file mode 100644 index 0000000..28e1271 --- /dev/null +++ b/src/track_a_sextupole.jl @@ -0,0 +1,105 @@ +using CUDA + +include("low_level/structures.jl"); include("low_level/int_arrays.jl"); + +function track_a_sextupole!(p_in, sextupole, int) + """Tracks the incoming Particle p_in though pure sextupole element and + returns the outgoing particle. + See Bmad manual section 24.15 + """ + l = sextupole.L + k2 = sextupole.K + + n_step = sextupole.NUM_STEPS # number of divisions + step_len = l / n_step # length of division + + x_off = sextupole.X_OFFSET + y_off = sextupole.Y_OFFSET + tilt = sextupole.TILT + + s = p_in.s + p0c = p_in.p0c + mc2 = p_in.mc2 + + x_ele, px_ele, y_ele, S, C, b2, rel_p, beta, beta0, e_tot, evaluation, + dz = int.x_ele, int.px_ele, int.y_ele, int.S, int.C, int.b1, int.rel_p, + int.beta, int.beta0, int.e_tot, int.evaluation, int.dz + + index = (blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x + stride = gridDim().x * blockDim().x + + i = index + j = Int32(1) + # --- TRACKING --- : + + x, px, y, py, z, pz = p_in.x, p_in.px, p_in.y, p_in.py, p_in.z, p_in.pz + + while i <= length(x) + + # set to particle coordinates + @inbounds ( + b2[i] = k2[i] * l; + + S[i] = sin(tilt[i]); + C[i] = cos(tilt[i]); + x[i] -= x_off[i]; + y[i] -= y_off[i]; + x_ele[i] = x[i]*C[i] + y[i]*S[i]; + y[i] = -x[i]*S[i] + y[i]*C[i]; + px_ele[i] = px[i]*C[i] + py[i]*S[i]; + py[i] *= C[i]; + py[i] -= px[i]*S[i]; + + x[i] = x_ele[i]; + px[i] = px_ele[i]; ) + + + while j <= n_step + @inbounds (rel_p[i] = 1 + pz[i]; # Particle's relative momentum (P/P0) + k2[i] = b2[i]/(l*rel_p[i]); + + # next index + x_ele[i] = x[i] + step_len * px[i]; + y_ele[i] = y[i] + step_len * py[i]; + + px_ele[i] = px[i] + 0.5 * k2[i] * step_len * (y[i]^2 - x[i]^2); + py[i] = py[i] + k2[i] * step_len * x[i] * y[i]; + + x[i] = x_ele[i]; + y[i] = y_ele[i]; + px[i] = px_ele[i]; + + # z low energy correction + beta[i] = (1+pz[i]) * p0c[i] / sqrt(((1+pz[i])*p0c[i])^2 + mc2^2); + beta0[i] = p0c[i] / sqrt( p0c[i]^2 + mc2^2); + e_tot[i] = sqrt(p0c[i]^2+mc2^2); + + evaluation[i] = mc2 * (beta0[i]*pz[i])^2; + dz[i] = (step_len * pz[i] * (1 - 3*(pz[i]*beta0[i]^2)/2+pz[i]^2*beta0[i]^2 + * (2*beta0[i]^2-(mc2/e_tot[i])^2/2) ) + * (mc2/e_tot[i])^2 + * (evaluation[i]<3e-7*e_tot[i]) + + (step_len*(beta[i]-beta0[i])/beta0[i]) + * (evaluation[i]>=3e-7*e_tot[i]) ); + + z[i] += dz[i]; + ) + j += 1 + end + + # setting back to lab frame + @inbounds ( + x_ele[i] = x[i]*C[i] - y[i]*S[i]; + y[i] = x[i]*S[i] + y[i]*C[i]; + x[i] = x_ele[i] + x_off[i]; + y[i] = y[i] + y_off[i]; + px_ele[i] = px[i]*C[i] - py[i]*S[i]; + py[i] = px[i]*S[i] + py[i]*C[i]; + px[i] = px_ele[i];) + + i += stride + + end + return nothing +end +