From 297c121dcf790cafdac4479d48c188d784f1c0aa Mon Sep 17 00:00:00 2001
From: Gavin Hunsche <gh452@cornell.edu>
Date: Wed, 24 Jul 2024 10:04:21 -0400
Subject: [PATCH] successful version of track a sextupole; error in sx value
 for quad

---
 src/low_level/int_arrays.jl |  13 +++-
 src/low_level/structures.jl |  45 ++++++++-----
 src/track_a_quadrupole.jl   | 130 ++++++++++++++++++++++++------------
 src/track_a_sextupole.jl    | 105 +++++++++++++++++++++++++++++
 4 files changed, 228 insertions(+), 65 deletions(-)
 create mode 100644 src/track_a_sextupole.jl

diff --git a/src/low_level/int_arrays.jl b/src/low_level/int_arrays.jl
index 7851e1f..bf8fa31 100644
--- a/src/low_level/int_arrays.jl
+++ b/src/low_level/int_arrays.jl
@@ -5,9 +5,16 @@ function fill_quadrupole(elements)
     """filling intermediate calculation arrays for track 
 a quadrupole with zeros to avoid dynamic memory allocation; 
 change element count as needed."""
-    x_ele, px_ele, S, C, x_lab, y_lab, px_lab, py_lab, beta, beta0, e_tot, 
-    evaluation, dz, sqrt_k, sk_l, sx, ax11, ax12, ax21, ay11, ay12, ay21,
-    cx1, cx2, cx3, cy1, cy2, cy3, b1, rel_p = (CUDA.fill(0.0, elements) for item = 1:30)
+    global x_ele, px_ele, S, C, x_lab, y_lab, px_lab, py_lab,
+    beta, beta0, e_tot, evaluation, dz, sqrt_k, sk_l, sx, a11, 
+    a12, a21, c1, c2, c3, b1, rel_p = (CUDA.fill(0.0, elements) for item = 1:24)
     
     return nothing
 end
+
+function fill_sextupole(elements)
+    global x_ele, px_ele, y_ele, S, C, b2, rel_p, beta, beta0,
+    e_tot, evaluation, dz = (CUDA.fill(0.0, elements) for item = 1:12)
+
+    return nothing
+end
\ No newline at end of file
diff --git a/src/low_level/structures.jl b/src/low_level/structures.jl
index 3d2ebcc..e8a0bc8 100644
--- a/src/low_level/structures.jl
+++ b/src/low_level/structures.jl
@@ -26,7 +26,7 @@ struct z_correction{T} # z energy correction
     pz::T
     p0c::T
     mass::T
-    ds::T
+    ds::Float64
 end
 
 struct quad_calc_input{T}  # quad_mat2_calc
@@ -35,9 +35,9 @@ struct quad_calc_input{T}  # quad_mat2_calc
     rel_p::T
 end
 
-struct quad_input{T}  #track_a_quadrupole
+struct quad_and_sextupole{T}  #track_a_quadrupole
     L::Float64
-    K1::T
+    K::T
     NUM_STEPS::Int32
     X_OFFSET::T
     Y_OFFSET::T
@@ -79,7 +79,7 @@ struct int_z_correction{T} # z energy correction
     dz::T
 end
 
-struct int_quad_elements{T} 
+struct int_quad{T} 
     x_ele::T
     px_ele::T
     S::T
@@ -87,25 +87,34 @@ struct int_quad_elements{T}
     sqrt_k::T
     sk_l::T
     sx::T
-    ax11::T
-    ax12::T
-    ax21::T
-    ay11::T
-    ay12::T
-    ay21::T
-    cx1::T
-    cx2::T
-    cx3::T
-    cy1::T
-    cy2::T
-    cy3::T
+    a11::T
+    a12::T
+    a21::T
+    c1::T
+    c2::T
+    c3::T
     b1::T
     rel_p::T
 end
 
+struct int_sextupole{T}
+    x_ele::T
+    px_ele::T
+    y_ele::T
+    S::T
+    C::T
+    b1::T
+    rel_p::T
+    beta::T
+    beta0::T
+    e_tot::T
+    evaluation::T
+    dz::T
+end
+
 """adapting structs to bitstype"""
 Adapt.@adapt_structure particle; Adapt.@adapt_structure drift; Adapt.@adapt_structure offset_and_tilt;
-Adapt.@adapt_structure z_correction; Adapt.@adapt_structure quad_calc_input; Adapt.@adapt_structure quad_input;
+Adapt.@adapt_structure z_correction; Adapt.@adapt_structure quad_calc_input; Adapt.@adapt_structure quad_and_sextupole;
 Adapt.@adapt_structure int_drift; Adapt.@adapt_structure int_set; Adapt.@adapt_structure int_unset;
-Adapt.@adapt_structure int_z_correction; Adapt.@adapt_structure int_quad_elements; 
+Adapt.@adapt_structure int_z_correction; Adapt.@adapt_structure int_quad; Adapt.@adapt_structure int_sextupole;
 
diff --git a/src/track_a_quadrupole.jl b/src/track_a_quadrupole.jl
index 8da8ffd..3c86fe4 100644
--- a/src/track_a_quadrupole.jl
+++ b/src/track_a_quadrupole.jl
@@ -1,25 +1,27 @@
 using CUDA
 
-include("low_level/structures.jl"); include("low_level/offset_particle.jl"); include("low_level/int_arrays.jl");
+include("low_level/structures.jl"); include("low_level/int_arrays.jl");
 
 function track_a_quadrupole!(p_in, quad, int)
     """Tracks the incoming Particle p_in though quad element and
     returns the outgoing particle.
     See Bmad manual section 24.15
     """
-    l = quad.L
+    len = quad.L
     x_off = quad.X_OFFSET
     y_off = quad.Y_OFFSET
     tilt = quad.TILT
-    k1 = quad.K1
+    K1 = quad.K
     n_step = quad.NUM_STEPS  # number of divisions
-    l /= n_step  # length of division
+    l = len/n_step # length of division
 
-    x_ele, px_ele, S, C, sqrt_k, sk_l, sx, ax11, ax12, ax21, cx1, 
-    cx2, cx3, ay11, ay12, ay21, cy1, cy2, cy3, b1, rel_p = int.x_ele, 
-    int.px_ele, int.S, int.C, int.sqrt_k, int.sk_l, int.sx, int.ax11, 
-    int.ax12, int.ax21, int.cx1, int.cx2, int.cx3, int.ay11, int.ay12, 
-    int.ay21, int.cy1, int.cy2, int.cy3, int.b1, int.rel_p
+    s = p_in.s
+    p0c = p_in.p0c
+    mc2 = p_in.mc2
+
+    x_ele, px_ele, S, C, sqrt_k, sk_l, sx, a11, a12, a21, c1, 
+    c2, c3, b1, rel_p = int.x_ele, int.px_ele, int.S, int.C, int.sqrt_k, int.sk_l, 
+    int.sx, int.a11, int.a12, int.a21, int.c1, int.c2, int.c3, int.b1, int.rel_p
     
     # --- TRACKING --- :
     x, px, y, py, z, pz = p_in.x, p_in.px, p_in.y, p_in.py, p_in.z, p_in.pz
@@ -31,11 +33,12 @@ function track_a_quadrupole!(p_in, quad, int)
 
     """offset_particle_set"""
     i = index
+    j = Int32(1)
     while i <= length(x)
         
         # set to particle coordinates
-        @inbounds (b1[i] = k1[i] * l;
-        
+        @inbounds (
+        b1[i] = K1[i] * len;
         S[i] = sin(tilt[i]);
         C[i] = cos(tilt[i]);
         x[i] -= x_off[i];
@@ -46,54 +49,93 @@ function track_a_quadrupole!(p_in, quad, int)
         py[i] *= C[i];
         py[i] -= px[i]*S[i];)
 
-        for j in eachindex(n_step)
+        while j <= n_step
             # transfer matrix elements and coefficients for x-coord
-            @inbounds (rel_p[i] = 1 + pz[i];
-            k1[i] = b1[i]/(l*rel_p[i]);
-            
-            sqrt_k[i] = sqrt(abs(-k1[i])+eps);
+            @inbounds (
+            rel_p[i] = 1.0 + pz[i];
+            K1[i] = b1[i]/(rel_p[i]*len);
+
+            sqrt_k[i] = sqrt(abs(K1[i])+eps);
             sk_l[i] = sqrt_k[i] * l;
             
-            ax11[i] = cos(sk_l[i]) * (-k1[i]<=0) + cosh(sk_l[i]) * (-k1[i]>0);
-            sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(-k1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(-k1[i]>0);
-                
-            ax12[i] = sx[i] / rel_p[i];
-            ax21[i] = -k1[i] * sx[i] * rel_p[i];
+            K1[i] *= -1;
+
+            a11[i] = cos(sk_l[i]) * (K1[i]<=0) + cosh(sk_l[i]) * (K1[i]>0);
+            sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(K1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(K1[i]>0);
+
+            a12[i] = sx[i] / rel_p[i];
+            a21[i] = K1[i] * sx[i] * rel_p[i];
                 
-            cx1[i] = -k1[i] * (-ax11[i] * sx[i] + l) / 4;
-            cx2[i] = k1[i] * sx[i]^2 / (2 * rel_p[i]);
-            cx3[i] = -(ax11[i] * sx[i] + l) / (4 * rel_p[i]^2);
+            c1[i] = K1[i] * (-a11[i] * sx[i] + l) / 4;
+            c2[i] = -K1[i] * sx[i]^2 / (2 * rel_p[i]);
+            c3[i] = -(a11[i] * sx[i] + l) / (4 * rel_p[i]^2);
+
+            # z (without energy correction)
+            z[i] += (c1[i] * x_ele[i]^2 + c2[i] * x_ele[i] * px_ele[i] + c3[i] 
+            * px_ele[i]^2);
             
+            # next index x-vals
+            x_ele[i] = a11[i] * x_ele[i] + a12[i] * px_ele[i];
+            px_ele[i] = a21[i] * x_ele[i] + a11[i] * px_ele[i];
+            
+            K1[i] *= -1;
             # transfer matrix elements and coefficients for y-coord
-            ay11[i] = cos(sk_l[i]) * (k1[i]<=0) + cosh(sk_l[i]) * (k1[i]>0);
-            sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(k1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(k1[i]>0);
-                
-            ay12[i] = sx[i] / rel_p[i];
-            ay21[i] = k1[i] * sx[i] * rel_p[i];
+            a11[i] = cos(sk_l[i]) * (K1[i]<=0) + cosh(sk_l[i]) * (K1[i]>0);
+            sx[i] = (sin(sk_l[i])/(sqrt_k[i]))*(K1[i]<=0) + (sinh(sk_l[i])/(sqrt_k[i]))*(K1[i]>0);
+            
+            a12[i] = sx[i] / rel_p[i];
+            a21[i] = K1[i] * sx[i] * rel_p[i];
                 
-            cy1[i] = k1[i] * (-ay11[i] * sx[i] + l) / 4;
-            cy2[i] = -k1[i] * sx[i]^2 / (2 * rel_p[i]);
-            cy3[i] = -(ay11[i] * sx[i] + l) / (4 * rel_p[i]^2);
+            c1[i] = K1[i] * (-a11[i] * sx[i] + l) / 4;
+            c2[i] = -K1[i] * sx[i]^2 / (2 * rel_p[i]);
+            c3[i] = -(a11[i] * sx[i] + l) / (4 * rel_p[i]^2);
             
             # z (without energy correction)
-            z[i] += ( cx1[i] * x_ele[i]^2 + cx2[i] * x_ele[i] * px_ele[i] + cx3[i] 
-            * px_ele[i]^2 + cy1[i] * y[i]^2 + cy2[i] * y[i] * py[i] + cy3[i] * py[i]^2 );
+            z[i] += c1[i] * y[i]^2 + c2[i] * y[i] * py[i] + c3[i] * py[i]^2;
             
             # next index vals
-            x_ele[i] = ax11[i] * x_ele[i] + ax12[i] * px_ele[i];
-            px_ele[i] = ax21[i] * x_ele[i] + ax11[i] * px_ele[i];
-            y[i] = ay11[i] * y[i] + ay12[i] * py[i];
-            py[i] = ay21[i] * y[i] + ay11[i] * py[i];)
-
+            y[i] = a11[i] * y[i] + a12[i] * py[i];
+            py[i] = a21[i] * y[i] + a11[i] * py[i];)
+            j += 1
+            
         end
+        
         i += stride
-    end
 
-    """continue"""
-    s = p_in.s
-    p0c = p_in.p0c
-    mc2 = p_in.mc2
+    end
+    
     
     return nothing
 end
 
+fill_quadrupole(10_000);
+x = CUDA.fill(1.0, 10_000); px = CUDA.fill(0.8, 10_000); y = CUDA.fill(1.0, 10_000);
+py = CUDA.fill(0.85, 10_000); z = CUDA.fill(0.5, 10_000); pz = CUDA.fill(0.2, 10_000);
+s = 1.0; p0c = CUDA.fill(1.184, 10_000); mc2 = 0.511;
+
+NUM_STEPS = Int32(1000); K1 = CUDA.fill(2.0, 10_000); L = 0.2;
+TILT = CUDA.fill(1.0, 10_000); X_OFFSET = CUDA.fill(0.006, 10_000); Y_OFFSET = CUDA.fill(0.01, 10_000);
+
+p_in = particle(x, px, y, py, z, pz, s, p0c, mc2);
+quad = quad_and_sextupole(L, K1, NUM_STEPS, X_OFFSET, Y_OFFSET, TILT);
+int = int_quad(x_ele, px_ele, S, C, sqrt_k, sk_l, sx, a11, a12,
+                        a21, c1, c2, c3, b1, rel_p);
+
+kernel = @cuda launch=false track_a_quadrupole!(p_in, quad, int);
+config = launch_configuration(kernel.fun)
+threads = config.threads
+blocks= cld(length(x), threads)
+
+@cuda threads=threads blocks=blocks track_a_quadrupole!(p_in, quad, int)
+
+print(c1[1:3])
+print(c2[1:3])
+print(c3[1:3])
+print(sqrt_k[1:3])
+print(sk_l[1:3])
+print(a11[1:3])
+print(sx[1:2])
+print(a12[1:2])
+print(K1[1:3])
+print(b1[1:3])
+print(rel_p[1:3])
diff --git a/src/track_a_sextupole.jl b/src/track_a_sextupole.jl
new file mode 100644
index 0000000..28e1271
--- /dev/null
+++ b/src/track_a_sextupole.jl
@@ -0,0 +1,105 @@
+using CUDA
+
+include("low_level/structures.jl"); include("low_level/int_arrays.jl");
+
+function track_a_sextupole!(p_in, sextupole, int)
+    """Tracks the incoming Particle p_in though pure sextupole element and
+    returns the outgoing particle.
+    See Bmad manual section 24.15
+    """
+    l = sextupole.L
+    k2 = sextupole.K
+
+    n_step = sextupole.NUM_STEPS  # number of divisions
+    step_len = l / n_step  # length of division
+    
+    x_off = sextupole.X_OFFSET
+    y_off = sextupole.Y_OFFSET
+    tilt = sextupole.TILT
+    
+    s = p_in.s
+    p0c = p_in.p0c
+    mc2 = p_in.mc2
+
+    x_ele, px_ele, y_ele, S, C, b2, rel_p, beta, beta0, e_tot, evaluation,
+    dz = int.x_ele, int.px_ele, int.y_ele, int.S, int.C, int.b1, int.rel_p,
+    int.beta, int.beta0, int.e_tot, int.evaluation, int.dz
+    
+    index = (blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x
+    stride = gridDim().x * blockDim().x
+
+    i = index
+    j = Int32(1)
+    # --- TRACKING --- :
+    
+    x, px, y, py, z, pz = p_in.x, p_in.px, p_in.y, p_in.py, p_in.z, p_in.pz
+    
+    while i <= length(x)
+
+        # set to particle coordinates
+        @inbounds (
+        b2[i] = k2[i] * l;
+
+        S[i] = sin(tilt[i]);
+        C[i] = cos(tilt[i]);
+        x[i] -= x_off[i];
+        y[i] -= y_off[i];
+        x_ele[i] = x[i]*C[i] + y[i]*S[i]; 
+        y[i] = -x[i]*S[i] + y[i]*C[i];
+        px_ele[i] = px[i]*C[i] + py[i]*S[i];
+        py[i] *= C[i];
+        py[i] -= px[i]*S[i];
+
+        x[i] = x_ele[i];
+        px[i] = px_ele[i]; )
+
+
+        while j <= n_step
+            @inbounds (rel_p[i] = 1 + pz[i];  # Particle's relative momentum (P/P0)
+            k2[i] = b2[i]/(l*rel_p[i]);
+
+            # next index
+            x_ele[i] = x[i] + step_len * px[i];
+            y_ele[i] = y[i] + step_len * py[i];
+
+            px_ele[i] = px[i] + 0.5 * k2[i] * step_len * (y[i]^2 - x[i]^2);
+            py[i] = py[i] + k2[i] * step_len * x[i] * y[i];
+            
+            x[i] = x_ele[i];
+            y[i] = y_ele[i];
+            px[i] = px_ele[i];
+            
+            # z low energy correction
+            beta[i] = (1+pz[i]) * p0c[i] / sqrt(((1+pz[i])*p0c[i])^2 + mc2^2);
+            beta0[i] = p0c[i] / sqrt( p0c[i]^2 + mc2^2);
+            e_tot[i] = sqrt(p0c[i]^2+mc2^2);
+            
+            evaluation[i] = mc2 * (beta0[i]*pz[i])^2;
+            dz[i] = (step_len * pz[i] * (1 - 3*(pz[i]*beta0[i]^2)/2+pz[i]^2*beta0[i]^2
+                    * (2*beta0[i]^2-(mc2/e_tot[i])^2/2) )
+                    * (mc2/e_tot[i])^2
+                    * (evaluation[i]<3e-7*e_tot[i]) 
+                    + (step_len*(beta[i]-beta0[i])/beta0[i])
+                    * (evaluation[i]>=3e-7*e_tot[i])  );
+            
+            z[i] += dz[i];
+           )
+            j += 1
+        end
+
+        # setting back to lab frame
+        @inbounds (
+        x_ele[i] = x[i]*C[i] - y[i]*S[i];
+        y[i] = x[i]*S[i] + y[i]*C[i];
+        x[i] = x_ele[i] + x_off[i];
+        y[i] = y[i] + y_off[i];
+        px_ele[i] = px[i]*C[i] - py[i]*S[i];
+        py[i] = px[i]*S[i] + py[i]*C[i];
+        px[i] = px_ele[i];)
+
+        i += stride
+        
+    end
+    return nothing
+end
+