2 #include "../include/split_op.h"
3 #include "../include/kernels.h"
4 #include "../include/constants.h"
5 #include "../include/fileIO.h"
6 #include "../include/tracker.h"
7 #include "../include/minions.h"
8 #include "../include/parser.h"
10 #include "../include/lattice.h"
11 #include "../include/node.h"
12 #include "../include/edge.h"
13 #include "../include/manip.h"
14 #include "../include/vort.h"
19 //Declare the static uid values to avoid conflicts. Upper limit of 2^32-1 different instances. Should be reasonable in
20 // any simulation of realistic timescales.
21 unsigned int LatticeGraph::Edge::suid = 0;
22 unsigned int LatticeGraph::Node::suid = 0;
23 //std::size_t Vtx::Vortex::suid = 0;
25 char buffer[100]; //Buffer for printing out. Need to replace by a better write-out procedure. Consider binary or HDF.
26 int verbose; //Print more info. Not curently implemented.
27 int device; //GPU ID choice.
28 int kick_it; //Kicking mode: 0 = off, 1 = multiple, 2 = single
29 int graph=0; //Generate graph from vortex lattice.
30 double gammaY; //Aspect ratio of trapping geometry.
31 double omega; //Rotation rate of condensate
33 double angle_sweep; //Rotation angle of condensate relative to x-axis
34 double x0_shift, y0_shift; //Optical lattice shift parameters.
35 double Rxy; //Condensate scaling factor.
36 double a0x, a0y; //Harmonic oscillator length in x and y directions
37 double sepMinEpsilon=0.0; //Minimum separation for epsilon.
40 * Checks CUDA routines have exitted correctly.
42 int isError(int result, char* c){
44 printf("Error has occurred for method %s with return type %d\n",
51 * Used to perform parallel summation on WFC for normalisation.
53 void parSum(double* gpuWfc, double* gpuParSum, Grid &par){
54 // May need to add double l
55 int dimnum = par.ival("dimnum");
56 double dx = par.dval("dx");
57 double dy = par.dval("dy");
58 double dz = par.dval("dz");
59 dim3 threads = par.threads;
60 int xDim = par.ival("xDim");
61 int yDim = par.ival("yDim");
62 int zDim = par.ival("zDim");
63 dim3 grid_tmp(xDim, 1, 1);
67 // Setting option for 3d
78 dim3 block(grid_tmp.x/threads.x, 1, 1);
79 dim3 thread_tmp = threads;
82 set_eq<<<par.grid, par.threads>>>(gpuWfc, gpuParSum);
84 while((double)grid_tmp.x/threads.x > 1.0){
85 multipass<<<block,thread_tmp,thread_tmp.x*sizeof(double)>>>(
86 &gpuParSum[0],&gpuParSum[0]);
87 grid_tmp.x /= threads.x;
88 block = (int) ceil((double)grid_tmp.x/threads.x);
90 //std::cout << grid_tmp.x << '\n';
92 thread_tmp = grid_tmp.x;
93 multipass<<<1,thread_tmp,thread_tmp.x*sizeof(double2)>>>(&gpuParSum[0],
98 * Used to perform parallel summation on WFC for normalisation.
100 void parSum(double2* gpuWfc, Grid &par){
101 // May need to add double l
102 int dimnum = par.ival("dimnum");
103 double dx = par.dval("dx");
104 double dy = par.dval("dy");
105 double dz = par.dval("dz");
106 dim3 threads = par.threads;
107 int xDim = par.ival("xDim");
108 int yDim = par.ival("yDim");
109 int zDim = par.ival("zDim");
110 dim3 grid_tmp(xDim, 1, 1);
114 // Setting option for 3d
125 dim3 block(grid_tmp.x/threads.x, 1, 1);
126 dim3 thread_tmp = threads;
130 cudaMalloc((void**) &density, sizeof(double)*gsize);
132 complexMagnitudeSquared<<<par.grid, par.threads>>>(gpuWfc, density);
135 std::cout << "grid / threads = " << '\t'
136 << (double)grid_tmp.x/threads.x << '\n'
137 << "grid.x is: " << grid_tmp.x << '\t'
138 << "threads.x are: " << threads.x << '\n';
140 while((double)grid_tmp.x/threads.x > 1.0){
141 multipass<<<block,threads,threads.x*sizeof(double)>>>(&density[0],
143 grid_tmp.x /= threads.x;
144 block = (int) ceil((double)grid_tmp.x/threads.x);
146 //std::cout << pass << '\t' << grid_tmp.x << '\n';
148 thread_tmp = grid_tmp.x;
149 multipass<<<1,thread_tmp,thread_tmp.x*sizeof(double)>>>(&density[0],
153 // Writing out in the parSum Function (not recommended, for debugging)
155 sum = (double *) malloc(sizeof(double)*gsize);
156 cudaMemcpy(sum,density,sizeof(double)*gsize,
157 cudaMemcpyDeviceToHost);
158 std::cout << (sum[0]) << '\n';
160 scalarDiv_wfcNorm<<<par.grid,par.threads>>>(gpuWfc, dg, density, gpuWfc);
166 ** Matches the optical lattice to the vortex lattice.
167 ** Moire super-lattice project.
169 void optLatSetup(std::shared_ptr<Vtx::Vortex> centre, const double* V,
170 std::vector<std::shared_ptr<Vtx::Vortex>> &vArray, double theta_opt,
171 double intensity, double* v_opt, const double *x, const double *y,
173 std::string data_dir = par.sval("data_dir");
174 int xDim = par.ival("xDim");
175 int yDim = par.ival("yDim");
176 double dx = par.dval("dx");
177 double dy = par.dval("dy");
178 double dt = par.dval("dt");
179 cufftDoubleComplex *EV_opt = par.cufftDoubleComplexval("EV_opt");
181 double sepMin = Tracker::vortSepAvg(vArray,centre);
182 sepMin = sepMin*(1 + sepMinEpsilon);
183 par.store("Vort_sep",(double)sepMin);
185 // Defining the necessary k vectors for the optical lattice
188 // Additional /2 as a result of lambda/2 period
189 double k_mag = ((2*PI/(sepMin*dx))/2)*(2/sqrt(3));
190 double2* k = (double2*) malloc(sizeof(double2)*3);
191 par.store("kmag",(double)k_mag);
192 k[0].x = k_mag * cos(0*PI/3 + theta_opt);
193 k[0].y = k_mag * sin(0*PI/3 + theta_opt);
194 k[1].x = k_mag * cos(2*PI/3 + theta_opt);
195 k[1].y = k_mag * sin(2*PI/3 + theta_opt);
196 k[2].x = k_mag * cos(4*PI/3 + theta_opt);
197 k[2].y = k_mag * sin(4*PI/3 + theta_opt);
199 double2 *r_opt = (double2*) malloc(sizeof(double2)*xDim);
201 //FileIO::writeOut(buffer,data_dir + "r_opt",r_opt,xDim,0);
202 par.store("k[0].x",(double)k[0].x);
203 par.store("k[0].y",(double)k[0].y);
204 par.store("k[1].x",(double)k[1].x);
205 par.store("k[1].y",(double)k[1].y);
206 par.store("k[2].x",(double)k[2].x);
207 par.store("k[2].y",(double)k[2].y);
209 // sin(theta_opt)*(sepMin);
211 double x_shift = dx*(9+(0.5*xDim-1) - centre->getCoordsD().x);
213 // cos(theta_opt)*(sepMin);
214 double y_shift = dy*(0+(0.5*yDim-1) - centre->getCoordsD().y);
216 printf("Xs=%e\nYs=%e\n",x_shift,y_shift);
218 //#pragma omp parallel for private(j)
219 for ( j=0; j<yDim; ++j ){
220 for ( i=0; i<xDim; ++i ){
221 v_opt[j*xDim + i] = intensity*(
222 pow( ( cos( k[0].x*( x[i] + x_shift ) +
223 k[0].y*( y[j] + y_shift ) ) ), 2) +
224 pow( ( cos( k[1].x*( x[i] + x_shift ) +
225 k[1].y*( y[j] + y_shift ) ) ), 2) +
226 pow( ( cos( k[2].x*( x[i] + x_shift ) +
227 k[2].y*( y[j] + y_shift ) ) ), 2)
229 EV_opt[(j*xDim + i)].x=cos( -(V[(j*xDim + i)] +
230 v_opt[j*xDim + i])*(dt/(2*HBAR)));
231 EV_opt[(j*xDim + i)].y=sin( -(V[(j*xDim + i)] +
232 v_opt[j*xDim + i])*(dt/(2*HBAR)));
236 // Storing changed variables
237 par.store("EV_opt", EV_opt);
239 par.store("V_opt",v_opt);
242 double energy_calc(Grid &par, double2* wfc){
243 double* K = par.dsval("K_gpu");
244 double* V = par.dsval("V_gpu");
246 dim3 grid = par.grid;
247 dim3 threads = par.threads;
249 int xDim = par.ival("xDim");
250 int yDim = par.ival("yDim");
251 int zDim = par.ival("zDim");
252 int gsize = xDim*yDim*zDim;
254 double dx = par.dval("dx");
255 double dy = par.dval("dy");
256 double dz = par.dval("dz");
257 double dg = dx*dy*dz;
261 if (par.ival("dimnum") == 1){
262 plan = par.ival("plan_1d");
264 if (par.ival("dimnum") == 2){
265 plan = par.ival("plan_2d");
267 if (par.ival("dimnum") == 3){
268 plan = par.ival("plan_3d");
271 double renorm_factor = 1.0/pow(gsize,0.5);
273 double2 *wfc_c, *wfc_k;
274 double2 *energy_r, *energy_k;
277 cudaMalloc((void **) &wfc_c, sizeof(double2)*gsize);
278 cudaMalloc((void **) &wfc_k, sizeof(double2)*gsize);
279 cudaMalloc((void **) &energy_r, sizeof(double2)*gsize);
280 cudaMalloc((void **) &energy_k, sizeof(double2)*gsize);
282 cudaMalloc((void **) &energy, sizeof(double)*gsize);
285 vecConjugate<<<grid, threads>>>(wfc, wfc_c);
287 // Momentum-space energy
288 cufftExecZ2Z(plan, wfc, wfc_k, CUFFT_FORWARD);
289 scalarMult<<<grid, threads>>>(wfc_k, renorm_factor, wfc_k);
291 vecMult<<<grid, threads>>>(wfc_k, K, energy_k);
293 cufftExecZ2Z(plan, energy_k, energy_k, CUFFT_INVERSE);
294 scalarMult<<<grid, threads>>>(energy_k, renorm_factor, energy_k);
296 cMult<<<grid, threads>>>(wfc_c, energy_k, energy_k);
298 // Position-space energy
299 vecMult<<<grid, threads>>>(wfc, V, energy_r);
300 cMult<<<grid, threads>>>(wfc_c, energy_r, energy_r);
302 complexAbsSum<<<grid, threads>>>(energy_r, energy_k, energy);
305 energy_cpu = (double *)malloc(sizeof(double)*gsize);
307 cudaMemcpy(energy_cpu, energy, sizeof(double)*gsize,
308 cudaMemcpyDeviceToHost);
311 for (int i = 0; i < gsize; ++i){
312 sum += energy_cpu[i]*dg;