// split_op.cu
// GPUE v1.0: GPU Gross-Pitaevskii Equation numerical solver for Bose-Einstein condensates

#include "../include/split_op.h"
#include "../include/kernels.h"
#include "../include/constants.h"
#include "../include/fileIO.h"
#include "../include/tracker.h"
#include "../include/minions.h"
#include "../include/parser.h"

#include "../include/lattice.h"
#include "../include/node.h"
#include "../include/edge.h"
#include "../include/manip.h"
#include "../include/vort.h"
#include <string>
#include <iostream>

// Declare the static uid values to avoid conflicts. Upper limit of 2^32-1 different instances.
// Should be reasonable in any simulation of realistic timescales.
unsigned int LatticeGraph::Edge::suid = 0;
unsigned int LatticeGraph::Node::suid = 0;
//std::size_t Vtx::Vortex::suid = 0;

char buffer[100]; //Buffer for printing out. Need to replace by a better write-out procedure. Consider binary or HDF.
int verbose; //Print more info. Not currently implemented.
int device; //GPU ID choice.
int kick_it; //Kicking mode: 0 = off, 1 = multiple, 2 = single
int graph=0; //Generate graph from vortex lattice.
double gammaY; //Aspect ratio of trapping geometry.
double omega; //Rotation rate of condensate
double timeTotal;
double angle_sweep; //Rotation angle of condensate relative to x-axis
double x0_shift, y0_shift; //Optical lattice shift parameters.
double Rxy; //Condensate scaling factor.
double a0x, a0y; //Harmonic oscillator length in x and y directions
double sepMinEpsilon=0.0; //Minimum separation for epsilon.
int kill_idx = -1;
/*
 * Checks that CUDA routines have exited correctly.
 */
int isError(int result, char* c){
    if(result!=0){
        printf("Error has occurred for method %s with return value %d\n",
               c,result);
        exit(result);
    }
    return result;
}
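
// Illustrative use (hypothetical call site, not taken from elsewhere in GPUE):
// wrap a CUDA API call so that any nonzero return code aborts with the name of
// the offending method, e.g.
//   isError(cudaMalloc((void**) &density, sizeof(double)*gsize),
//           (char*)"cudaMalloc");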
/*
 * Used to perform parallel summation on the WFC for normalisation.
 */
void parSum(double* gpuWfc, double* gpuParSum, Grid &par){
    // May need to add double l
    int dimnum = par.ival("dimnum");
    double dx = par.dval("dx");
    double dy = par.dval("dy");
    double dz = par.dval("dz");
    dim3 threads = par.threads;
    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    int zDim = par.ival("zDim");
    dim3 grid_tmp(xDim, 1, 1);
    int gsize = xDim;
    double dg = dx;

    // Setting option for 3d
    if (dimnum > 1){
        grid_tmp.x *= yDim;
        gsize *= yDim;
        dg *= dy;
    }
    if (dimnum > 2){
        grid_tmp.x *= zDim;
        gsize *= zDim;
        dg *= dz;
    }
    dim3 block(grid_tmp.x/threads.x, 1, 1);
    dim3 thread_tmp = threads;
    int pass = 0;

    set_eq<<<par.grid, par.threads>>>(gpuWfc, gpuParSum);

    // Repeated block-wise reductions in shared memory until a single block remains
    while((double)grid_tmp.x/threads.x > 1.0){
        multipass<<<block,thread_tmp,thread_tmp.x*sizeof(double)>>>(
            &gpuParSum[0],&gpuParSum[0]);
        grid_tmp.x /= threads.x;
        block = (int) ceil((double)grid_tmp.x/threads.x);
        pass++;
        //std::cout << grid_tmp.x << '\n';
    }
    thread_tmp = grid_tmp.x;
    multipass<<<1,thread_tmp,thread_tmp.x*sizeof(double)>>>(&gpuParSum[0],
                                                            &gpuParSum[0]);
}
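
// The multipass calls above appear to implement a block-wise tree reduction;
// up to floating-point summation order, the result left at the front of
// gpuParSum matches the serial sketch below (hostParSum is an assumed
// host-side copy of gpuParSum):
//   double total = 0.0;
//   for (int i = 0; i < gsize; ++i){
//       total += hostParSum[i];
//   }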

/*
 * Used to perform parallel summation on the WFC for normalisation.
 */
void parSum(double2* gpuWfc, Grid &par){
    // May need to add double l
    int dimnum = par.ival("dimnum");
    double dx = par.dval("dx");
    double dy = par.dval("dy");
    double dz = par.dval("dz");
    dim3 threads = par.threads;
    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    int zDim = par.ival("zDim");
    dim3 grid_tmp(xDim, 1, 1);
    int gsize = xDim;
    double dg = dx;

    // Setting option for 3d
    if (dimnum > 1){
        grid_tmp.x *= yDim;
        gsize *= yDim;
        dg *= dy;
    }
    if (dimnum > 2){
        grid_tmp.x *= zDim;
        gsize *= zDim;
        dg *= dz;
    }
    dim3 block(grid_tmp.x/threads.x, 1, 1);
    dim3 thread_tmp = threads;
    int pass = 0;

    double *density;
    cudaMalloc((void**) &density, sizeof(double)*gsize);

    // |psi|^2 on the device
    complexMagnitudeSquared<<<par.grid, par.threads>>>(gpuWfc, density);

/*
    std::cout << "grid / threads = " << '\t'
              << (double)grid_tmp.x/threads.x << '\n'
              << "grid.x is: " << grid_tmp.x << '\t'
              << "threads.x are: " << threads.x << '\n';
*/
    // Repeated block-wise reductions in shared memory until a single block remains
    while((double)grid_tmp.x/threads.x > 1.0){
        multipass<<<block,threads,threads.x*sizeof(double)>>>(&density[0],
                                                              &density[0]);
        grid_tmp.x /= threads.x;
        block = (int) ceil((double)grid_tmp.x/threads.x);
        pass++;
        //std::cout << pass << '\t' << grid_tmp.x << '\n';
    }
    thread_tmp = grid_tmp.x;
    multipass<<<1,thread_tmp,thread_tmp.x*sizeof(double)>>>(&density[0],
                                                            &density[0]);

/*
    // Writing out in the parSum Function (not recommended, for debugging)
    double *sum;
    sum = (double *) malloc(sizeof(double)*gsize);
    cudaMemcpy(sum,density,sizeof(double)*gsize,
               cudaMemcpyDeviceToHost);
    std::cout << (sum[0]) << '\n';
*/
    // Normalise the wavefunction in place using the summed density and the
    // volume element dg
    scalarDiv_wfcNorm<<<par.grid,par.threads>>>(gpuWfc, dg, density, gpuWfc);

    cudaFree(density);
}
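
// Minimal usage sketch (host side; the "wfc_gpu" key is an assumption,
// following the par.store()/par.cufftDoubleComplexval() pattern used elsewhere
// in this file): renormalise the device wavefunction in place after a
// propagation step.
//   double2 *wfc_gpu = par.cufftDoubleComplexval("wfc_gpu");
//   parSum(wfc_gpu, par);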

/**
** Matches the optical lattice to the vortex lattice.
** Moire super-lattice project.
**/
void optLatSetup(std::shared_ptr<Vtx::Vortex> centre, const double* V,
                 std::vector<std::shared_ptr<Vtx::Vortex>> &vArray, double theta_opt,
                 double intensity, double* v_opt, const double *x, const double *y,
                 Grid &par){
    std::string data_dir = par.sval("data_dir");
    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    double dx = par.dval("dx");
    double dy = par.dval("dy");
    double dt = par.dval("dt");
    cufftDoubleComplex *EV_opt = par.cufftDoubleComplexval("EV_opt");
    int i,j;
    double sepMin = Tracker::vortSepAvg(vArray,centre);
    sepMin = sepMin*(1 + sepMinEpsilon);
    par.store("Vort_sep",(double)sepMin);

    // Defining the necessary k vectors for the optical lattice

    // Additional /2 as a result of lambda/2 period
    double k_mag = ((2*PI/(sepMin*dx))/2)*(2/sqrt(3));
    double2* k = (double2*) malloc(sizeof(double2)*3);
    par.store("kmag",(double)k_mag);
    k[0].x = k_mag * cos(0*PI/3 + theta_opt);
    k[0].y = k_mag * sin(0*PI/3 + theta_opt);
    k[1].x = k_mag * cos(2*PI/3 + theta_opt);
    k[1].y = k_mag * sin(2*PI/3 + theta_opt);
    k[2].x = k_mag * cos(4*PI/3 + theta_opt);
    k[2].y = k_mag * sin(4*PI/3 + theta_opt);

    double2 *r_opt = (double2*) malloc(sizeof(double2)*xDim);

    //FileIO::writeOut(buffer,data_dir + "r_opt",r_opt,xDim,0);
    par.store("k[0].x",(double)k[0].x);
    par.store("k[0].y",(double)k[0].y);
    par.store("k[1].x",(double)k[1].x);
    par.store("k[1].y",(double)k[1].y);
    par.store("k[2].x",(double)k[2].x);
    par.store("k[2].y",(double)k[2].y);

    // sin(theta_opt)*(sepMin);
    double x_shift = dx*(9+(0.5*xDim-1) - centre->getCoordsD().x);

    // cos(theta_opt)*(sepMin);
    double y_shift = dy*(0+(0.5*yDim-1) - centre->getCoordsD().y);

    printf("Xs=%e\nYs=%e\n",x_shift,y_shift);

    //#pragma omp parallel for private(j)
    for ( j=0; j<yDim; ++j ){
        for ( i=0; i<xDim; ++i ){
            v_opt[j*xDim + i] = intensity*(
                pow( ( cos( k[0].x*( x[i] + x_shift ) +
                            k[0].y*( y[j] + y_shift ) ) ), 2) +
                pow( ( cos( k[1].x*( x[i] + x_shift ) +
                            k[1].y*( y[j] + y_shift ) ) ), 2) +
                pow( ( cos( k[2].x*( x[i] + x_shift ) +
                            k[2].y*( y[j] + y_shift ) ) ), 2)
            );
            EV_opt[(j*xDim + i)].x=cos( -(V[(j*xDim + i)] +
                                          v_opt[j*xDim + i])*(dt/(2*HBAR)));
            EV_opt[(j*xDim + i)].y=sin( -(V[(j*xDim + i)] +
                                          v_opt[j*xDim + i])*(dt/(2*HBAR)));
        }
    }

    // Storing changed variables
    par.store("EV_opt", EV_opt);
    par.store("V", V);
    par.store("V_opt",v_opt);
}
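
// For reference, the loop above evaluates (a sketch of the implemented
// expressions, with r_s = (x_shift, y_shift) and k_0, k_1, k_2 the three
// wavevectors defined above, spaced by 2*PI/3 in angle):
//
//   V_opt(x,y)  = intensity * sum_{n=0..2} cos^2( k_n . (r + r_s) )
//   EV_opt(x,y) = exp( -i * (V + V_opt) * dt / (2*HBAR) )
//
// i.e. a three-beam (triangular) optical lattice folded into the half-step
// potential propagator of the split-operator scheme.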

/*
 * Calculates the total energy of the wavefunction by summing the kinetic
 * contribution (applied in momentum space via the K array) and the potential
 * contribution (applied in position space via the V array), integrated over
 * the grid.
 */
double energy_calc(Grid &par, double2* wfc){
    double* K = par.dsval("K_gpu");
    double* V = par.dsval("V_gpu");

    dim3 grid = par.grid;
    dim3 threads = par.threads;

    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    int zDim = par.ival("zDim");
    int gsize = xDim*yDim*zDim;

    double dx = par.dval("dx");
    double dy = par.dval("dy");
    double dz = par.dval("dz");
    double dg = dx*dy*dz;

    cufftHandle plan;

    if (par.ival("dimnum") == 1){
        plan = par.ival("plan_1d");
    }
    if (par.ival("dimnum") == 2){
        plan = par.ival("plan_2d");
    }
    if (par.ival("dimnum") == 3){
        plan = par.ival("plan_3d");
    }

    // cuFFT transforms are unnormalised; rescale by 1/sqrt(N) per transform
    double renorm_factor = 1.0/pow(gsize,0.5);

    double2 *wfc_c, *wfc_k;
    double2 *energy_r, *energy_k;
    double *energy;

    cudaMalloc((void **) &wfc_c, sizeof(double2)*gsize);
    cudaMalloc((void **) &wfc_k, sizeof(double2)*gsize);
    cudaMalloc((void **) &energy_r, sizeof(double2)*gsize);
    cudaMalloc((void **) &energy_k, sizeof(double2)*gsize);

    cudaMalloc((void **) &energy, sizeof(double)*gsize);

    // Finding conjugate
    vecConjugate<<<grid, threads>>>(wfc, wfc_c);

    // Momentum-space energy
    cufftExecZ2Z(plan, wfc, wfc_k, CUFFT_FORWARD);
    scalarMult<<<grid, threads>>>(wfc_k, renorm_factor, wfc_k);

    vecMult<<<grid, threads>>>(wfc_k, K, energy_k);

    cufftExecZ2Z(plan, energy_k, energy_k, CUFFT_INVERSE);
    scalarMult<<<grid, threads>>>(energy_k, renorm_factor, energy_k);

    cMult<<<grid, threads>>>(wfc_c, energy_k, energy_k);

    // Position-space energy
    vecMult<<<grid, threads>>>(wfc, V, energy_r);
    cMult<<<grid, threads>>>(wfc_c, energy_r, energy_r);

    complexAbsSum<<<grid, threads>>>(energy_r, energy_k, energy);

    double *energy_cpu;
    energy_cpu = (double *)malloc(sizeof(double)*gsize);

    cudaMemcpy(energy_cpu, energy, sizeof(double)*gsize,
               cudaMemcpyDeviceToHost);

    // Integrate the energy density over the grid on the host
    double sum = 0;
    for (int i = 0; i < gsize; ++i){
        sum += energy_cpu[i]*dg;
    }

    free(energy_cpu);
    cudaFree(energy_r);
    cudaFree(energy_k);
    cudaFree(energy);
    cudaFree(wfc_c);
    cudaFree(wfc_k);

    return sum;
}
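
// Illustrative call (hypothetical host-side snippet; the "wfc_gpu" key is an
// assumption, following the par.store()/par.cufftDoubleComplexval() pattern
// used elsewhere in this file):
//   double2 *wfc_gpu = par.cufftDoubleComplexval("wfc_gpu");
//   double energy = energy_calc(par, wfc_gpu);
//   std::cout << "Total energy: " << energy << '\n';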