#include "../include/evolution.h"
#include "../include/vortex_3d.h"
    // Re-establishing variables from parsed Grid class
    std::string data_dir = par.sval("data_dir");
    int dimnum = par.ival("dimnum");
    double omega = par.dval("omega");
    double angle_sweep = par.dval("angle_sweep");
    double gdt = par.dval("gdt");
    double dt = par.dval("dt");
    double omegaX = par.dval("omegaX");
    double omegaY = par.dval("omegaY");
    double mass = par.dval("mass");
    double dx = par.dval("dx");
    double interaction = par.dval("interaction");
    double laser_power = par.dval("laser_power");
    double gDenConst = par.dval("gDenConst");
    double thresh_const = par.dval("thresh_const");
    double *x = par.dsval("x");
    double *V = par.dsval("V");
    double *Phi = par.dsval("Phi");
    double2 *gpu1dpAx = par.cufftDoubleComplexval("pAx_gpu");
    double *Phi_gpu = par.dsval("Phi_gpu");
    bool write_it = par.bval("write_it");
    bool graph = par.bval("graph");
    int N = par.ival("atoms");
    int printSteps = par.ival("printSteps");
    bool nonlin = par.bval("gpe");
    bool lz = par.bval("corotating");
    bool ramp = par.bval("ramp");
    int ramp_type = par.ival("ramp_type");
    int xDim = par.ival("xDim");
    cufftDoubleComplex *wfc = par.cufftDoubleComplexval("wfc");
    cufftDoubleComplex *gpuWfc = par.cufftDoubleComplexval("wfc_gpu");
    cufftDoubleComplex *K_gpu =
        par.cufftDoubleComplexval("K_gpu");
    cufftDoubleComplex *V_gpu =
        par.cufftDoubleComplexval("V_gpu");
    gpu1dpAy = par.cufftDoubleComplexval("pAy_gpu");
    yDim = par.ival("yDim");
    gpu1dpAz = par.cufftDoubleComplexval("pAz_gpu");
    zDim = par.ival("zDim");
    int gridSize = xDim * yDim * zDim;
    // getting data from Cuda class
    cufftHandle plan_1d = par.ival("plan_1d");
    cufftHandle plan_2d = par.ival("plan_2d");
    cufftHandle plan_other2d = par.ival("plan_other2d");
    cufftHandle plan_3d = par.ival("plan_3d");
    cufftHandle plan_dim2 = par.ival("plan_dim2");
    cufftHandle plan_dim3 = par.ival("plan_dim3");
    dim3 threads = par.threads;
    // Because no two operations are created equal:
    // multiplication is faster than division.
    double renorm_factor_nd = 1.0 / pow(gridSize, 0.5);
    double renorm_factor_1d = 1.0 / pow(xDim, 0.5);
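    // cuFFT transforms are unnormalized, so a forward/inverse pair scales the
    // data by the transform length. Multiplying by 1/sqrt(n) after every
    // forward and every inverse transform keeps the wavefunction norm fixed
    // without performing a per-element division.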
    printf("Timestep for groundstate solver set as: %E\n", Dt);
    printf("Timestep for evolution set as: %E\n", Dt);
    double omega_0 = omega * omegaX;
    // ** ############################################################## ** //
    // **          HERE BE DRAGONS OF THE MOST DANGEROUS KIND!           ** //
    // ** ############################################################## ** //
    // 2D VORTEX TRACKING
    double mask_2d = par.dval("mask_2d");
    double x0_shift = par.dval("x0_shift");
    double y0_shift = par.dval("y0_shift");
    int charge = par.ival("charge");
    int kill_idx = par.ival("kill_idx");
    cufftDoubleComplex *EV_opt = par.cufftDoubleComplexval("EV_opt");
    int kick_it = par.ival("kick_it");
    double *V_opt = par.dsval("V_opt");
    // Double buffering; the free and calloc operations may eventually be
    // threaded to hide their time penalty. Or may not bother.
    int num_vortices[2] = {0, 0};

    // binary matrix of size xDim*yDim:
    // 1 for a vortex at the specified index, 0 otherwise
    //int* olMaxLocation = (int*) calloc(xDim*yDim, sizeof(int));
    std::shared_ptr<Vtx::Vortex> central_vortex; //vortex closest to the central position
    central_vortex.coords.x = -1;
    central_vortex.coords.y = -1;
    central_vortex.coordsD.x = -1.;
    central_vortex.coordsD.y = -1.;
    central_vortex.wind = 0;
    // Angle of vortex lattice. Add to optical lattice for alignment.

    // array of vortex coordinates from vortexLocation 1's
    //struct Vtx::Vortex *vortCoords = NULL;
    std::shared_ptr<Vtx::VtxList> vortCoords = std::make_shared<Vtx::VtxList>(7);
    //std::vector<std::shared_ptr<Vtx::Vortex>> vortCoords;

    //Previous array of vortex coordinates from vortexLocation 1's
    //struct Vtx::Vortex *vortCoordsP = NULL;
    //std::vector<struct Vtx::Vortex> vortCoordsP;
    std::shared_ptr<Vtx::VtxList> vortCoordsP = std::make_shared<Vtx::VtxList>(7);

    LatticeGraph::Lattice lattice; //Vortex lattice graph.
    // Assuming triangular lattice at rotation

    //std::cout << "numSteps is: " << numSteps << '\n';
    // Iterating through all of the steps in either gsteps or esteps.
    for (int i = 0; i < numSteps; ++i){
        //Adjusts omega for the appropriate trap frequency.
        omega_0 = (double)omega;
        omega_0 = (double)i / (double)(i+1);
        omega_0 = (double)omega / (double)(numSteps);
        omega_0 = (double)(i+1) / (double)i;
        // Print-out at pre-determined rate.
        // Vortex & wfc analysis performed here also.
        if (i % printSteps == 0) {
            // If the unit_test flag is on, we need a special case
            printf("Step: %d Omega: %lf\n", i, omega_0);
            cudaMemcpy(wfc, gpuWfc, sizeof(cufftDoubleComplex)*xDim*yDim*zDim,
                       cudaMemcpyDeviceToHost);
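            // The host copy of the wavefunction is used below for vortex
            // tracking and file output.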
            // Printing out time of iteration
            time_spent = (double) (end - begin) / CLOCKS_PER_SEC;
            printf("Time spent: %lf\n", time_spent);
            std::string fileName = "";
            //printf("ramp=%d gstate=%d rg=%d \n",
            //       ramp, gstate, ramp | (gstate << 1));
            switch (ramp | (gstate << 1)) {
                case 0: //Groundstate solver, constant Omega value.
                    fileName = "wfc_0_const";
                case 1: //Groundstate solver, ramped Omega value.
                    fileName = "wfc_0_ramp";
                case 2: //Real-time evolution, constant Omega value.
                    // Note: In the case of 3d, we need to think about
                    //       vortex tracking in a new way.
                    //       It may be as simple as splitting the problem
                    //       into 2D elements and working from there, but
                    //       let's look into it when we need it in the future.
                    std::cout << "commencing 3d vortex tracking" << '\n';

                    // Creating the necessary double* values
                    double* edges = (double *)malloc(sizeof(double)
                    find_edges(par, wfc, edges);
                    double* edges_gpu = par.dsval("edges_gpu");

                    // Now we need to output everything
                    FileIO::writeOutDouble(buffer, data_dir+"Edges",
                    else if (dimnum == 2){
                        vortexLocation = (int *) calloc(xDim * yDim, sizeof(int));
                        num_vortices[0] = Tracker::findVortex(vortexLocation, wfc,
                                                              mask_2d, xDim, x, i);
                        // If initial step, locate vortices, least-squares to find
                        // exact centre, calculate lattice angle, generate optical
                        if (num_vortices[0] > 0){
                            //Reserve enough space for the vortices
                            //reserve(num_vortices[0]);
                            vortCoords = std::make_shared<Vtx::VtxList>
                            vortCoordsP = std::make_shared<Vtx::VtxList>

                            //Locate the vortex positions to the nearest grid, then
                            //perform a least-squares fit to determine the location
                            //to sub-grid resolution.
                            Tracker::vortPos(vortexLocation,
                                    vortCoords->getVortices(), xDim, wfc);
                            Tracker::lsFit(vortCoords->getVortices(), wfc, xDim);

                            //Find the centre-most vortex in the lattice
                            central_vortex = Tracker::vortCentre(vortCoords->
                                    getVortices(), xDim);
                            //Determine the Angle formed by the lattice relative to
                            vort_angle = Tracker::vortAngle(vortCoords->
                                    getVortices(), central_vortex);
                            //Store the vortex angle in the parameter file
                            par.store("Vort_angle", vort_angle);

                            //Determine average lattice spacing.
                            double sepAvg = Tracker::vortSepAvg(vortCoords->
                                    getVortices(), central_vortex);

                            par.store("Central_vort_x",
                                      (double) central_vortex->getCoords().x);
                            par.store("Central_vort_y",
                                      (double) central_vortex->getCoords().y);
                            par.store("Central_vort_winding",
                                      (double) central_vortex->getWinding());
                            par.store("Num_vort", (double) vortCoords->
                                      getVortices().size());

                            //Set up the optical lattice to match the spacing and
                            //angle + angle_sweep of the vortex lattice.
                            //Amplitude matched by setting laser_power.
                            optLatSetup(central_vortex, V,
                                        vortCoords->getVortices(),
                                        vort_angle + PI * angle_sweep / 180.0,
                                        laser_power * HBAR * sqrt(omegaX * omegaY),
                            // If the kick_it param is 2, perform a single kick of
                            // the optical lattice for the first timestep only.
                            // This is done by loading the EV_opt = exp(V + V_opt)
                            // array into GPU memory for the potential.
                                printf("Kicked it 1\n");
                                cudaMemcpy(V_gpu, EV_opt,
                                           sizeof(cufftDoubleComplex) * xDim * yDim,
                                           cudaMemcpyHostToDevice);

                            // Write out the newly specified potential
                            // and exp potential to files
                            FileIO::writeOutDouble(buffer, data_dir + "V_opt_1",
                                                   V_opt, xDim * yDim, 0);
                            FileIO::writeOut(buffer, data_dir + "EV_opt_1", EV_opt,

                            //Store necessary parameters to Params.dat file.
                            FileIO::writeOutParam(buffer, par,
                                                  data_dir + "Params.dat");
                        //If i != 0 and the number of vortices changes
                        // if num_vortices[1] < num_vortices[0] ... Fewer vortices
                        if (num_vortices[0] > 0){
                            Tracker::vortPos(vortexLocation,
                                    vortCoords->getVortices(), xDim, wfc);
                            Tracker::lsFit(vortCoords->getVortices(),
                            Tracker::vortArrange(vortCoords->getVortices(),
                                    vortCoordsP->getVortices());
                            FileIO::writeOutInt(buffer, data_dir + "vLoc_",
                                    vortexLocation, xDim * yDim, i);
                        // The following will eventually be modified and moved into
                        // a new library that works closely with GPUE. Also used to
                        // define vortex elimination using graph positions.
                        if (graph && num_vortices[0] > 0) {
                            for (int ii = 0; ii < vortCoords->getVortices().size();
                                std::shared_ptr<LatticeGraph::Node>
                                        n(new LatticeGraph::Node(
                                            *vortCoords->getVortices().at(ii).get()));
                                lattice.addVortex(std::move(n));

                            unsigned int *uids = (unsigned int *) malloc(
                                    sizeof(unsigned int) *
                                    lattice.getVortices().size());
                            for (size_t a = 0; a < lattice.getVortices().size(); ++a){
                                uids[a] = lattice.getVortexIdx(a)->getUid();

                            //Lambda for vortex annihilation/creation.
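                            // killIt removes or modifies the vortex with index idx
                            // by generating a phase-winding field with
                            // WFC::phaseWinding, copying it to the GPU, and
                            // multiplying it into the wavefunction with cMultPhi;
                            // a nonzero (delta_x, delta_y) re-imprints the winding
                            // at a shifted position.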
                            auto killIt = [&](int idx, int winding,
                                              double delta_x, double delta_y){
                                if (abs(delta_x) > 0 || abs(delta_y) > 0){
                                    // Killing initial vortex and then
                                    // imprinting new one
                                    WFC::phaseWinding(Phi, 1, x, y, dx, dy,
                                        lattice.getVortexUid(idx)->
                                            getData().getCoordsD().x,
                                        lattice.getVortexUid(idx)->
                                            getData().getCoordsD().y,

                                    cudaMemcpy(Phi_gpu, Phi,
                                               sizeof(double) * xDim * yDim,
                                               cudaMemcpyHostToDevice);
                                    cMultPhi<<<grid, threads>>>(gpuWfc, Phi_gpu,

                                    // Imprinting new one
                                    WFC::phaseWinding(Phi, cval, x, y, dx, dy,
                                        lattice.getVortexUid(idx)->
                                            getData().getCoordsD().x + delta_x,
                                        lattice.getVortexUid(idx)->
                                            getData().getCoordsD().y + delta_y,

                                    // Sending to device for imprinting
                                    cudaMemcpy(Phi_gpu, Phi,
                                               sizeof(double) * xDim * yDim,
                                               cudaMemcpyHostToDevice);
                                    cMultPhi<<<grid, threads>>>(gpuWfc, Phi_gpu,

                                    int cval = -(winding-1);
                                    WFC::phaseWinding(Phi, cval, x, y, dx, dy,
                                        lattice.getVortexUid(idx)->
                                            getData().getCoordsD().x,
                                        lattice.getVortexUid(idx)->
                                            getData().getCoordsD().y,
                                    cudaMemcpy(Phi_gpu, Phi,
                                               sizeof(double) * xDim * yDim,
                                               cudaMemcpyHostToDevice);
                                    cMultPhi<<<grid, threads>>>(gpuWfc, Phi_gpu,

                            killIt(kill_idx, charge, x0_shift, y0_shift);
                            lattice.createEdges(1.5 * 2e-5 / dx);

                            //Assumes that vortices only form edges when their
                            //distance is up to 1.5*2e-5. Replace with edges
                            //determined by Delaunay triangulation for better
                            //computational scaling (and sanity).

                            //O(n^2) -> terrible implementation. It works for now.
                            //Generates the adjacency matrix from the graph and
                            //outputs it in a Mathematica-compatible format.
                            adjMat = (double *)calloc(lattice.getVortices().size() *
                                                      lattice.getVortices().size(),
                            lattice.genAdjMat(adjMat);
                            FileIO::writeOutAdjMat(buffer, data_dir + "graph",
                                    lattice.getVortices().size(), i);
                            //Free and clear all memory blocks
                            lattice.getVortices().clear();
                            lattice.getEdges().clear();

                        //Write out the vortex locations
                        FileIO::writeOutVortex(buffer, data_dir + "vort_arr",
                                               vortCoords->getVortices(), i);
                        printf("Located %d vortices\n",
                               (int) vortCoords->getVortices().size());

                        //Free memory block for now.
                        free(vortexLocation);

                        //Current values become previous values.
                        num_vortices[1] = num_vortices[0];
                        vortCoords->getVortices().swap(vortCoordsP->getVortices());
                        vortCoords->getVortices().clear();
                    fileName = "wfc_ev_ramp";

            //std::cout << "writing" << '\n';
            FileIO::writeOut(buffer, data_dir + fileName,
                             wfc, xDim*yDim*zDim, i);
            //std::cout << "written" << '\n';
            if (par.bval("energy_calc")){
                double energy = energy_calc(par, gpuWfc);
                // Now opening and closing file for writing.
                std::ofstream energy_out;
                std::string mode = "energyi.dat";
                energy_out.open(data_dir + mode);
                energy_out.open(data_dir + mode, std::ios::out |
                energy_out << energy << '\n';
                printf("Energy[t@%d]=%E\n", i, energy);
                par.store("energy", energy);
            // No longer writing out

        // ** ########################################################## ** //
        // **                    More F'n' Dragons!                      ** //
        // ** ########################################################## ** //
        if (par.bval("V_time")){
            EqnNode_gpu* V_eqn = par.astval("V");
            int e_num = par.ival("V_num");
            cMultDensity_ast<<<grid,threads>>>(V_eqn, gpuWfc, gpuWfc,
                dx, dy, dz, time, e_num, 0.5*Dt,
                mass, gstate, interaction*gDenConst);

            cMultDensity<<<grid,threads>>>(V_gpu, gpuWfc, gpuWfc, 0.5*Dt,
                mass, gstate, interaction*gDenConst);

        if (par.bval("V_time")){
            EqnNode_gpu* V_eqn = par.astval("V");
            int e_num = par.ival("V_num");
            ast_op_mult<<<grid,threads>>>(gpuWfc, gpuWfc, V_eqn,
                dx, dy, dz, time, e_num, gstate+1, Dt);

            cMult<<<grid,threads>>>(V_gpu, gpuWfc, gpuWfc);
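        // Split-operator step: the potential (and nonlinear) factor above is
        // applied for half a timestep, the kinetic factor is applied for a full
        // timestep in momentum space below, and a second potential half-step
        // follows, with a 1/sqrt(n) renormalization after every cuFFT call.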
        result = cufftExecZ2Z(plan_3d, gpuWfc, gpuWfc, CUFFT_FORWARD);

        scalarMult<<<grid,threads>>>(gpuWfc, renorm_factor_nd, gpuWfc);
        if (par.bval("K_time")){
            EqnNode_gpu* k_eqn = par.astval("k");
            int e_num = par.ival("k_num");
            ast_op_mult<<<grid,threads>>>(gpuWfc, gpuWfc, k_eqn,
                dx, dy, dz, time, e_num, gstate+1, Dt);

            cMult<<<grid,threads>>>(K_gpu, gpuWfc, gpuWfc);

        result = cufftExecZ2Z(plan_3d, gpuWfc, gpuWfc, CUFFT_INVERSE);

        scalarMult<<<grid,threads>>>(gpuWfc, renorm_factor_nd, gpuWfc);
        if (par.bval("V_time")){
            EqnNode_gpu* V_eqn = par.astval("V");
            int e_num = par.ival("V_num");
            cMultDensity_ast<<<grid,threads>>>(V_eqn, gpuWfc, gpuWfc,
                dx, dy, dz, time, e_num, 0.5*Dt,
                mass, gstate, interaction*gDenConst);

            cMultDensity<<<grid,threads>>>(V_gpu, gpuWfc, gpuWfc, 0.5*Dt,
                mass, gstate, interaction*gDenConst);

        if (par.bval("V_time")){
            EqnNode_gpu* V_eqn = par.astval("V");
            int e_num = par.ival("V_num");
            ast_op_mult<<<grid,threads>>>(gpuWfc, gpuWfc, V_eqn,
                dx, dy, dz, time, e_num, gstate+1, Dt);

            cMult<<<grid,threads>>>(V_gpu, gpuWfc, gpuWfc);
        // Angular momentum pAy-pAx (if engaged) //

            // Multiplying by ramping factor if necessary
            // Note: using scalarPow to do the scaling inside of the exp
            scalarPow<<<grid,threads>>>((cufftDoubleComplex*) gpu1dpAy,
                                        (cufftDoubleComplex*) gpu1dpAy);

            scalarPow<<<grid,threads>>>((cufftDoubleComplex*) gpu1dpAx,
                                        (cufftDoubleComplex*) gpu1dpAx);

            scalarPow<<<grid,threads>>>((cufftDoubleComplex*) gpu1dpAz,
                                        (cufftDoubleComplex*) gpu1dpAz);
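            // scalarPow raises the precomputed gauge-phase arrays to a power so
            // that the ramping factor rescales the exponent itself rather than
            // the complex values linearly.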
        int size = xDim*zDim;
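        // Each angular-momentum factor is applied in the representation where it
        // is diagonal: a batched 1D FFT along one axis, multiplication by the
        // corresponding gpu1dpA* array (or its time-dependent AST form), an
        // inverse FFT, and a 1/sqrt(n) renormalization after every transform.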
            case 0: //Groundstate solver, even step
                // 1d forward / mult by Ay
                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ay_time")){
                    EqnNode_gpu* Ay_eqn = par.astval("Ay");
                    int e_num = par.ival("Ay_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ay_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAy, gpuWfc);

                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                // loop to multiply by Ax
                for (int i = 0; i < yDim; i++){
                    //size = xDim * zDim;
                    result = cufftExecZ2Z(plan_dim2,
                        &gpuWfc[i*size], CUFFT_FORWARD);

                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ax_time")){
                    EqnNode_gpu* Ax_eqn = par.astval("Ax");
                    int e_num = par.ival("Ax_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ax_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAx, gpuWfc);

                for (int i = 0; i < yDim; i++){
                    //size = xDim * zDim;
                    result = cufftExecZ2Z(plan_dim2,
                        &gpuWfc[i*size], CUFFT_INVERSE);

                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                result = cufftExecZ2Z(plan_dim3, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Az_time")){
                    EqnNode_gpu* Az_eqn = par.astval("Az");
                    int e_num = par.ival("Az_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Az_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAz, gpuWfc);

                result = cufftExecZ2Z(plan_dim3, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
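            // Odd steps apply the same factors in the reverse order (Az, Ax, Ay
            // instead of Ay, Ax, Az), which symmetrizes the operator splitting
            // over consecutive steps.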
            case 1: //Groundstate solver, odd step
                result = cufftExecZ2Z(plan_dim3, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Az_time")){
                    EqnNode_gpu* Az_eqn = par.astval("Az");
                    int e_num = par.ival("Az_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Az_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAz, gpuWfc);

                result = cufftExecZ2Z(plan_dim3, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                // loop to multiply by Ax
                for (int i = 0; i < yDim; i++){
                    //size = xDim * zDim;
                    result = cufftExecZ2Z(plan_dim2,
                        &gpuWfc[i*size], CUFFT_FORWARD);

                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ax_time")){
                    EqnNode_gpu* Ax_eqn = par.astval("Ax");
                    int e_num = par.ival("Ax_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ax_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAx, gpuWfc);

                for (int i = 0; i < yDim; i++){
                    //size = xDim * zDim;
                    result = cufftExecZ2Z(plan_dim2,
                        &gpuWfc[i*size], CUFFT_INVERSE);

                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                // 1d forward / mult by Ay
                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ay_time")){
                    EqnNode_gpu* Ay_eqn = par.astval("Ay");
                    int e_num = par.ival("Ay_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ay_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAy, gpuWfc);

                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
        else if (dimnum == 2){
            case 0: //Groundstate solver, even step
                // 1d forward / mult by Ay
                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ay_time")){
                    EqnNode_gpu* Ay_eqn = par.astval("Ay");
                    int e_num = par.ival("Ay_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ay_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAy, gpuWfc);

                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                result = cufftExecZ2Z(plan_other2d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ax_time")){
                    EqnNode_gpu* Ax_eqn = par.astval("Ax");
                    int e_num = par.ival("Ax_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ax_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAx, gpuWfc);

                result = cufftExecZ2Z(plan_other2d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
            case 1: //Groundstate solver, odd step
                result = cufftExecZ2Z(plan_other2d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ax_time")){
                    EqnNode_gpu* Ax_eqn = par.astval("Ax");
                    int e_num = par.ival("Ax_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ax_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAx, gpuWfc);

                result = cufftExecZ2Z(plan_other2d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
                if (par.bval("Ay_time")){
                    EqnNode_gpu* Ay_eqn = par.astval("Ay");
                    int e_num = par.ival("Ay_num");
                    ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                        Ay_eqn, dx, dy, dz, time, e_num);

                    cMult<<<grid,threads>>>(gpuWfc,
                        (cufftDoubleComplex*) gpu1dpAy, gpuWfc);

                result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc,
                scalarMult<<<grid,threads>>>(gpuWfc,
                    renorm_factor_1d, gpuWfc);
        else if (dimnum == 1){
            result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc, CUFFT_FORWARD);
            scalarMult<<<grid,threads>>>(gpuWfc, renorm_factor_1d, gpuWfc);
            if (par.bval("Ax_time")){
                EqnNode_gpu* Ax_eqn = par.astval("Ax");
                int e_num = par.ival("Ax_num");
                ast_cmult<<<grid,threads>>>(gpuWfc, gpuWfc,
                    Ax_eqn, dx, dy, dz, time, e_num);

                cMult<<<grid,threads>>>(gpuWfc,
                    (cufftDoubleComplex*) gpu1dpAx, gpuWfc);

            result = cufftExecZ2Z(plan_1d, gpuWfc, gpuWfc, CUFFT_INVERSE);
            scalarMult<<<grid,threads>>>(gpuWfc, renorm_factor_1d, gpuWfc);
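    // Store the updated host and device wavefunction pointers back into the
    // parameter object so later output and analysis stages see the evolved state.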
    par.store("wfc", wfc);
    par.store("wfc_gpu", gpuWfc);