1 #include "../include/operators.h"
2 #include "../include/kernels.h"
3 #include "../include/dynamic.h"
17 // Function to take the curl of Ax and Ay in 2d
18 // note: This is on the cpu, there should be a GPU version too.
// Computes the z-component of the curl of the 2d gauge field (Ax, Ay) on
// the CPU using forward finite differences. Returns a malloc'd array of
// xDim*yDim doubles owned by the caller (caller must free()).
// NOTE(review): this span omits the declarations of `curl` and `index`,
// the index assignment, and the trailing return -- confirm against the
// full file; comments below annotate only the visible lines.
double *curl2d(Grid &par, double *Ax, double *Ay){
    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");

    // Byte size of one scalar field on the 2d grid.
    int size = sizeof(double) * xDim * yDim;
    // NOTE(review): malloc leaves the array uninitialized and the loop
    // below never writes the last y row -- those entries stay garbage.
    curl = (double *)malloc(size);

    // Note: To take the curl, we need a change in x and y to create a dx or dy
    // For this reason, we have added yDim to y and 1 to x
    for (int i = 0; i < xDim; i++){
        for (int j = 0; j < yDim-1; j++){
            // Forward differences: +xDim steps one row (y neighbor) and
            // +1 steps one column (x neighbor), assuming row-major layout.
            // NOTE(review): the +1 access can cross a row boundary at the
            // right edge since i runs over the full xDim -- confirm.
            curl[index] = (Ay[index] - Ay[index+xDim])
                          - (Ax[index] - Ax[index+1]);
42 double *curl3d_r(Grid &par, double *Bx, double *By){
43 int xDim = par.ival("xDim");
44 int yDim = par.ival("yDim");
45 int zDim = par.ival("zDim");
47 int size = sizeof(double) * xDim * yDim * zDim;
49 curl = (double *)malloc(size);
51 for (int i = 0; i < xDim*yDim*zDim; ++i){
52 curl[i] = sqrt(Bx[i]*Bx[i] + By[i] * By[i]);
58 double *curl3d_phi(Grid &par, double *Bx, double *By){
59 int xDim = par.ival("xDim");
60 int yDim = par.ival("yDim");
61 int zDim = par.ival("zDim");
63 int size = sizeof(double) * xDim * yDim * zDim;
65 curl = (double *)malloc(size);
67 for (int i = 0; i < xDim*yDim*zDim; ++i){
68 curl[i] = atan2(By[i], Bx[i]);
// Function to take the x-component of the curl of the 3d field (Ax, Ay, Az)
// note: This is on the cpu, there should be a GPU version too.
78 double *curl3d_x(Grid &par, double *Ax, double *Ay, double *Az){
79 int xDim = par.ival("xDim");
80 int yDim = par.ival("yDim");
81 int zDim = par.ival("zDim");
83 int size = sizeof(double) * xDim * yDim * zDim;
85 curl = (double *)malloc(size);
89 // Note: To take the curl, we need a change in x and y to create a dx or dy
90 // For this reason, we have added yDim to y and 1 to x
91 for (int i = 0; i < xDim; i++){
92 for (int j = 0; j < yDim-1; j++){
93 for (int k = 0; k < zDim - 1; k++){
94 index = k + zDim * j + zDim * yDim * i;
95 curl[index] = (Az[index] - Az[index + xDim])
96 -(Ay[index] - Ay[index + xDim*yDim]);
// Function to take the y-component of the curl of the 3d field (Ax, Ay, Az)
// note: This is on the cpu, there should be a GPU version too.
107 double *curl3d_y(Grid &par, double *Ax, double *Ay, double *Az){
108 int xDim = par.ival("xDim");
109 int yDim = par.ival("yDim");
110 int zDim = par.ival("zDim");
112 int size = sizeof(double) * xDim * yDim * zDim;
114 curl = (double *)malloc(size);
118 // Note: To take the curl, we need a change in x and y to create a dx or dy
119 // For this reason, we have added yDim to y and 1 to x
120 for (int i = 0; i < xDim-1; i++){
121 for (int j = 0; j < yDim; j++){
122 for (int k = 0; k < zDim - 1; k++){
123 index = k + zDim * j + zDim * yDim * i;
124 curl[index] = -(Az[index] - Az[index + 1])
125 -(Ax[index] - Ax[index + xDim*yDim]);
// Function to take the z-component of the curl of the 3d field (Ax, Ay, Az)
// note: This is on the cpu, there should be a GPU version too.
136 double *curl3d_z(Grid &par, double *Ax, double *Ay, double *Az){
137 int xDim = par.ival("xDim");
138 int yDim = par.ival("yDim");
139 int zDim = par.ival("zDim");
141 int size = sizeof(double) * xDim * yDim * zDim;
143 curl = (double *)malloc(size);
147 // Note: To take the curl, we need a change in x and y to create a dx or dy
148 // For this reason, we have added yDim to y and 1 to x
149 for (int i = 0; i < xDim - 1; i++){
150 for (int j = 0; j < yDim-1; j++){
151 for (int k = 0; k < zDim; k++){
152 index = k + zDim * j + zDim * yDim * i;
153 curl[index] = (Ay[index] - Ay[index + 1])
154 -(Ax[index] - Ax[index + xDim]);
162 // Function to check whether a file exists
// Function to check whether a file exists.
// If `filename` does not name an existing path, prints a message, reads a
// replacement path from stdin, and re-checks recursively.
// Returns a path that existed (per stat) at the time of the check.
std::string filecheck(std::string filename){

    struct stat buffer = {0};
    // stat() returns -1 (and sets errno) when the path cannot be resolved.
    if (stat(filename.c_str(), &buffer) == -1){
        std::cout << "File " << filename << " does not exist!" << '\n';
        std::cout << "Please select a new file:" << '\n';
        std::cin >> filename;
        // Recurse so the user-supplied replacement is validated too.
        filename = filecheck(filename);
    }

    return filename;
}
// Function to read a gauge-field component (e.g. Ax) from file.
// Reads whitespace-separated numeric values from `filename`, scales each
// by `omega`, and writes them sequentially into A. The caller must ensure
// A holds at least as many elements as the file provides; reading stops
// at EOF or the first non-numeric token.
// Note that this comes with a special method in init...
void file_A(std::string filename, double *A, double omega){
    std::fstream infile(filename, std::ios_base::in);

    double inval;
    int count = 0;
    while (infile >> inval){
        A[count] = omega*inval;
        ++count;
    }
}
189 /*----------------------------------------------------------------------------//
191 *-----------------------------------------------------------------------------*/
193 // Function to generate momentum grids
// Builds the position-space (x, y, z) and momentum-space (px, py, pz) axes
// on the host and mirrors all six onto the GPU, storing the device
// pointers in par. The filled halves place the origin at index dim/2 and
// the momentum axes start at -pMax at the center index.
// NOTE(review): several lines of this function are not visible in this
// span (dy/dz reads, the first halves of the axis-filling loops, the
// dimnum==2 branch header, closing braces, and any host-side stores);
// comments below annotate only the visible lines.
void generate_p_space(Grid &par){

    int dimnum = par.ival("dimnum");
    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    int zDim = par.ival("zDim");
    double xMax = par.dval("xMax");
    yMax = par.dval("yMax");
    zMax = par.dval("zMax");

    double pxMax = par.dval("pxMax");
    pyMax = par.dval("pyMax");
    pzMax = par.dval("pzMax");

    double dx = par.dval("dx");

    double dpx = par.dval("dpx");
    dpy = par.dval("dpy");
    dpz = par.dval("dpz");

    // Host axes and their device mirrors.
    double *x, *y, *z, *px, *py, *pz,
           *x_gpu, *y_gpu, *z_gpu,
           *px_gpu, *py_gpu, *pz_gpu;

    x = (double *) malloc(sizeof(double) * xDim);
    y = (double *) malloc(sizeof(double) * yDim);
    z = (double *) malloc(sizeof(double) * zDim);
    px = (double *) malloc(sizeof(double) * xDim);
    py = (double *) malloc(sizeof(double) * yDim);
    pz = (double *) malloc(sizeof(double) * zDim);

    // Fill the upper half of the x axis; the lower (negative) half is
    // presumably filled by lines not visible here -- confirm.
    for(int i=0; i<xDim/2; ++i){
        x[i + (xDim/2)] = i*dx;

        px[i + (xDim/2)] = -pxMax + i*dpx;

    for(int i=0; i<yDim/2; ++i){
        y[i + (yDim/2)] = i*dy;

        py[i + (yDim/2)] = -pyMax + i*dpy;

    // Presumably the 2d case: z axis collapsed to trivial values.
    for(int i = 0; i < zDim; ++i){

    else if(dimnum == 3){
        // Full 3d case: populate all three axes the same way.
        for(int i=0; i<xDim/2; ++i){
            x[i + (xDim/2)] = i*dx;

            px[i + (xDim/2)] = -pxMax + i*dpx;

        for(int i=0; i<yDim/2; ++i){
            y[i + (yDim/2)] = i*dy;

            py[i + (yDim/2)] = -pyMax + i*dpy;

        for(int i=0; i<zDim/2; ++i){
            z[i + (zDim/2)] = i*dz;

            pz[i + (zDim/2)] = -pzMax + i*dpz;

    else if (dimnum == 1){
        // 1d case: only the x axis carries data.
        for(int i=0; i<xDim/2; ++i){
            x[i + (xDim/2)] = i*dx;

            px[i + (xDim/2)] = -pxMax + i*dpx;

        for(int i = 0; i < zDim; ++i){

    // Now move these items to the gpu
    cudaMalloc((void**) &x_gpu, sizeof(double) * xDim);
    cudaMalloc((void**) &y_gpu, sizeof(double) * yDim);
    cudaMalloc((void**) &z_gpu, sizeof(double) * zDim);
    cudaMalloc((void**) &px_gpu, sizeof(double) * xDim);
    cudaMalloc((void**) &py_gpu, sizeof(double) * yDim);
    cudaMalloc((void**) &pz_gpu, sizeof(double) * zDim);

    cudaMemcpy(x_gpu, x, sizeof(double)*xDim, cudaMemcpyHostToDevice);
    cudaMemcpy(y_gpu, y, sizeof(double)*yDim, cudaMemcpyHostToDevice);
    cudaMemcpy(z_gpu, z, sizeof(double)*zDim, cudaMemcpyHostToDevice);
    cudaMemcpy(px_gpu, px, sizeof(double)*xDim, cudaMemcpyHostToDevice);
    cudaMemcpy(py_gpu, py, sizeof(double)*yDim, cudaMemcpyHostToDevice);
    cudaMemcpy(pz_gpu, pz, sizeof(double)*zDim, cudaMemcpyHostToDevice);

    // Publish the device axes for later kernels (generate_K, generate_gauge).
    par.store("x_gpu",x_gpu);
    par.store("y_gpu",y_gpu);
    par.store("z_gpu",z_gpu);
    par.store("px_gpu",px_gpu);
    par.store("py_gpu",py_gpu);
    par.store("pz_gpu",pz_gpu);
351 // This function is basically a wrapper to call the appropriate K kernel
// Generates the kinetic-energy field K = hbar^2 p^2 / (2m) on the GPU via
// simple_K and mirrors the result back to a host copy.
// NOTE(review): the declarations of K and K_gpu (and any trailing host
// store of K) are not visible in this span.
void generate_K(Grid &par){

    // For k, we need xp, yp, and zp. These will also be used in generating
    // pAxyz parameters, so it should already be stored in par.
    double *px_gpu = par.dsval("px_gpu");
    double *py_gpu = par.dsval("py_gpu");
    double *pz_gpu = par.dsval("pz_gpu");
    // NOTE(review): gSize is an element count read from ival but held in a
    // double and used below to size allocations -- confirm intended.
    double gSize = par.ival("gSize");
    double mass = par.dval("mass");

    // Creating K to work with
    K = (double*)malloc(sizeof(double)*gSize);
    cudaMalloc((void**) &K_gpu, sizeof(double)*gSize);

    // Elementwise kinetic term on the device (see simple_K below).
    simple_K<<<par.grid, par.threads>>>(px_gpu, py_gpu, pz_gpu, mass, K_gpu);

    cudaMemcpy(K, K_gpu, sizeof(double)*gSize, cudaMemcpyDeviceToHost);

    par.store("K_gpu",K_gpu);
// Simple kernel for generating K
// Computes K = (HBAR^2 / 2m) * |p|^2 at each grid point, reading the 1d
// momentum axes xp/yp/zp at this thread's per-axis coordinates.
// NOTE(review): the output-parameter line of the signature (double *K)
// and the closing lines of the body are not visible in this span.
__global__ void simple_K(double *xp, double *yp, double *zp, double mass,

    // Flattened 3d output index and per-axis coordinates.
    unsigned int gid = getGid3d3d();
    unsigned int xid = blockDim.x*blockIdx.x + threadIdx.x;
    unsigned int yid = blockDim.y*blockIdx.y + threadIdx.y;
    unsigned int zid = blockDim.z*blockIdx.z + threadIdx.z;
    K[gid] = (HBAR*HBAR/(2*mass))*(xp[xid]*xp[xid] + yp[yid]*yp[yid]
// Function to generate gauge fields
// Generates the gauge field components Ax/Ay/Az on host and device.
// Three sources, in priority order as visible here: values read from file
// (par.Afn == "file"), a GPU AST expression (par.is_ast_gpu), or the
// built-in A kernels stored in par (par.Ax_fn / Ay_fn / Az_fn). Device
// pointers are stored back into par at the end.
// NOTE(review): closing braces, else-branch headers, some kernel-argument
// lines, and any host-side stores/frees are not visible in this span.
void generate_gauge(Grid &par){

    int gSize = par.ival("gSize");
    int dimnum = par.ival("dimnum");

    double *Ax, *Ay, *Az, *Ax_gpu, *Ay_gpu, *Az_gpu;
    double *x_gpu = par.dsval("x_gpu");
    double *y_gpu = par.dsval("y_gpu");
    double *z_gpu = par.dsval("z_gpu");

    double xMax = par.dval("xMax");
    double yMax = par.dval("yMax");
    zMax = par.dval("zMax");

    double omegaX = par.dval("omegaX");
    double omegaY = par.dval("omegaY");
    omegaZ = par.dval("omegaZ");

    double omega = par.dval("omega");
    double fudge = par.dval("fudge");

    // Host buffers for the three components.
    Ax = (double *)malloc(sizeof(double)*gSize);
    Ay = (double *)malloc(sizeof(double)*gSize);
    Az = (double *)malloc(sizeof(double)*gSize);

    cudaMalloc((void**) &Ax_gpu, sizeof(double)*gSize);
    cudaMalloc((void**) &Ay_gpu, sizeof(double)*gSize);
    cudaMalloc((void**) &Az_gpu, sizeof(double)*gSize);

    // Source 1: gauge field supplied as files; file_A scales by omega.
    if (par.Afn == "file"){
        file_A(par.Axfile, Ax, omega);
        cudaMemcpy(Ax_gpu, Ax, sizeof(double)*gSize, cudaMemcpyHostToDevice);

        file_A(par.Ayfile, Ay, omega);
        cudaMemcpy(Ay_gpu,Ay,sizeof(double)*gSize,cudaMemcpyHostToDevice);

        file_A(par.Azfile, Az, omega);
        cudaMemcpy(Az_gpu,Az,sizeof(double)*gSize,cudaMemcpyHostToDevice);

        std::cout << "finished reading Ax / Ay / Az from file" << '\n';

    // Source 2 for Ax: evaluate a parsed AST expression on the device.
    if (par.is_ast_gpu("Ax")){
        double dx = par.dval("dx");
        double dy = par.dval("dy");
        double dz = par.dval("dz");
        double xMax = par.dval("xMax");
        double yMax = par.dval("yMax");
        zMax = par.dval("zMax");

        EqnNode_gpu *eqn = par.astval("Ax");

        find_field<<<par.grid, par.threads>>>(Ax_gpu, dx, dy, dz,
                                              xMax, yMax, zMax, 0, eqn);

        // Source 3 for Ax: built-in kernel selected during init.
        par.Ax_fn<<<par.grid, par.threads>>>(x_gpu, y_gpu, z_gpu,
                                             omegaX, omegaY, omegaZ,
                                             omega, fudge, Ax_gpu);

    // Same three-way selection for Ay.
    if (par.is_ast_gpu("Ay")){
        double dx = par.dval("dx");
        double dy = par.dval("dy");
        double dz = par.dval("dz");
        double xMax = par.dval("xMax");
        double yMax = par.dval("yMax");
        zMax = par.dval("zMax");

        EqnNode_gpu *eqn = par.astval("Ay");

        find_field<<<par.grid, par.threads>>>(Ay_gpu, dx, dy, dz,
                                              xMax, yMax, zMax , 0, eqn);

        par.Ay_fn<<<par.grid, par.threads>>>(x_gpu, y_gpu, z_gpu,
                                             omegaX, omegaY, omegaZ,
                                             omega, fudge, Ay_gpu);

    // Same three-way selection for Az.
    if (par.is_ast_gpu("Az")){
        double dx = par.dval("dx");
        double dy = par.dval("dy");
        double dz = par.dval("dz");

        double xMax = par.dval("xMax");
        double yMax = par.dval("yMax");
        zMax = par.dval("zMax");

        EqnNode_gpu *eqn = par.astval("Az");

        find_field<<<par.grid, par.threads>>>(Az_gpu, dx, dy, dz,

        par.Az_fn<<<par.grid, par.threads>>>(x_gpu, y_gpu, z_gpu,
                                             omegaX, omegaY, omegaZ,
                                             omega, fudge, Az_gpu);

        // Fallback visible here: constant gauge field for Az.
        kconstant_A<<<par.grid, par.threads>>>(x_gpu, y_gpu, z_gpu,
                                               omegaX, omegaY, omegaZ,
                                               omega, fudge, Az_gpu);

    // Mirror the device fields back to the host copies.
    cudaMemcpy(Ax, Ax_gpu, sizeof(double)*gSize,cudaMemcpyDeviceToHost);
    cudaMemcpy(Ay, Ay_gpu, sizeof(double)*gSize,cudaMemcpyDeviceToHost);
    cudaMemcpy(Az, Az_gpu, sizeof(double)*gSize,cudaMemcpyDeviceToHost);

    // Publish the device pointers for generate_fields.
    par.store("Ax_gpu", Ax_gpu);
    par.store("Ay_gpu", Ay_gpu);
    par.store("Az_gpu", Az_gpu);
// Kernel for a constant gauge field; shares the uniform A-kernel signature
// used by all field generators in this file.
// NOTE(review): the body's assignment to A is not visible in this span;
// presumably A[gid] is set to a constant (likely 0) -- confirm.
__global__ void kconstant_A(double *x, double *y, double *z,
                            double xMax, double yMax, double zMax,
                            double omegaX, double omegaY, double omegaZ,
                            double omega, double fudge, double *A){
    // Flattened 3d output index for this thread.
    int gid = getGid3d3d();
538 // Kernel for simple rotational case, Ax
539 __global__ void krotation_Ax(double *x, double *y, double *z,
540 double xMax, double yMax, double zMax,
541 double omegaX, double omegaY, double omegaZ,
542 double omega, double fudge, double *A){
543 int gid = getGid3d3d();
544 int yid = blockDim.y*blockIdx.y + threadIdx.y;
545 A[gid] = -y[yid] * omega * omegaX;
548 // Kernel for simple rotational case, Ay
549 __global__ void krotation_Ay(double *x, double *y, double *z,
550 double xMax, double yMax, double zMax,
551 double omegaX, double omegaY, double omegaZ,
552 double omega, double fudge, double *A){
553 int gid = getGid3d3d();
554 int xid = blockDim.x*blockIdx.x + threadIdx.x;
555 A[gid] = x[xid] * omega * omegaY;
558 // Kernel for simple rotational case, Ax
559 __global__ void kring_rotation_Ax(double *x, double *y, double *z,
560 double xMax, double yMax, double zMax,
561 double omegaX, double omegaY, double omegaZ,
562 double omega, double fudge, double *A){
563 int gid = getGid3d3d();
564 int xid = blockDim.x*blockIdx.x + threadIdx.x;
565 int yid = blockDim.y*blockIdx.y + threadIdx.y;
566 int zid = blockDim.z*blockIdx.z + threadIdx.z;
567 double theta = atan2(y[yid],x[xid]);
568 A[gid] = (z[zid]+zMax)*cos(theta)*omega*omegaX;
571 // Kernel for simple rotational case, Ay
572 __global__ void kring_rotation_Ay(double *x, double *y, double *z,
573 double xMax, double yMax, double zMax,
574 double omegaX, double omegaY, double omegaZ,
575 double omega, double fudge, double *A){
576 int gid = getGid3d3d();
577 int xid = blockDim.x*blockIdx.x + threadIdx.x;
578 int yid = blockDim.y*blockIdx.y + threadIdx.y;
579 int zid = blockDim.z*blockIdx.z + threadIdx.z;
580 double theta = atan2(y[yid],x[xid]);
581 A[gid] = (z[zid]+zMax)*sin(theta)*omega*omegaX;
584 // Kernel for simple rotational case, Az
585 __global__ void kring_rotation_Az(double *x, double *y, double *z,
586 double xMax, double yMax, double zMax,
587 double omegaX, double omegaY, double omegaZ,
588 double omega, double fudge, double *A){
589 int gid = getGid3d3d();
590 int xid = blockDim.x*blockIdx.x + threadIdx.x;
591 int yid = blockDim.y*blockIdx.y + threadIdx.y;
592 double r = sqrt(x[xid]*x[xid] + y[yid]*y[yid]);
593 A[gid] = r*omega*omegaX;
597 // kernel for a simple vortex ring
598 __global__ void kring_Az(double *x, double *y, double *z,
599 double xMax, double yMax, double zMax,
600 double omegaX, double omegaY, double omegaZ,
601 double omega, double fudge, double *A){
602 int gid = getGid3d3d();
603 int xid = blockDim.x*blockIdx.x + threadIdx.x;
604 int yid = blockDim.y*blockIdx.y + threadIdx.y;
606 double rad = sqrt(x[xid]*x[xid] + y[yid]*y[yid]);
608 A[gid] = omega * exp(-rad*rad / (0.0001*xMax)) * 0.01;
612 __global__ void ktest_Ax(double *x, double *y, double *z,
613 double xMax, double yMax, double zMax,
614 double omegaX, double omegaY, double omegaZ,
615 double omega, double fudge, double *A){
616 int gid = getGid3d3d();
617 int yid = blockDim.y*blockIdx.y + threadIdx.y;
618 A[gid] = (sin(y[yid] * 100000)+1) * yMax * omega;
// Testing kernel for Ay, counterpart of ktest_Ax above.
// NOTE(review): the assignment to A is not visible in this span;
// presumably A[gid] is set to a constant (likely 0) -- confirm.
__global__ void ktest_Ay(double *x, double *y, double *z,
                         double xMax, double yMax, double zMax,
                         double omegaX, double omegaY, double omegaZ,
                         double omega, double fudge, double *A){
    // Flattened 3d output index for this thread.
    int gid = getGid3d3d();
// Function to generate the simulation fields: momentum space, potential V,
// initial wavefunction, and the auxiliary split-step operators
// Driver that builds every field the solver needs: momentum-space axes
// (generate_p_space), the `items` parameter array shared by the field
// kernels, the trapping potential V, the initial wavefunction wfc and its
// phase, and the auxiliary split-step operator arrays (GV/EV/GK/EK and the
// gauge exponentials), storing host/device pointers back into par.
// NOTE(review): numerous lines are not visible in this span (generate_K /
// generate_gauge calls, item_size and V/V_gpu declarations, else-branch
// headers, loop bodies, frees, and closing braces); comments below
// annotate only the visible lines.
void generate_fields(Grid &par){

    generate_p_space(par);

    int gSize = par.ival("gSize");
    int dimnum = par.ival("dimnum");
    // NOTE(review): winding is read via dval into an int (implicit
    // double->int truncation) -- confirm intended.
    int winding = par.dval("winding");

    bool energy_calc = par.bval("energy_calc");

    double dt = par.dval("dt");
    double gdt = par.dval("gdt");
    double *x_gpu = par.dsval("x_gpu");
    double *y_gpu = par.dsval("y_gpu");
    double *z_gpu = par.dsval("z_gpu");
    double *px_gpu = par.dsval("px_gpu");
    double *py_gpu = par.dsval("py_gpu");
    double *pz_gpu = par.dsval("pz_gpu");
    double *Ax_gpu = par.dsval("Ax_gpu");
    double *Ay_gpu = par.dsval("Ay_gpu");
    double *Az_gpu = par.dsval("Az_gpu");
    double *K_gpu = par.dsval("K_gpu");

    // Creating items list for kernels
    // `items` packs scalar parameters into one array so the V / wfc
    // kernels can take a single pointer; the index layout is fixed below.
    double *items, *items_gpu;
    items = (double*)malloc(sizeof(double)*item_size);
    cudaMalloc((void**) &items_gpu, sizeof(double)*item_size);

    for (int i = 0; i < item_size; ++i){

    // Fixed items layout consumed by kharmonic_V / ktorus_V / kstd_wfc /
    // ktorus_wfc: [0..2] box extents, [3..5] trap frequencies,
    // [6..8] center shifts, [9] mass, [10..11] anisotropies,
    // [12] fudge, [13] time, [14] Rxy, [15..17] oscillator lengths.
    items[0] = par.dval("xMax");
    items[1] = par.dval("yMax");

    items[2] = par.dval("zMax");

    items[3] = par.dval("omegaX");
    items[4] = par.dval("omegaY");

    items[5] = par.dval("omegaZ");

    items[6] = par.dval("x0_shift");
    items[7] = par.dval("y0_shift");

    items[8] = par.dval("z0_shift");

    items[9] = par.dval("mass");
    items[10] = par.dval("gammaY");
    items[11] = 1.0; // For gammaZ
    items[12] = par.dval("fudge");
    items[13] = 0.0; // For time

    items[14] = par.dval("Rxy");

    items[15] = par.dval("a0x");
    items[16] = par.dval("a0y");

    items[17] = par.dval("a0z");

    cudaMemcpy(items_gpu, items, sizeof(double)*item_size,
               cudaMemcpyHostToDevice);

    double fudge = par.dval("fudge");

    // Potential: either evaluated from a GPU AST expression or from the
    // V kernel selected in par (kharmonic_V / ktorus_V).
    V = (double *)malloc(sizeof(double)*gSize);

    cudaMalloc((void **) &V_gpu, sizeof(double)*gSize);

    if (par.is_ast_gpu("V")){
        double dx = par.dval("dx");
        double dy = par.dval("dy");
        double dz = par.dval("dz");

        double xMax = par.dval("xMax");
        double yMax = par.dval("yMax");
        zMax = par.dval("zMax");

        EqnNode_gpu *eqn = par.astval("V");
        find_field<<<par.grid, par.threads>>>(V_gpu, dx, dy, dz,
                                              xMax, yMax, zMax, 0, eqn);

        par.V_fn<<<par.grid, par.threads>>>(x_gpu, y_gpu, z_gpu, items_gpu,
                                            Ax_gpu, Ay_gpu, Az_gpu, V_gpu);

    cudaMemcpy(V, V_gpu, sizeof(double)*gSize, cudaMemcpyDeviceToHost);

    // Initial wavefunction and its phase: read from par, or generated by
    // the wfc kernel selected in par (kstd_wfc / ktorus_wfc).
    double2 *wfc, *wfc_gpu;
    double *phi, *phi_gpu;

    wfc = (double2 *)malloc(sizeof(double2)*gSize);
    phi = (double *)malloc(sizeof(double)*gSize);

    cudaMalloc((void**) &wfc_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &phi_gpu, sizeof(double)*gSize);

    if (par.bval("read_wfc")){
        wfc = par.cufftDoubleComplexval("wfc");
        cudaMemcpy(wfc_gpu, wfc, sizeof(double2)*gSize, cudaMemcpyHostToDevice);

        par.wfc_fn<<<par.grid, par.threads>>>(x_gpu, y_gpu, z_gpu, items_gpu,
                                              winding, phi_gpu, wfc_gpu);
        cudaMemcpy(wfc, wfc_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);

    cudaMemcpy(phi, phi_gpu, sizeof(double)*gSize, cudaMemcpyDeviceToHost);

    // generating aux fields.
    // G* arrays hold imaginary-time (ground-state) propagator factors,
    // E* arrays the real-time complex phases; pA* are momentum*gauge terms.
    double2 *GV, *EV, *GK, *EK;
    double2 *GV_gpu, *EV_gpu, *GK_gpu, *EK_gpu;
    double2 *GpAx, *GpAy, *GpAz, *EpAx, *EpAy, *EpAz;
    double2 *GpAx_gpu, *GpAy_gpu, *GpAz_gpu, *EpAx_gpu, *EpAy_gpu, *EpAz_gpu;
    double *pAx, *pAy, *pAz;
    double *pAx_gpu, *pAy_gpu, *pAz_gpu;

    GV = (double2 *)malloc(sizeof(double2)*gSize);
    EV = (double2 *)malloc(sizeof(double2)*gSize);
    GK = (double2 *)malloc(sizeof(double2)*gSize);
    EK = (double2 *)malloc(sizeof(double2)*gSize);

    GpAx = (double2 *)malloc(sizeof(double2)*gSize);
    EpAx = (double2 *)malloc(sizeof(double2)*gSize);
    GpAy = (double2 *)malloc(sizeof(double2)*gSize);
    EpAy = (double2 *)malloc(sizeof(double2)*gSize);
    GpAz = (double2 *)malloc(sizeof(double2)*gSize);
    EpAz = (double2 *)malloc(sizeof(double2)*gSize);

    pAx = (double *)malloc(sizeof(double)*gSize);
    pAy = (double *)malloc(sizeof(double)*gSize);
    pAz = (double *)malloc(sizeof(double)*gSize);

    cudaMalloc((void**) &GV_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &EV_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &GK_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &EK_gpu, sizeof(double2)*gSize);

    cudaMalloc((void**) &GpAx_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &EpAx_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &GpAy_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &EpAy_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &GpAz_gpu, sizeof(double2)*gSize);
    cudaMalloc((void**) &EpAz_gpu, sizeof(double2)*gSize);

    cudaMalloc((void**) &pAx_gpu, sizeof(double)*gSize);
    cudaMalloc((void**) &pAy_gpu, sizeof(double)*gSize);
    cudaMalloc((void**) &pAz_gpu, sizeof(double)*gSize);

    // One kernel fills all operator arrays from V, K, A, and p.
    aux_fields<<<par.grid, par.threads>>>(V_gpu, K_gpu, gdt, dt,
                                          Ax_gpu, Ay_gpu, Az_gpu,
                                          px_gpu, py_gpu, pz_gpu,
                                          pAx_gpu, pAy_gpu, pAz_gpu,
                                          GV_gpu, EV_gpu, GK_gpu, EK_gpu,
                                          GpAx_gpu, GpAy_gpu, GpAz_gpu,
                                          EpAx_gpu, EpAy_gpu, EpAz_gpu);

    // Mirror every operator array back to its host copy.
    cudaMemcpy(GV, GV_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(EV, EV_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(GK, GK_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(EK, EK_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);

    cudaMemcpy(GpAx, GpAx_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(EpAx, EpAx_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(GpAy, GpAy_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(EpAy, EpAy_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(GpAz, GpAz_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(EpAz, EpAz_gpu, sizeof(double2)*gSize, cudaMemcpyDeviceToHost);

    cudaMemcpy(pAx, pAx_gpu, sizeof(double)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(pAy, pAy_gpu, sizeof(double)*gSize, cudaMemcpyDeviceToHost);
    cudaMemcpy(pAz, pAz_gpu, sizeof(double)*gSize, cudaMemcpyDeviceToHost);

    // Publish everything the time-stepping code reads from par.
    par.store("V_gpu",V_gpu);

    par.store("items", items);
    //par.store("items_gpu", items_gpu);
    par.store("wfc", wfc);
    par.store("wfc_gpu", wfc_gpu);
    par.store("Phi", phi);
    par.store("Phi_gpu", phi_gpu);

    //par.store("GV_gpu",GV_gpu);
    //par.store("EV_gpu",EV_gpu);
    //par.store("GK_gpu",GK_gpu);
    //par.store("EK_gpu",EK_gpu);

    par.store("GpAx",GpAx);
    par.store("EpAx",EpAx);
    par.store("GpAy",GpAy);
    par.store("EpAy",EpAy);
    par.store("GpAz",GpAz);
    par.store("EpAz",EpAz);

    par.store("pAx",pAx);
    par.store("pAy",pAy);
    par.store("pAz",pAz);
    //par.store("pAx_gpu",pAx_gpu);
    //par.store("pAy_gpu",pAy_gpu);
    //par.store("pAz_gpu",pAz_gpu);
900 __global__ void kharmonic_V(double *x, double *y, double *z, double* items,
901 double *Ax, double *Ay, double *Az, double *V){
903 int gid = getGid3d3d();
904 int xid = blockDim.x*blockIdx.x + threadIdx.x;
905 int yid = blockDim.y*blockIdx.y + threadIdx.y;
906 int zid = blockDim.z*blockIdx.z + threadIdx.z;
908 double V_x = items[3]*(x[xid]+items[6]);
909 double V_y = items[10]*items[4]*(y[yid]+items[7]);
910 double V_z = items[11]*items[5]*(z[zid]+items[8]);
912 V[gid] = 0.5*items[9]*((V_x*V_x + V_y*V_y + V_z*V_z)
913 + (Ax[gid]*Ax[gid] + Ay[gid]*Ay[gid] + Az[gid]*Az[gid]));
// kernel for simple 3d torus trapping potential
// Harmonic confinement about a ring in the z = z0 plane plus the |A|^2
// gauge term. items layout (see generate_fields): [0] = xMax,
// [3..5] = omegaX/Y/Z, [6..8] = x/y/z shifts, [9] = mass, [12] = fudge.
// NOTE(review): the continuation of `rad` (subtracting the ring radius,
// cf. ktorus_wfc below) and the tail of the V expression are not visible
// in this span.
__global__ void ktorus_V(double *x, double *y, double *z, double* items,
                         double *Ax, double *Ay, double *Az, double *V){

    int gid = getGid3d3d();
    int xid = blockDim.x*blockIdx.x + threadIdx.x;
    int yid = blockDim.y*blockIdx.y + threadIdx.y;
    int zid = blockDim.z*blockIdx.z + threadIdx.z;

    // In-plane distance from the (shifted) trap center.
    double rad = sqrt((x[xid] - items[6]) * (x[xid] - items[6])
                      + (y[yid] - items[7]) * (y[yid] - items[7]))

    // Combined radial trap frequency squared.
    double omegaR = (items[3]*items[3] + items[4]*items[4]);
    double V_tot = (2*items[5]*items[5]*(z[zid] - items[8])*(z[zid] - items[8])
                   + omegaR*(rad*rad + items[12]*rad*z[zid]));

    V[gid] = 0.5*items[9]*(V_tot
// Kernel for the standard initial wavefunction: an anisotropic Gaussian
// envelope with a quantized winding phase phi = winding * atan2(y, x).
// items layout (see generate_fields): [14] = Rxy, [15..17] = a0x/a0y/a0z.
// NOTE(review): the continuation lines multiplying the envelope by the
// phase factor (presumably cos(phi) for .x and sin(phi) for .y) are not
// visible in this span -- confirm against the full file.
__global__ void kstd_wfc(double *x, double *y, double *z, double *items,
                         double winding, double *phi, double2 *wfc){

    int gid = getGid3d3d();
    int xid = blockDim.x*blockIdx.x + threadIdx.x;
    int yid = blockDim.y*blockIdx.y + threadIdx.y;
    int zid = blockDim.z*blockIdx.z + threadIdx.z;

    // Winding phase, wrapped into (-2*PI, 2*PI).
    phi[gid] = fmod(winding*atan2(y[yid], x[xid]),2*PI);

    // Real part: Gaussian envelope (widths Rxy*a0 along each axis).
    wfc[gid].x = exp(-(x[xid]*x[xid]/(items[14]*items[14]*items[15]*items[15])
                 + y[yid]*y[yid]/(items[14]*items[14]*items[16]*items[16])
                 + z[zid]*z[zid]/(items[14]*items[14]*items[17]*items[17])))

    // Imaginary part: same envelope with negated sign.
    wfc[gid].y = -exp(-(x[xid]*x[xid]/(items[14]*items[14]*items[15]*items[15])
                 + y[yid]*y[yid]/(items[14]*items[14]*items[16]*items[16])
                 + z[zid]*z[zid]/(items[14]*items[14]*items[17]*items[17])))
// Kernel for the toroidal initial wavefunction: a Gaussian tube centered
// on a ring of radius 0.5*xMax*fudge in the z plane.
// items layout (see generate_fields): [0] = xMax, [6..8] = x/y/z shifts,
// [12] = fudge, [14] = Rxy, [15] = a0x, [17] = a0z.
// NOTE(review): the assignment to wfc[gid].y and the closing lines are
// not visible in this span.
__global__ void ktorus_wfc(double *x, double *y, double *z, double *items,
                           double winding, double *phi, double2 *wfc){

    int gid = getGid3d3d();
    int xid = blockDim.x*blockIdx.x + threadIdx.x;
    int yid = blockDim.y*blockIdx.y + threadIdx.y;
    int zid = blockDim.z*blockIdx.z + threadIdx.z;

    // Signed distance from the ring (in-plane radius minus ring radius).
    double rad = sqrt((x[xid] - items[6]) * (x[xid] - items[6])
                 + (y[yid] - items[7]) * (y[yid] - items[7]))
                 - 0.5*items[0]*items[12];

    // Gaussian tube cross-section in (rad, z).
    wfc[gid].x = exp(-( pow((rad)/(items[14]*items[15]*0.5),2) +
                        pow((z[zid])/(items[14]*items[17]*0.5),2) ) );
// Kernel precomputing the split-step evolution operators:
// GV/GK are the imaginary-time (ground-state) factors exp(-V*gdt/2hbar)
// and exp(-K*gdt/hbar); EV/EK are the corresponding real-time complex
// phases; pA* are momentum * gauge-field products with their own G/E
// exponentials. V gets the half step in both schemes (Strang splitting).
// NOTE(review): the .y assignments for GV/GK/GpA* and the kernel's
// closing lines are not visible in this span.
__global__ void aux_fields(double *V, double *K, double gdt, double dt,
                           double* Ax, double *Ay, double* Az,
                           double *px, double *py, double *pz,
                           double* pAx, double* pAy, double* pAz,
                           double2* GV, double2* EV, double2* GK, double2* EK,
                           double2* GpAx, double2* GpAy, double2* GpAz,
                           double2* EpAx, double2* EpAy, double2* EpAz){
    int gid = getGid3d3d();
    int xid = blockDim.x*blockIdx.x + threadIdx.x;
    int yid = blockDim.y*blockIdx.y + threadIdx.y;
    int zid = blockDim.z*blockIdx.z + threadIdx.z;

    // Imaginary-time propagator factors (real parts).
    GV[gid].x = exp( -V[gid]*(gdt/(2*HBAR)));
    GK[gid].x = exp( -K[gid]*(gdt/HBAR));

    // Ax and Ay will be calculated here but are used only for
    // debugging. They may be needed later for magnetic field calc

    // NOTE(review): pAy is built from Ax*px and pAx from Ay*py -- the
    // cross pairing looks swapped; confirm intended against full file.
    pAy[gid] = Ax[gid] * px[xid];
    pAx[gid] = Ay[gid] * py[yid];
    pAz[gid] = Az[gid] * pz[zid];

    GpAx[gid].x = exp(-pAx[gid]*gdt);

    GpAy[gid].x = exp(-pAy[gid]*gdt);

    GpAz[gid].x = exp(-pAz[gid]*gdt);

    // Real-time propagators as complex phases e^{-i(.)}.
    EV[gid].x=cos( -V[gid]*(dt/(2*HBAR)));
    EV[gid].y=sin( -V[gid]*(dt/(2*HBAR)));
    EK[gid].x=cos( -K[gid]*(dt/HBAR));
    EK[gid].y=sin( -K[gid]*(dt/HBAR));

    EpAz[gid].x=cos(-pAz[gid]*dt);
    EpAz[gid].y=sin(-pAz[gid]*dt);
    EpAy[gid].x=cos(-pAy[gid]*dt);
    EpAy[gid].y=sin(-pAy[gid]*dt);
    EpAx[gid].x=cos(-pAx[gid]*dt);
    EpAx[gid].y=sin(-pAx[gid]*dt);
// Function to generate CUDA grid and thread dimensions for the 1d, 2d,
// and 3d cases, storing the results in par.threads / par.grid.
// The x dimension is factored so that no block exceeds max_threads.
// NOTE(review): large parts of the branching below are not visible in
// this span (dim_tmp/count setup, the dimnum==2 branch header, grid.x/y/z
// assignments, closing braces); comments annotate visible lines only.
void generate_grid(Grid& par){

    int dimnum = par.ival("dimnum");
    int xDim = par.ival("xDim");
    int yDim = par.ival("yDim");
    int zDim = par.ival("zDim");
    // Per-axis grid divisors (defaults cover the 1d case).
    int xD = 1, yD = 1, zD = 1;
    // Maximum threads per block used when factoring the x dimension.
    int max_threads = 256;
    if (xDim < max_threads){

    // Small grids fit in a single block along x.
    if (xDim <= max_threads){
        par.threads.x = xDim;

    // Otherwise halve (presumably) dim_tmp until it fits in one block.
    while (dim_tmp > max_threads){

    std::cout << "count is: " << count << '\n';

        par.threads.x = dim_tmp;

    else if (dimnum == 3){

        // Same factoring strategy for the 3d case.
        if (xDim <= max_threads){
            par.threads.x = xDim;

        while (dim_tmp > max_threads){

        std::cout << "count is: " << count << '\n';

            par.threads.x = dim_tmp;

    else if (dimnum == 1){
        par.threads.x = xDim;

    // Diagnostic output of the chosen launch configuration.
    std::cout << "threads in x are: " << par.threads.x << '\n';
    std::cout << "dimensions are: " << par.grid.x << '\t'
              << par.grid.y << '\t'
              << par.grid.z << '\n';