1 #include "../include/ds.h"
2 #include "../include/unit_test.h"
3 #include "../include/parser.h"
4 #include "../include/evolution.h"
5 #include "../include/init.h"
6 #include "../include/dynamic.h"
7 #include "../include/vortex_3d.h"
14 // Adding tests for mathematical operator kernels
15 void math_operator_test();
16 __global__ void add_test(double2 *a, double2 *b, double2 *c);
17 __global__ void subtract_test(double2 *a, double2 *b, double2 *c);
18 __global__ void mult_test(double2 *a, double2 *b, double2 *c);
19 __global__ void mult_test(double2 *a, double b, double2 *c);
20 __global__ void pow_test(double2 *a, int b, double2 *c);
22 // Tests for cufftDoubleComplex functions
23 void cufftDoubleComplex_functions_test();
24 __global__ void complexMag_test(double2 *in, double *out);
25 __global__ void complexMag2_test(double2 *in, double *out);
26 void realCompMult_test();
29 // Tests for quantum operations
30 __global__ void make_complex_kernel(double *in, int *evolution_type,
32 void make_complex_test();
35 // Tests for complex mathematical operations
37 void scalarDiv_test();
42 void ast_cmult_test();
43 void ast_op_mult_test();
46 void energyCalc_test();
47 void braKetMult_test();
49 // Test for the Grid structure with parameters in it
50 void parameter_test();
52 // Test for the parsing function
55 // Testing the evolve_2d function in evolution.cu
58 // Testing the parSum function
61 // Simple test of grid / cuda stuff
65 // Test of 1D fft's along all 3d grids
68 // Test to check the equation parser for dynamic fields
71 // Test to make sure the kernel for the polynomial approx. of Bessel fxns works
74 // Test for the vortex tracking functions in vortex_3d
77 // Kernel testing will be added later
78 __device__ bool close(double a, double b, double threshold){
79 return (abs(a-b) < threshold);
83 /*----------------------------------------------------------------------------//
85 *-----------------------------------------------------------------------------*/
88 std::cout << "Starting unit tests..." << '\n';
92 << "Beginning testing of standard mathematical operation kernels...\n";
95 std::cout << "Beginning testing of cufftDoubleComplex kernels...\n";
96 cufftDoubleComplex_functions_test();
98 // Do not uncomment these 2
112 std::cout << "All tests completed. GPUE passed." << '\n';
115 void math_operator_test(){
117 // First, we need to create a set of grids and threads to read into the
118 // kernels for testing
120 dim3 threads = {1,1,1};
122 double2 *ha, *hb, *hc;
123 double2 *da, *db, *dc;
125 // Allocating single-element arrays to test kernels with.
126 ha = (double2*)malloc(sizeof(double2));
127 hb = (double2*)malloc(sizeof(double2));
128 hc = (double2*)malloc(sizeof(double2));
135 cudaMalloc((void**) &da, sizeof(double2));
136 cudaMalloc((void**) &db, sizeof(double2));
137 cudaMalloc((void**) &dc, sizeof(double2));
139 cudaMemcpy(da, ha, sizeof(double2), cudaMemcpyHostToDevice);
140 cudaMemcpy(db, hb, sizeof(double2), cudaMemcpyHostToDevice);
142 add_test<<<grid, threads>>>(da, db, dc);
143 cudaMemcpy(hc, dc, sizeof(double2), cudaMemcpyDeviceToHost);
145 if (abs(hc[0].x - 0.03) > 1e-16 || abs(hc[0].y - 0.3) > 1e-16){
146 std::cout << "Complex addition test failed!\n";
150 subtract_test<<<grid, threads>>>(da, db, dc);
151 cudaMemcpy(hc, dc, sizeof(double2), cudaMemcpyDeviceToHost);
153 if (hc[0].x != -0.01 || hc[0].y != -0.1){
154 std::cout << "Complex subtraction test failed!\n";
158 pow_test<<<grid, threads>>>(da, 3, dc);
159 cudaMemcpy(hc, dc, sizeof(double2), cudaMemcpyDeviceToHost);
161 if (abs(hc[0].x + 0.000299) > 1e-16 || abs(hc[0].y + 0.00097) > 1e-16){
162 std::cout << "Complex power test failed!\n";
166 mult_test<<<grid, threads>>>(da, db, dc);
167 cudaMemcpy(hc, dc, sizeof(double2), cudaMemcpyDeviceToHost);
169 if (abs(hc[0].x + 0.0198) > 1e-16 || abs(hc[0].y - 0.004) > 1e-16){
170 std::cout << "Complex multiplication test failed!\n";
174 mult_test<<<grid, threads>>>(da, 3.0, dc);
175 cudaMemcpy(hc, dc, sizeof(double2), cudaMemcpyDeviceToHost);
177 if (abs(hc[0].x - 0.03) > 1e-16 || abs(hc[0].y - 0.3) > 1e-16){
178 std::cout << "Complex multiplication test with real number failed!\n";
182 std::cout << "Complex addition, subtraction, multiplication, and powers have been tested\n";
185 __global__ void add_test(double2 *a, double2 *b, double2 *c){
186 c[0] = add(a[0],b[0]);
189 __global__ void subtract_test(double2 *a, double2 *b, double2 *c){
190 c[0] = subtract(a[0],b[0]);
193 __global__ void pow_test(double2 *a, int b, double2 *c){
197 __global__ void mult_test(double2 *a, double2 *b, double2 *c){
198 c[0] = mult(a[0],b[0]);
201 __global__ void mult_test(double2 *a, double b, double2 *c){
205 void cufftDoubleComplex_functions_test(){
207 // first creating the grid and threads
209 dim3 threads = {1,1,1};
211 double *hval_double, *dval_double, *dout, *hout;
212 double2 *hval_double2, *dval_double2;
216 hval_double = (double*)malloc(sizeof(double));
217 hval_double2 = (double2*)malloc(sizeof(double2));
218 hout = (double*)malloc(sizeof(double));
219 hin = (double2*)malloc(sizeof(double2));
221 hval_double[0] = 3.0;
222 hval_double2[0].x = 0.3;
223 hval_double2[0].y = 0.4;
228 cudaMalloc((void**)&dval_double, sizeof(double));
229 cudaMalloc((void**)&dval_double2, sizeof(double2));
230 cudaMalloc((void**)&dout, sizeof(double));
231 cudaMalloc((void**)&din, sizeof(double2));
234 // Testing make_cufftDoubleComplex function
235 cudaMemcpy(dval_double, hval_double, sizeof(double),
236 cudaMemcpyHostToDevice);
238 make_cufftDoubleComplex<<<grid, threads>>>(dval_double, dval_double2);
240 cudaMemcpy(hval_double2, dval_double2, sizeof(double2),
241 cudaMemcpyDeviceToHost);
243 if (hval_double2[0].x != 3.0 || hval_double2[0].y != 0){
244 std::cout << "Test of make_cufftDoubleComplex failed!\n";
248 // testing device complexMagnitude function
249 cudaMemcpy(din, hin, sizeof(double2), cudaMemcpyHostToDevice);
250 complexMag_test<<<grid, threads>>>(din, dout);
252 cudaMemcpy(hout, dout, sizeof(double), cudaMemcpyDeviceToHost);
255 std::cout << hout[0] << '\n';
256 std::cout << "Test of device complexMagnitude failed!\n";
260 // Testing global complexMagnitude function
261 complexMagnitude<<<grid, threads>>>(din, dout);
262 cudaMemcpy(hout, dout, sizeof(double), cudaMemcpyDeviceToHost);
265 std::cout << hout[0] << '\n';
266 std::cout << "Test of global complexMagnitude failed!\n";
270 complexMag2_test<<<grid, threads>>>(din, dout);
272 cudaMemcpy(hout, dout, sizeof(double), cudaMemcpyDeviceToHost);
274 if (hout[0] != 25.0){
275 std::cout << hout[0] << '\n';
276 std::cout << "Test of device complexMagnitudeSquared failed!\n";
280 // Testing global complexMagnitude function
281 complexMagnitudeSquared<<<grid, threads>>>(din, dout);
282 cudaMemcpy(hout, dout, sizeof(double), cudaMemcpyDeviceToHost);
284 if (hout[0] != 25.0){
285 std::cout << hout[0] << '\n';
286 std::cout << "Test of global complexMagnitudeSquared failed!\n";
291 std::cout << "make_cufftDoubleComplex, and complexMagnitude[Squared] have been tested\n";
295 __global__ void complexMag_test(double2 *in, double *out){
296 out[0] = complexMagnitude(in[0]);
299 __global__ void complexMag2_test(double2 *in, double *out){
300 out[0] = complexMagnitudeSquared(in[0]);
303 // Test to check the equation parser for dynamic fields
304 // For this test, we will need a general set of parameters to read in and a
305 // standard equation string to look at.
308 std::cout << "Beginning test of dynamic functions..." <<'\n';
309 std::string eqn_string = "(((3*x)+7)+(5-7)+cos(0)*1)+pow(120,2)";
315 par.store("omegaX",5.0);
316 par.store("omegaY",5.0);
317 par.store("omegaZ",5.0);
318 par.store("xMax",5.0);
319 par.store("yMax",5.0);
320 par.store("zMax",5.0);
321 par.store("fudge",5.0);
322 par.store("mass",5.0);
323 std::string val_string = "check_var";
325 EqnNode eqn_tree = parse_eqn(par, eqn_string, val_string);
327 std::cout << "finding the number of elements in abstract syntax tree...\n";
330 find_element_num(eqn_tree, num);
331 int element_num = num;
333 std::cout << "Total number of elements is: " << num << '\n';
335 std::cout << "Now to copy the tree to the GPU..." << '\n';
337 EqnNode_gpu *eqn_gpu, *eqn_cpu;
338 eqn_cpu = (EqnNode_gpu *)malloc(sizeof(EqnNode_gpu)*element_num);
340 tree_to_array(eqn_tree, eqn_cpu, num);
343 for (int i = 0; i < num; ++i){
344 std::cout << eqn_cpu[i].val << '\n';
345 std::cout << eqn_cpu[i].left << '\n';
346 std::cout << eqn_cpu[i].right << '\n' << '\n';
350 cudaMalloc((void**)&eqn_gpu, sizeof(EqnNode_gpu)*element_num);
351 cudaMemcpy(eqn_gpu, eqn_cpu, sizeof(EqnNode_gpu)*element_num,
352 cudaMemcpyHostToDevice);
354 // Now to check some simple evaluation
355 std::cout << "Now to check simple GPU evaluation..." << '\n';
357 double *array, *array_gpu;
358 array = (double *)malloc(sizeof(double)*n);
359 cudaMalloc(&array_gpu, sizeof(double)*n);
362 int grid = (int)ceil((float)n/threads);
364 //zeros<<<grid, threads>>>(array_gpu, n);
365 find_field<<<grid, threads>>>(array_gpu, 1, 0.0, 0.0, 0.0, 1,1,1,eqn_gpu);
367 cudaMemcpy(array, array_gpu, sizeof(double)*n, cudaMemcpyDeviceToHost);
369 for (int i = 0; i < n; ++i){
370 double eqn_val = (((3*i)+7)+(5-7)+cos(0)*1)+pow(120,2);
371 if (array[i] != eqn_val){
372 std::cout << "GPU evaluation failed in dynamic test!\n";
373 assert(array[i] == eqn_val);
377 // Now testing simple parsing of example "example.cfg"
378 std::cout << "Testing simple parameter parsing." << '\n';
379 par.store("param_file", (std::string)"src/example.cfg");
380 parse_param_file(par);
381 EqnNode_gpu *eqn = par.astval("V");
382 find_field<<<grid, threads>>>(array_gpu, 0.1, 0.1, 0.1, 1, 1, 1, 0, eqn);
383 cudaDeviceSynchronize();
385 cudaMemcpy(array, array_gpu, sizeof(double)*n, cudaMemcpyDeviceToHost);
388 for (int i = 0; i < n; ++i){
389 std::cout << array[i] << '\n';
393 std::cout << "Dynamic tests passed" <<'\n';
396 __global__ void bessel_test_kernel(double *j, double *j_poly, bool *val){
397 int xid = blockDim.x*blockIdx.x + threadIdx.x;
398 j[xid] = j0(xid * 2.0 / 128);
399 j_poly[xid] = poly_j(0,xid * 2.0 / 128, 40);
401 if (!close(j[xid],j_poly[xid], 0.0001)){
403 printf("Error at element %u in Bessel test!\tValues: %f, %f\n",
404 xid, j[xid], j_poly[xid]);
408 // Test for bessel functions
411 std::cout << "Testing Bessel Functions..." << '\n';
413 double *j_gpu, *j_poly_gpu;
416 cudaMalloc((void **)&j_gpu, sizeof(double)*n);
417 cudaMalloc((void **)&j_poly_gpu, sizeof(double)*n);
419 cudaMalloc((void **)&val_gpu, sizeof(bool));
420 val = (bool *)malloc(sizeof(bool));
422 cudaMemcpy(val_gpu, val, sizeof(bool), cudaMemcpyHostToDevice);
424 bessel_test_kernel<<<64,2>>>(j_gpu, j_poly_gpu, val_gpu);
425 cudaMemcpy(val, val_gpu, sizeof(bool), cudaMemcpyDeviceToHost);
428 std::cout << "Bessel Test Passed!" << '\n';
431 std::cout << "Bessel Test Failed!" << '\n';
437 // Test of 1D fft's along all 3d grids
438 // In particular, we need to test the generate_plan_other3d function
439 // These will be checked against 1d
442 std::cout << "Beginning cufft test.\n";
444 // For these tests, we are assuming that the x, y and z dimensions are
445 // All the same (2x2x2)
446 // Note that yDim needs to be singled out differently, but z/x need no loops
448 // now we need to create the necessary parameters and store everything
452 int gsize = xDim * yDim * zDim;
455 par.store("xDim", xDim);
456 par.store("yDim", yDim);
457 par.store("zDim", zDim);
459 cufftHandle plan_x, plan_y, plan_z;
460 // Now creating the plans
461 generate_plan_other3d(&plan_x, par, 0);
462 generate_plan_other3d(&plan_y, par, 1);
463 generate_plan_other3d(&plan_z, par, 2);
465 // And the result / error
469 // Creating the initial array for the x dimension fft
470 double2 *array, *gpu_array;
471 array = (double2 *) malloc(sizeof(double2)*gsize);
472 cudaMalloc((void**) &gpu_array, sizeof(double2)*gsize);
473 for (int i = 0; i < gsize; i++){
478 // transferring to gpu
479 err = cudaMemcpy(gpu_array, array, sizeof(double2)*gsize,
480 cudaMemcpyHostToDevice);
481 if (err != cudaSuccess){
482 std::cout << "Could not coppy array to device!" << '\n';
483 std::cout << "error code: " << err << '\n';
487 // Performing the x transformation
488 for (int i = 0; i < yDim; i++){
489 result = cufftExecZ2Z(plan_y, &gpu_array[i*xDim*yDim],
490 &gpu_array[i*xDim*yDim], CUFFT_FORWARD);
492 //result = cufftExecZ2Z(plan_z, gpu_array, gpu_array, CUFFT_FORWARD);
494 // transferring back to host to check output
495 err = cudaMemcpy(array, gpu_array, sizeof(double2)*gsize,
496 cudaMemcpyDeviceToHost);
497 if (err != cudaSuccess){
498 std::cout << "Could not coppy gpu_array to host!" << '\n';
499 std::cout << "error code: " << err << '\n';
504 for (int i = 0; i < gsize; i++){
505 std::cout << array[i].x << '\t' << array[i].y << '\n';
509 // Now to try the inverse direction
511 for (int i = 0; i < yDim; i++){
512 result = cufftExecZ2Z(plan_y, &gpu_array[i*xDim*yDim],
513 &gpu_array[i*xDim*yDim], CUFFT_INVERSE);
515 //result = cufftExecZ2Z(plan_z, gpu_array, gpu_array, CUFFT_INVERSE);
518 err = cudaMemcpy(array, gpu_array, sizeof(double2)*gsize,
519 cudaMemcpyDeviceToHost);
520 if (err != cudaSuccess){
521 std::cout << "Could not coppy gpu_array to host!" << '\n';
522 std::cout << "error code: " << err << '\n';
527 for (int i = 0; i < gsize; i++){
528 std::cout << array[i].x << '\t' << array[i].y << '\n';
532 std::cout << "cufft test passed!\n";
538 // Simple test of CUDA grid stuff
541 std::cout << "testing grid / threads and stuff" << '\n';
543 int max_threads = 256;
549 int xD = 1, yD = 1, zD = 1;
551 int gsize = xDim * yDim;
553 // Now to set up the CUDA grid / threads
558 par.store("xDim",xDim);
559 par.store("yDim",yDim);
560 par.store("zDim",zDim);
561 par.store("dimnum", 2);
565 if (xDim <= max_threads){
577 while (dim_tmp > max_threads){
582 std::cout << "count is: " << count << '\n';
592 std::cout << "threads in x are: " << block.x << '\n';
593 std::cout << "dimensions are: " << xD << '\t' << yD << '\t' << zD << '\n';
599 if (grid.x != par.grid.x || block.x != par.threads.x ||
600 grid.y != par.grid.y || block.y != par.threads.y ||
601 grid.z != par.grid.z || block.z != par.threads.z){
602 std::cout << "Gridding test 2D failed! Improper generation of threads!"
604 assert(grid.x == par.grid.x);
605 assert(grid.y == par.grid.y);
606 assert(grid.z == par.grid.z);
607 assert(block.x == par.threads.x);
608 assert(block.y == par.threads.y);
609 assert(block.z == par.threads.z);
612 int total_threads = block.x * block.y * block.z;
614 // Now we need to initialize our double * and send it to the gpu
615 double *host_array, *device_array;
616 host_array = (double *) malloc(sizeof(double)*gsize);
617 cudaMalloc((void**) &device_array, sizeof(double)*gsize);
619 // initializing 2d array
620 for (int i = 0; i < gsize; i++){
624 // Now to copy to device
625 cudaMemcpy(device_array, host_array,
626 sizeof(double)*gsize,
627 cudaMemcpyHostToDevice);
630 thread_test<<<grid,block>>>(device_array,device_array);
632 // Now to copy back and print
633 cudaMemcpy(host_array, device_array,
634 sizeof(double)*gsize,
635 cudaMemcpyDeviceToHost);
638 for (int i = 0; i < xDim; ++i){
639 for (int j = 0; j < yDim; ++j){
640 int index = i*yDim + j;
641 if (host_array[index] != index){
642 std::cout << "Threadding values improperly set!\n";
643 assert(host_array[index] == index);
648 std::cout << "2d grid tests completed. Now for 3d cases" << '\n';
652 // Simple test of CUDA grid stuff
655 std::cout << "testing grid / threads and stuff for 3d" << '\n';
657 int max_threads = 256;
663 int xD = 1, yD = 1, zD = 1;
665 int gsize = xDim * yDim * zDim;
667 // Now to set up the CUDA grid / threads
672 par.store("xDim",xDim);
673 par.store("yDim",yDim);
674 par.store("zDim",zDim);
675 par.store("dimnum", 3);
680 if (xDim <= max_threads){
692 while (dim_tmp > max_threads){
697 std::cout << "count is: " << count << '\n';
707 std::cout << "threads in x are: " << block.x << '\n';
708 std::cout << "dimensions are: " << xD << '\t' << yD << '\t' << zD << '\n';
714 if (grid.x != par.grid.x || block.x != par.threads.x ||
715 grid.y != par.grid.y || block.y != par.threads.y ||
716 grid.z != par.grid.z || block.z != par.threads.z){
717 std::cout << "Gridding test 3D failed! Improper generation of threads!"
719 assert(grid.x == par.grid.x);
720 assert(grid.y == par.grid.y);
721 assert(grid.z == par.grid.z);
722 assert(block.x == par.threads.x);
723 assert(block.y == par.threads.y);
724 assert(block.z == par.threads.z);
728 int total_threads = block.x * block.y * block.z;
730 // Now we need to initialize our double * and send it to the gpu
731 double *host_array, *device_array;
732 host_array = (double *) malloc(sizeof(double)*gsize);
733 cudaMalloc((void**) &device_array, sizeof(double)*gsize);
735 // initializing 2d array
736 for (int i = 0; i < gsize; i++){
740 // Now to copy to device
741 cudaMemcpy(device_array, host_array,
742 sizeof(double)*gsize,
743 cudaMemcpyHostToDevice);
746 thread_test<<<grid,block>>>(device_array,device_array);
748 // Now to copy back and print
749 cudaMemcpy(host_array, device_array,
750 sizeof(double)*gsize,
751 cudaMemcpyDeviceToHost);
754 for (int i = 0; i < xDim; ++i){
755 for (int j = 0; j < yDim; ++j){
756 for (int k = 0; k < zDim; ++k){
757 int index = i*yDim*zDim + j*yDim + k;
758 if (host_array[index] != index){
759 std::cout << "Threadding values improperly set!\n";
760 assert(host_array[index] == index);
766 std::cout << "3d grid tests completed. now for 3d cases" << '\n';
770 // Test of the parSum function in 3d
773 std::cout << "Beginning test of parallel summation.\n";
778 // first, we need to initialize the Grid and Cuda classes
783 // For now, we will assume an 64x64 array for summing
784 dim3 threads(16, 1, 1);
785 int total_threads = threads.x*threads.y*threads.z;
791 par.store("dimnum", 2);
792 par.store("xDim", 64);
793 par.store("yDim", 64);
794 par.store("zDim", 1);
798 par.threads = threads;
800 // Now we need to initialize the grid for the getGid3d3d kernel
808 // now we need to initialize the wfc to all 1's;
809 double2 *wfc, *host_sum;
810 wfc = (cufftDoubleComplex *) malloc(sizeof(cufftDoubleComplex) * gsize);
811 host_sum = (cufftDoubleComplex *)
812 malloc(sizeof(cufftDoubleComplex) * gsize / total_threads);
815 for (int i = 0; i < gsize; i++){
821 cudaMalloc((void**) &gpu_wfc, sizeof(cufftDoubleComplex)*gsize);
823 // copying wfc to device
824 err = cudaMemcpy(gpu_wfc, wfc, sizeof(cufftDoubleComplex)*gsize,
825 cudaMemcpyHostToDevice);
827 if (err!=cudaSuccess){
828 std::cout << "ERROR: Could not copy wfc to device!" << '\n';
831 // Creating parsum on device
833 parSum(gpu_wfc, par);
835 // copying parsum back
836 err = cudaMemcpy(wfc, gpu_wfc,
837 sizeof(cufftDoubleComplex)*gsize,
838 cudaMemcpyDeviceToHost);
839 if (err!=cudaSuccess){
840 std::cout << err << '\n';
841 std::cout << "ERROR: Could not copy wfc to the host!" << '\n';
845 for (int i = 0; i < gsize; ++i){
846 if (wfc[i].x != 2/sqrt(32768.0*dx*dy) ||
847 wfc[i].y != 2/sqrt(32768.0*dx*dy)){
848 std::cout << "Wavefunction not normalized!" << '\n';
849 std::cout << i << '\t' << wfc[i].x << '\t' << wfc[i].y << '\n';
850 assert(wfc[i].x == 2/sqrt(32768.0*dx*dy));
851 assert(wfc[i].y == 2/sqrt(32768.0*dx*dy));
855 // Now for the 3d case
856 // For now, we will assume a 16x16x16 array for summing
857 par.store("dimnum", 3);
858 par.store("xDim", 16);
859 par.store("yDim", 16);
860 par.store("zDim", 16);
865 // Now we need to initialize the grid for the getGid3d3d kernel
872 // copying host wfc back to device
873 err = cudaMemcpy(gpu_wfc, wfc, sizeof(cufftDoubleComplex)*gsize,
874 cudaMemcpyHostToDevice);
876 parSum(gpu_wfc, par);
878 // copying parsum back
879 err = cudaMemcpy(wfc, gpu_wfc,
880 sizeof(cufftDoubleComplex)*gsize,
881 cudaMemcpyDeviceToHost);
882 if (err!=cudaSuccess){
883 std::cout << "ERROR: Could not copy wfc to the host!" << '\n';
887 for (int i = 0; i < gsize; ++i){
888 if (wfc[i].x != 2/sqrt(32768.0*dx*dy*dz) ||
889 wfc[i].y != 2/sqrt(32768.0*dx*dy*dz)){
890 std::cout << "Wavefunction not normalized!" << '\n';
891 std::cout << wfc[i].x << '\t' << wfc[i].y << '\n';
892 assert(wfc[i].x == 2/sqrt(32768.0*dx*dy*dz));
893 assert(wfc[i].y == 2/sqrt(32768.0*dx*dy*dz));
897 std::cout << "Parallel summation test passed in 2 and 3D!\n";
901 // Test for the Grid structure with paramters in it
902 // Initialize all necessary variables and read them back out
903 void parameter_test(){
904 // For this test, we simply need to read in and out stuff from each
905 // class and structure in ds.h / ds.cc
907 // Certain variables will be used multiple times.
909 dstar_var = (double *)malloc(sizeof(double) * 5);
910 cufftDoubleComplex *cdc_var;
911 cdc_var = (cufftDoubleComplex *)malloc(sizeof(cufftDoubleComplex) * 5);
912 for (int i = 0; i < 5; ++i){
913 dstar_var[i] = (double)i * 0.5;
914 cdc_var[i].x = (double)i * 0.5;
915 cdc_var[i].y = (double)i * 0.5;
922 // Now testing the Grid class
924 grid_test.store("dstar_var",dstar_var);
925 grid_test.store("dvar", dvar);
926 grid_test.store("ivar", ivar);
927 grid_test.store("bvar", bvar);
929 assert(dstar_var == grid_test.dsval("dstar_var"));
930 assert(dvar == grid_test.dval("dvar"));
931 assert(ivar == grid_test.ival("ivar"));
932 assert(bvar == grid_test.bval("bvar"));
934 std::cout << "Grid class checked, now checking the Cuda class..." << '\n';
936 std::cout << "All data structures checked" << '\n';
940 // Test for the parsing function
943 // Testing the command-line parser with defaults and with modifications
944 std::cout << "Testing command-line parser with no arguments..." << '\n';
946 // First testing default values in and out of the parser function
947 char *fake_noargv[] = {NULL};
949 noarg_grid = parseArgs(0,fake_noargv);
951 // Checking contents of noarg_grid:
952 assert(noarg_grid.ival("xDim") == 256);
953 assert(noarg_grid.ival("yDim") == 256);
954 assert(noarg_grid.ival("zDim") == 256);
955 assert(noarg_grid.dval("omega") == 0);
956 assert(noarg_grid.dval("gammaY") == 1.0);
957 assert(noarg_grid.ival("gsteps") == 1);
958 assert(noarg_grid.ival("esteps") == 1);
959 assert(noarg_grid.dval("gdt") == 1e-4);
960 assert(noarg_grid.dval("dt") == 1e-4);
961 assert(noarg_grid.ival("device") == 0);
962 assert(noarg_grid.ival("atoms") == 1);
963 assert(noarg_grid.bval("read_wfc") == false);
964 assert(noarg_grid.ival("printSteps") == 100);
965 assert(noarg_grid.dval("winding") == 0);
966 assert(noarg_grid.bval("corotating") == false);
967 assert(noarg_grid.bval("gpe") == false);
968 assert(noarg_grid.dval("omegaZ") == 6.283);
969 assert(noarg_grid.dval("interaction") == 1);
970 assert(noarg_grid.dval("laser_power") == 0);
971 assert(noarg_grid.dval("angle_sweep") == 0);
972 assert(noarg_grid.ival("kick_it") == 0);
973 assert(noarg_grid.bval("write_it") == false);
974 assert(noarg_grid.dval("x0_shift") == 0);
975 assert(noarg_grid.dval("y0_shift") == 0);
976 assert(noarg_grid.dval("z0_shift") == 0);
977 assert(noarg_grid.dval("sepMinEpsilon") == 0);
978 assert(noarg_grid.bval("graph") == false);
979 assert(noarg_grid.bval("unit_test") == false);
980 assert(noarg_grid.dval("omegaX") == 6.283);
981 assert(noarg_grid.dval("omegaY") == 6.283);
982 assert(noarg_grid.sval("data_dir") == "data/");
983 assert(noarg_grid.bval("ramp") == false);
984 assert(noarg_grid.ival("ramp_type") == 1);
985 assert(noarg_grid.ival("dimnum") == 2);
986 assert(noarg_grid.bval("write_file") == true);
987 assert(noarg_grid.dval("fudge") == 1.0);
988 assert(noarg_grid.ival("kill_idx") == -1);
989 assert(noarg_grid.dval("mask_2d") == 1.5e-4);
990 assert(noarg_grid.dval("box_size") == 2.5e-5);
991 assert(noarg_grid.bval("found_sobel") == false);
992 assert(noarg_grid.Afn == "rotation");
993 assert(noarg_grid.Kfn == "rotation_K");
994 assert(noarg_grid.Vfn == "2d");
995 assert(noarg_grid.Wfcfn == "2d");
996 assert(noarg_grid.sval("conv_type") == "FFT");
997 assert(noarg_grid.ival("charge") == 0);
998 assert(noarg_grid.bval("flip") == false);
1000 // Now testing all values specified by command-line arguments
1001 std::cout << "Testing command-line parser with all arguments..." << '\n';
1002 std::vector<std::string> argarray(10);
1004 // I apologize for the mess... If you have a better way of creating the
1005 // char ** for this without running into memory issues, let me know!
1006 char *fake_fullargv[] = {strdup("./gpue"),
1007 strdup("-A"), strdup("rotation"),
1009 strdup("-b"), strdup("2.5e-5"),
1010 strdup("-C"), strdup("0"),
1011 strdup("-c"), strdup("3"),
1012 strdup("-D"), strdup("data"),
1014 strdup("-e"), strdup("1"),
1016 strdup("-G"), strdup("1"),
1017 strdup("-g"), strdup("1"),
1018 strdup("-i"), strdup("1"),
1019 strdup("-K"), strdup("0"),
1020 strdup("-k"), strdup("0"),
1021 strdup("-L"), strdup("0"),
1023 strdup("-n"), strdup("1"),
1024 strdup("-O"), strdup("0"),
1025 strdup("-P"), strdup("0"),
1026 strdup("-p"), strdup("100"),
1027 strdup("-Q"), strdup("0"),
1028 strdup("-q"), strdup("0"),
1029 strdup("-R"), strdup("1"),
1031 strdup("-S"), strdup("0"),
1033 strdup("-T"), strdup("1e-4"),
1034 strdup("-t"), strdup("1e-4"),
1035 strdup("-U"), strdup("0"),
1036 strdup("-V"), strdup("0"),
1038 strdup("-w"), strdup("0"),
1039 strdup("-X"), strdup("1.0"),
1040 strdup("-x"), strdup("256"),
1041 strdup("-Y"), strdup("1.0"),
1042 strdup("-y"), strdup("256"),
1043 strdup("-Z"), strdup("6.283"),
1044 strdup("-z"), strdup("256"),
1046 int fake_argc = sizeof(fake_fullargv) / sizeof(char *) - 1;
1048 // Now to read into gpue and see what happens
1050 fullarg_grid = parseArgs(fake_argc, fake_fullargv);
1052 // Checking contents of fullarg_grid:
1053 assert(fullarg_grid.ival("xDim") == 256);
1054 assert(fullarg_grid.ival("yDim") == 256);
1055 assert(fullarg_grid.ival("zDim") == 256);
1056 assert(fullarg_grid.dval("omega") == 0);
1057 assert(fullarg_grid.dval("gammaY") == 1.0);
1058 assert(fullarg_grid.ival("gsteps") == 1);
1059 assert(fullarg_grid.ival("esteps") == 1);
1060 assert(fullarg_grid.dval("gdt") == 1e-4);
1061 assert(fullarg_grid.dval("dt") == 1e-4);
1062 assert(fullarg_grid.ival("device") == 0);
1063 assert(fullarg_grid.ival("atoms") == 1);
1064 assert(fullarg_grid.bval("read_wfc") == false);
1065 assert(fullarg_grid.ival("printSteps") == 100);
1066 assert(fullarg_grid.dval("winding") == 0);
1067 assert(fullarg_grid.bval("corotating") == true);
1068 assert(fullarg_grid.bval("gpe") == true);
1069 assert(fullarg_grid.dval("omegaZ") == 6.283);
1070 assert(fullarg_grid.dval("interaction") == 1);
1071 assert(fullarg_grid.dval("laser_power") == 0);
1072 assert(fullarg_grid.dval("angle_sweep") == 0);
1073 assert(fullarg_grid.ival("kick_it") == 0);
1074 assert(fullarg_grid.bval("write_it") == true);
1075 assert(fullarg_grid.dval("x0_shift") == 0);
1076 assert(fullarg_grid.dval("y0_shift") == 0);
1077 assert(fullarg_grid.dval("z0_shift") == 0);
1078 assert(fullarg_grid.dval("sepMinEpsilon") == 0);
1079 assert(fullarg_grid.bval("graph") == true);
1080 assert(fullarg_grid.bval("unit_test") == false);
1081 assert(fullarg_grid.dval("omegaX") == 1.0);
1082 assert(fullarg_grid.dval("omegaY") == 1.0);
1083 assert(fullarg_grid.sval("data_dir") == "data/");
1084 assert(fullarg_grid.bval("ramp") == true);
1085 assert(fullarg_grid.ival("ramp_type") == 1);
1086 assert(fullarg_grid.ival("dimnum") == 3);
1087 assert(fullarg_grid.bval("write_file") == false);
1088 assert(fullarg_grid.dval("fudge") == 1.0);
1089 assert(fullarg_grid.ival("kill_idx") == 0);
1090 assert(fullarg_grid.dval("mask_2d") == 1.5e-4);
1091 assert(fullarg_grid.dval("box_size") == 2.5e-5);
1092 assert(fullarg_grid.bval("found_sobel") == false);
1093 assert(fullarg_grid.Afn == "rotation");
1094 assert(fullarg_grid.Kfn == "rotation_K3d");
1095 assert(fullarg_grid.Vfn == "3d");
1096 assert(fullarg_grid.Wfcfn == "3d");
1097 assert(fullarg_grid.sval("conv_type") == "FFT");
1098 assert(fullarg_grid.ival("charge") == 0);
1099 assert(fullarg_grid.bval("flip") == true);
1103 // Testing the evolve function in evolution.cu
1104 // This test will also test the energy function
1105 // Run the simulation in imaginary time for a simple harmonic oscillator and
1106 // check energies in 1, 2, and 3D.
1107 // Run the simulation in real time to make sure the energy remains the same
1110 std::cout << "Starting test of evolution function in nD...\n";
1112 // Setting default values
1115 par.store("omega", 0.0);
1116 par.store("gammaY", 1.0);
1117 par.store("device", 0);
1118 par.store("read_wfc", false);
1119 par.store("winding", 0.0);
1120 par.store("corotating", false);
1121 par.store("gpe", false);
1122 par.store("interaction",1.0);
1123 par.store("laser_power",0.0);
1124 par.store("angle_sweep",0.0);
1125 par.store("kick_it", 0);
1126 par.store("x0_shift",0.0);
1127 par.store("y0_shift",0.0);
1128 par.store("z0_shift",0.0);
1129 par.store("sepMinEpsilon",0.0);
1130 par.store("graph", false);
1131 par.store("unit_test",false);
1132 par.store("data_dir", (std::string)"data/");
1133 par.store("ramp", false);
1134 par.store("ramp_type", 1);
1135 par.store("dimnum", 2);
1136 par.store("fudge", 0.0);
1137 par.store("kill_idx", -1);
1138 par.store("mask_2d", 1.5e-4);
1139 par.store("found_sobel", false);
1140 par.store("use_param_file", false);
1141 par.store("param_file","param.cfg");
1142 par.store("cyl_coord",false);
1143 par.Afn = "rotation";
1144 par.Kfn = "rotation_K";
1147 par.store("conv_type", (std::string)"FFT");
1148 par.store("charge", 0);
1149 par.store("flip", false);
1150 par.store("thresh_const", 1.0);
1153 double thresh = 0.01;
1158 par.store("gdt", 1e-4);
1159 par.store("dt", 1e-4);
1160 par.store("atoms", 1);
1161 par.store("omegaZ", 1.0);
1162 par.store("omegaX", 1.0);
1163 par.store("omegaY", 1.0);
1164 par.store("esteps", esteps);
1165 par.store("gsteps", gsteps);
1166 par.store("printSteps", 30000);
1167 par.store("write_file", true);
1168 par.store("write_it", true);
1169 par.store("energy_calc", true);
1170 par.store("box_size", 0.00007);
1171 par.store("xDim", 64);
1172 par.store("yDim", 1);
1173 par.store("zDim", 1);
1176 // Running through all the dimensions to check the energy
1177 for (int i = 1; i <= 3; ++i){
1179 par.store("yDim", 64);
1182 par.store("zDim", 64);
1184 par.store("dimnum",i);
1187 if (par.bval("write_file")){
1188 FileIO::writeOutParam(buffer, par, "data/Params.dat");
1191 double omegaX = par.dval("omegaX");
1192 set_variables(par, 0);
1194 // Evolve and find the energy
1195 evolve(par, gsteps, 0, buffer);
1197 // Check that the energy is correct
1198 double energy = par.dval("energy");
1199 double energy_check = 0;
1200 energy_check = (double)i * 0.5 * HBAR * omegaX;
1202 if (abs(energy - energy_check) > thresh*energy_check){
1203 std::cout << "Energy is not correct in imaginary-time for "
1205 assert(energy == energy_check);
1208 // Run in real time to make sure that the energy is constant
1209 set_variables(par, 1);
1210 evolve(par, esteps, 1, buffer);
1211 double energy_ev = par.dval("energy");
1213 if (abs(energy - energy_ev) > thresh*energy_check){
1214 std::cout << "Energy is not constant in real-time for "
1216 assert(energy == energy_ev);
1222 __global__ void make_complex_kernel(double *in, int *evolution_type,
1225 //int id = threadIdx.x + blockIdx.x*blockDim.x;
1226 //out[id] = make_complex(in[id], evolution_type[id]);
1227 for (int i = 0; i < 3; ++i){
1228 out[i] = make_complex(in[i], evolution_type[i]);
1232 void make_complex_test(){
1234 // Creating a simple array to hold the 3 possible make_complex options
1235 double *input_array, *dinput_array;
1236 double2 *output_array, *doutput_array;
1237 int *evolution_type, *devolution_type;
1239 input_array = (double *)malloc(sizeof(double)*3);
1240 output_array = (double2 *)malloc(sizeof(double2)*3);
1241 evolution_type = (int *)malloc(sizeof(int)*3);
1243 input_array[0] = 10;
1244 input_array[1] = 10;
1245 input_array[2] = 10;
1247 evolution_type[0] = 0;
1248 evolution_type[1] = 1;
1249 evolution_type[2] = 2;
1251 cudaMalloc((void **)&dinput_array, sizeof(double)*3);
1252 cudaMalloc((void **)&doutput_array, sizeof(double2)*3);
1253 cudaMalloc((void **)&devolution_type, sizeof(int)*3);
1255 cudaMemcpy(dinput_array, input_array, sizeof(double)*3,
1256 cudaMemcpyHostToDevice);
1257 cudaMemcpy(devolution_type, evolution_type, sizeof(int)*3,
1258 cudaMemcpyHostToDevice);
1260 dim3 threads = {1,1,1};
1261 dim3 grid = {1,1,1};
1263 make_complex_kernel<<<1,1>>>(dinput_array, devolution_type,
1265 cudaDeviceSynchronize();
1267 cudaMemcpy(output_array, doutput_array, sizeof(double2)*3,
1268 cudaMemcpyDeviceToHost);
1271 double thresh = 0.000001;
1273 if (abs(output_array[0].x - input_array[0]) > thresh ||
1274 (output_array[0].y) > thresh){
1275 std::cout << "failed 1\n";
1278 if (abs(output_array[1].x - exp(-input_array[1])) > thresh ||
1279 abs(output_array[1].y) > thresh){
1280 std::cout << "failed 2\n";
1283 if (abs(output_array[2].x - cos(-input_array[2])) > thresh ||
1284 abs(output_array[2].y - sin(-input_array[2])) > thresh){
1285 std::cout << "failed 3\n";
1290 std::cout << "make_complex test passed!\n";
1293 std::cout << "make_complex test failed!\n";
1299 void cMultPhi_test(){
1300 // first, we are creating a double2 array to work with
1304 double2 *din1, *dout;
1307 in1 = (double2 *)malloc(sizeof(double2)*n);
1308 in2 = (double *)malloc(sizeof(double)*n);
1309 out = (double2 *)malloc(sizeof(double2)*n);
1311 cudaMalloc((void **)&din1, sizeof(double2)*n);
1312 cudaMalloc((void **)&din2, sizeof(double)*n);
1313 cudaMalloc((void **)&dout, sizeof(double2)*n);
1315 for (int i = 0; i < n; ++i){
1321 cudaMemcpy(din1, in1, sizeof(double2)*n, cudaMemcpyHostToDevice);
1322 cudaMemcpy(din2, in2, sizeof(double)*n, cudaMemcpyHostToDevice);
1324 cMultPhi<<<1,n>>>(din1, din2, dout);
1325 cudaDeviceSynchronize();
1327 cudaMemcpy(out, dout, sizeof(double2)*n, cudaMemcpyDeviceToHost);
1329 double thresh = 0.000001;
1331 for (int i = 0; i < n; ++i){
1332 if (abs(out[i].x-cos(in2[i])*in1[i].x-in1[i].y*sin(in2[i])) < thresh ||
1333 abs(out[i].y-in1[i].x*sin(in2[i])+in1[i].y*cos(in2[i])) < thresh){
1339 std::cout << "cMultPhi test passed!\n";
1342 std::cout << "cMultPhi test failed!\n";