1 /*-------------vortex_3d.cu---------------------------------------------------//
3 * Purpose: This file intends to perform a convolution of 3d data for vortex
4 * recognition in the GPUE code
6 * Notes: We will be using the window method for convolutions because it has
7 * a slightly better complxity case for a separable filter
8 * (which the Sobel filter definitely is!)
10 *-----------------------------------------------------------------------------*/
13 #include "../include/ds.h"
14 #include "../include/vortex_3d.h"
15 #include "../include/split_op.h"
16 #include "../include/kernels.h"
18 //We will need a few functions to deal with vortex skeletons
19 pos **find_skeletons(double* edges);
21 // Function to find the sobel operators and transfer to GPU
22 void find_sobel(Grid &par){
24 std::string conv_type = par.sval("conv_type");
27 // There will be two cases to take into account here, one for fft
28 // convolution and another for window
32 if (conv_type == "FFT"){
33 double2 *sobel_x, *sobel_y, *sobel_z;
34 xDim = par.ival("xDim");
35 yDim = par.ival("yDim");
36 zDim = par.ival("zDim");
38 sobel_x = (double2 *) malloc(sizeof(double2) *xDim*yDim*zDim);
39 sobel_y = (double2 *) malloc(sizeof(double2) *xDim*yDim*zDim);
40 sobel_z = (double2 *) malloc(sizeof(double2) *xDim*yDim*zDim);
42 // Now let's go ahead and pad these guys with 0's
43 for (int i = 0; i < xDim; ++i){
44 for (int j = 0; j < yDim; ++j){
45 for (int k = 0; k < zDim; ++k){
46 index = k + j * xDim + i * yDim * zDim;
54 // There is clearly a better way to do this with matrix multiplication
55 // the sobel operator is separable, so we just need to mix
56 // Gradient and triangle filters, check:
57 // https://en.wikipedia.org/wiki/Sobel_operator
61 for (int i = 0; i < 3; ++i){
62 for (int j = 0; j < 3; ++j){
63 for (int k = 0; k < 3; ++k){
64 index = k + zDim*j + zDim*yDim*i;
74 if (i == 0 && j == 0){
75 sobel_z[index].x = factor * 1;
77 if (i == 0 && j == 1){
78 sobel_z[index].x = factor * 2;
80 if (i == 0 && j == 2){
81 sobel_z[index].x = factor * 1;
83 if (i == 1 && j == 0){
84 sobel_z[index].x = factor * 2;
86 if (i == 1 && j == 1){
87 sobel_z[index].x = factor * 4;
89 if (i == 1 && j == 2){
90 sobel_z[index].x = factor * 2;
92 if (i == 2 && j == 0){
93 sobel_z[index].x = factor * 1;
95 if (i == 2 && j == 1){
96 sobel_z[index].x = factor * 2;
98 if (i == 2 && j == 2){
99 sobel_z[index].x = factor * 1;
106 for (int i = 0; i < 3; ++i){
107 for (int j = 0; j < 3; ++j){
108 for (int k = 0; k < 3; ++k){
109 index = k + zDim*j + zDim*yDim*i;
119 if (i == 0 && k == 0){
120 sobel_y[index].x = factor * 1;
122 if (i == 0 && k == 1){
123 sobel_y[index].x = factor * 2;
125 if (i == 0 && k == 2){
126 sobel_y[index].x = factor * 1;
128 if (i == 1 && k == 0){
129 sobel_y[index].x = factor * 2;
131 if (i == 1 && k == 1){
132 sobel_y[index].x = factor * 4;
134 if (i == 1 && k == 2){
135 sobel_y[index].x = factor * 2;
137 if (i == 2 && k == 0){
138 sobel_y[index].x = factor * 1;
140 if (i == 2 && k == 1){
141 sobel_y[index].x = factor * 2;
143 if (i == 2 && k == 2){
144 sobel_y[index].x = factor * 1;
151 for (int i = 0; i < 3; ++i){
152 for (int j = 0; j < 3; ++j){
153 for (int k = 0; k < 3; ++k){
154 index = k + zDim*j + zDim*yDim*i;
164 if (k == 0 && j == 0){
165 sobel_x[index].x = factor * 1;
167 if (k == 0 && j == 1){
168 sobel_x[index].x = factor * 2;
170 if (k == 0 && j == 2){
171 sobel_x[index].x = factor * 1;
173 if (k == 1 && j == 0){
174 sobel_x[index].x = factor * 2;
176 if (k == 1 && j == 1){
177 sobel_x[index].x = factor * 4;
179 if (k == 1 && j == 2){
180 sobel_x[index].x = factor * 2;
182 if (k == 2 && j == 0){
183 sobel_x[index].x = factor * 1;
185 if (k == 2 && j == 1){
186 sobel_x[index].x = factor * 2;
188 if (k == 2 && j == 2){
189 sobel_x[index].x = factor * 1;
196 par.store("sobel_x", sobel_x);
197 par.store("sobel_y", sobel_y);
198 par.store("sobel_z", sobel_z);
202 double *sobel_x, *sobel_y, *sobel_z;
207 sobel_x = (double *) malloc(sizeof(double) *27);
208 sobel_y = (double *) malloc(sizeof(double) *27);
209 sobel_z = (double *) malloc(sizeof(double) *27);
211 // There is clearly a better way to do this with matrix multiplication
212 // the sobel operator is separable, so we just need to mix
213 // Gradient and triangle filters, check:
214 // https://en.wikipedia.org/wiki/Sobel_operator
218 for (int i = 0; i < 3; ++i){
219 for (int j = 0; j < 3; ++j){
220 for (int k = 0; k < 3; ++k){
221 index = k + 3*j + 9*i;
231 if (i == 0 && j == 0){
232 sobel_z[index] = factor * 1;
234 if (i == 0 && j == 1){
235 sobel_z[index] = factor * 2;
237 if (i == 0 && j == 2){
238 sobel_z[index] = factor * 1;
240 if (i == 1 && j == 0){
241 sobel_z[index] = factor * 2;
243 if (i == 1 && j == 1){
244 sobel_z[index] = factor * 4;
246 if (i == 1 && j == 2){
247 sobel_z[index] = factor * 2;
249 if (i == 2 && j == 0){
250 sobel_z[index] = factor * 1;
252 if (i == 2 && j == 1){
253 sobel_z[index] = factor * 2;
255 if (i == 2 && j == 2){
256 sobel_z[index] = factor * 1;
263 for (int i = 0; i < 3; ++i){
264 for (int j = 0; j < 3; ++j){
265 for (int k = 0; k < 3; ++k){
266 index = k + 3*j + 9*i;
276 if (i == 0 && k == 0){
277 sobel_y[index] = factor * 1;
279 if (i == 0 && k == 1){
280 sobel_y[index] = factor * 2;
282 if (i == 0 && k == 2){
283 sobel_y[index] = factor * 1;
285 if (i == 1 && k == 0){
286 sobel_y[index] = factor * 2;
288 if (i == 1 && k == 1){
289 sobel_y[index] = factor * 4;
291 if (i == 1 && k == 2){
292 sobel_y[index] = factor * 2;
294 if (i == 2 && k == 0){
295 sobel_y[index] = factor * 1;
297 if (i == 2 && k == 1){
298 sobel_y[index] = factor * 2;
300 if (i == 2 && k == 2){
301 sobel_y[index] = factor * 1;
308 for (int i = 0; i < 3; ++i){
309 for (int j = 0; j < 3; ++j){
310 for (int k = 0; k < 3; ++k){
311 index = k + 3*j + 9*i;
321 if (k == 0 && j == 0){
322 sobel_x[index] = factor * 1;
324 if (k == 0 && j == 1){
325 sobel_x[index] = factor * 2;
327 if (k == 0 && j == 2){
328 sobel_x[index] = factor * 1;
330 if (k == 1 && j == 0){
331 sobel_x[index] = factor * 2;
333 if (k == 1 && j == 1){
334 sobel_x[index] = factor * 4;
336 if (k == 1 && j == 2){
337 sobel_x[index] = factor * 2;
339 if (k == 2 && j == 0){
340 sobel_x[index] = factor * 1;
342 if (k == 2 && j == 1){
343 sobel_x[index] = factor * 2;
345 if (k == 2 && j == 2){
346 sobel_x[index] = factor * 1;
354 par.store("sobel_x", sobel_x);
355 par.store("sobel_y", sobel_y);
356 par.store("sobel_z", sobel_z);
361 par.store("found_sobel", true);
364 void find_sobel_2d(Grid &par){
366 std::string conv_type = par.sval("conv_type");
367 int xDim, yDim, zDim;
369 // There will be two cases to take into account here, one for fft
370 // convolution and another for window
374 if (conv_type == "FFT"){
375 double2 *sobel_x, *sobel_y, *sobel_z;
376 xDim = par.ival("xDim");
377 yDim = par.ival("yDim");
379 sobel_x = (double2 *) malloc(sizeof(double2) *xDim*yDim);
380 sobel_y = (double2 *) malloc(sizeof(double2) *xDim*yDim);
382 // Now let's go ahead and pad these guys with 0's
383 for (int i = 0; i < xDim; ++i){
384 for (int j = 0; j < yDim; ++j){
385 index = j + i * yDim;
386 sobel_x[index].x = 0;
387 sobel_y[index].x = 0;
391 // There is clearly a better way to do this with matrix multiplication
392 // the sobel operator is separable, so we just need to mix
393 // Gradient and triangle filters, check:
394 // https://en.wikipedia.org/wiki/Sobel_operator
398 for (int i = 0; i < 3; ++i){
399 for (int j = 0; j < 3; ++j){
410 if (j == 0 && i == 0){
411 sobel_y[index].x = factor * 1;
413 if (j == 0 && i == 1){
414 sobel_y[index].x = factor * 2;
416 if (j == 0 && i == 2){
417 sobel_y[index].x = factor * 1;
419 if (j == 2 && i == 0){
420 sobel_y[index].x = factor * 1;
422 if (j == 2 && i == 1){
423 sobel_y[index].x = factor * 2;
425 if (j == 2 && i == 2){
426 sobel_y[index].x = factor * 1;
432 for (int i = 0; i < 3; ++i){
433 for (int j = 0; j < 3; ++j){
444 if (i == 0 && j == 0){
445 sobel_x[index].x = factor * 1;
447 if (i == 0 && j == 1){
448 sobel_x[index].x = factor * 2;
450 if (i == 0 && j == 2){
451 sobel_x[index].x = factor * 1;
453 if (i == 2 && j == 0){
454 sobel_x[index].x = factor * 1;
456 if (i == 2 && j == 1){
457 sobel_x[index].x = factor * 2;
459 if (i == 2 && j == 2){
460 sobel_x[index].x = factor * 1;
466 par.store("sobel_x", sobel_x);
467 par.store("sobel_y", sobel_y);
468 par.store("sobel_z", sobel_z);
472 double *sobel_x, *sobel_y, *sobel_z;
476 sobel_x = (double *) malloc(sizeof(double) *9);
477 sobel_y = (double *) malloc(sizeof(double) *9);
479 // There is clearly a better way to do this with matrix multiplication
480 // the sobel operator is separable, so we just need to mix
481 // Gradient and triangle filters, check:
482 // https://en.wikipedia.org/wiki/Sobel_operator
486 for (int i = 0; i < 3; ++i){
487 for (int j = 0; j < 3; ++j){
498 if (j == 0 && i == 0){
499 sobel_y[index] = factor * 1;
501 if (j == 0 && i == 1){
502 sobel_y[index] = factor * 2;
504 if (j == 0 && i == 2){
505 sobel_y[index] = factor * 1;
507 if (j == 2 && i == 0){
508 sobel_y[index] = factor * 1;
510 if (j == 2 && i == 1){
511 sobel_y[index] = factor * 2;
513 if (j == 2 && i == 2){
514 sobel_y[index] = factor * 1;
520 for (int i = 0; i < 3; ++i){
521 for (int j = 0; j < 3; ++j){
532 if (i == 0 && j == 0){
533 sobel_x[index] = factor * 1;
535 if (i == 0 && j == 1){
536 sobel_x[index] = factor * 2;
538 if (i == 0 && j == 2){
539 sobel_x[index] = factor * 1;
541 if (i == 2 && j == 0){
542 sobel_x[index] = factor * 1;
544 if (i == 2 && j == 1){
545 sobel_x[index] = factor * 2;
547 if (i == 2 && j == 2){
548 sobel_x[index] = factor * 1;
555 par.store("sobel_x", sobel_x);
556 par.store("sobel_y", sobel_y);
557 par.store("sobel_z", sobel_z);
564 // Function to transfer 3d sobel operators for non-fft convolution
565 void transfer_sobel(Grid &par){
567 // Grabbing necessary parameters
568 int dimnum = par.ival("dimnum");
569 int xDim = par.ival("xDim");
570 int yDim = par.ival("yDim");
571 int zDim = par.ival("zDim");
572 int gSize = xDim * yDim * zDim;
573 std::string conv_type = par.sval("conv_type");
575 double2 *sobel_x_gpu, *sobel_y_gpu, *sobel_z_gpu;
579 plan_nd = par.ival("plan_2d");
581 else if (dimnum == 3){
582 plan_nd = par.ival("plan_3d");
585 // creating space on device for 2 separate cases
586 // Note that in the case of the FFT, the Sobel operators will be double2's
587 // while in the case of the window transform, they will be double
588 if (conv_type == "FFT"){
589 double2 *sobel_x = par.cufftDoubleComplexval("sobel_x");
590 double2 *sobel_y = par.cufftDoubleComplexval("sobel_y");
591 double2 *sobel_z = par.cufftDoubleComplexval("sobel_z");
592 cudaMalloc((void**) &sobel_x_gpu, sizeof(double2) *gSize);
593 cudaMalloc((void**) &sobel_y_gpu, sizeof(double2) *gSize);
595 cudaMalloc((void**) &sobel_z_gpu, sizeof(double2) *gSize);
598 // Transferring to device
602 err = cudaMemcpy(sobel_x_gpu, sobel_x, sizeof(double2)*gSize,
603 cudaMemcpyHostToDevice);
604 if (err != cudaSuccess){
605 std::cout << "ERROR: Could not copy sobel_x to device!" << '\n';
610 err = cudaMemcpy(sobel_y_gpu, sobel_y, sizeof(double2)*gSize,
611 cudaMemcpyHostToDevice);
612 if (err != cudaSuccess){
613 std::cout << "ERROR: Could not copy sobel_y to device!" << '\n';
619 err = cudaMemcpy(sobel_z_gpu, sobel_z, sizeof(double2)*gSize,
620 cudaMemcpyHostToDevice);
621 if (err != cudaSuccess){
622 std::cout << "ERROR: Could not copy sobel_z to device!" << '\n';
627 // We only need the FFT's of the sobel operators. Let's generate those
628 cufftExecZ2Z(plan_nd, sobel_x_gpu, sobel_x_gpu, CUFFT_FORWARD);
629 cufftExecZ2Z(plan_nd, sobel_y_gpu, sobel_y_gpu, CUFFT_FORWARD);
631 cufftExecZ2Z(plan_nd, sobel_z_gpu, sobel_z_gpu, CUFFT_FORWARD);
634 // Storing in set of parameters
635 par.store("sobel_x_gpu", sobel_x_gpu);
636 par.store("sobel_y_gpu", sobel_y_gpu);
637 par.store("sobel_z_gpu", sobel_z_gpu);
640 double2 *sobel_x = par.cufftDoubleComplexval("sobel_x");
641 double2 *sobel_y = par.cufftDoubleComplexval("sobel_y");
642 double2 *sobel_z = par.cufftDoubleComplexval("sobel_z");
647 else if (dimnum == 3){
650 cudaMalloc((void**) &sobel_x_gpu, sizeof(double) *size);
651 cudaMalloc((void**) &sobel_y_gpu, sizeof(double) *size);
652 cudaMalloc((void**) &sobel_z_gpu, sizeof(double) *size);
653 // Transferring to device
657 err = cudaMemcpy(sobel_x_gpu, sobel_x, sizeof(double)*gSize,
658 cudaMemcpyHostToDevice);
659 if (err != cudaSuccess){
660 std::cout << "ERROR: Could not copy sobel_x to device!" << '\n';
665 err = cudaMemcpy(sobel_y_gpu, sobel_y, sizeof(double)*gSize,
666 cudaMemcpyHostToDevice);
667 if (err != cudaSuccess){
668 std::cout << "ERROR: Could not copy sobel_y to device!" << '\n';
674 err = cudaMemcpy(sobel_z_gpu, sobel_z, sizeof(double)*gSize,
675 cudaMemcpyHostToDevice);
676 if (err != cudaSuccess){
677 std::cout << "ERROR: Could not copy sobel_z to device!" << '\n';
682 // Storing in set of parameters
683 par.store("sobel_x_gpu", sobel_x_gpu);
684 par.store("sobel_y_gpu", sobel_y_gpu);
685 par.store("sobel_z_gpu", sobel_z_gpu);
689 par.store("found_sobel",true);
693 // function to transform a wavefunction to a field of edges
694 void find_edges(Grid &par,
695 double2* wfc, double* edges){
697 // for this, we simply need to take our sobel 3d sobel filter,
698 // FFT forward, multiply, FFT back.
700 int dimnum = par.ival("dimnum");
701 dim3 grid = par.grid;
702 dim3 threads = par.threads;
704 double2 *wfc_gpu = par.cufftDoubleComplexval("wfc_gpu");
705 int xDim = par.ival("xDim");
706 int yDim = par.ival("yDim");
707 int zDim = par.ival("zDim");
708 int gSize = xDim * yDim * zDim;
710 double *density_d, *edges_gpu;
711 double2 *density_d2, *gradient_x_fft, *gradient_y_fft, *gradient_z_fft;
713 // Now we need to grab the Sobel operators
714 if (par.bval("found_sobel") == false){
715 std::cout << "Finding sobel filters" << '\n';
722 cudaMalloc((void**) &density_d, sizeof(double) * gSize);
723 cudaMalloc((void**) &gradient_x_fft, sizeof(double2) * gSize);
724 cudaMalloc((void**) &gradient_y_fft, sizeof(double2) * gSize);
726 cudaMalloc((void**) &gradient_z_fft, sizeof(double2) * gSize);
728 cudaMalloc((void**) &density_d2, sizeof(double2) * gSize);
729 cudaMalloc((void**) &edges_gpu, sizeof(double) * gSize);
732 density_d = par.dsval("density_d");
733 edges_gpu = par.dsval("edges_gpu");
734 gradient_x_fft = par.cufftDoubleComplexval("gradient_x_fft");
735 gradient_y_fft = par.cufftDoubleComplexval("gradient_y_fft");
737 gradient_z_fft = par.cufftDoubleComplexval("gradient_z_fft");
739 density_d2 = par.cufftDoubleComplexval("density_d2");
742 cufftHandle plan_3d = par.ival("plan_3d");
744 // now to perform the complexMagnitudeSquared operation
745 complexMagnitudeSquared<<<grid,threads>>>(wfc_gpu, density_d);
747 // Pulling operators from find_sobel(par)
748 double2 *sobel_x_gpu = par.cufftDoubleComplexval("sobel_x_gpu");
749 double2 *sobel_y_gpu = par.cufftDoubleComplexval("sobel_y_gpu");
750 double2 *sobel_z_gpu;
752 sobel_z_gpu = par.cufftDoubleComplexval("sobel_z_gpu");
755 // This should work in principle, but the D2Z transform plays tricks
756 // Generating plan for d2z in 3d
757 //cufftHandle plan_3d2z;
758 //cufftPlan3d(&plan_3d2z, xDim, yDim, zDim, CUFFT_D2Z);
760 make_cufftDoubleComplex<<<grid, threads>>>(density_d, density_d2);
762 // Now fft forward, multiply, fft back
763 cufftExecZ2Z(plan_3d, density_d2, gradient_x_fft, CUFFT_FORWARD);
764 cufftExecZ2Z(plan_3d, density_d2, gradient_y_fft, CUFFT_FORWARD);
766 cufftExecZ2Z(plan_3d, density_d2, gradient_z_fft, CUFFT_FORWARD);
769 // Now to perform the multiplication
770 cMult<<<grid, threads>>>(gradient_x_fft, sobel_x_gpu, gradient_x_fft);
771 cMult<<<grid, threads>>>(gradient_y_fft, sobel_y_gpu, gradient_y_fft);
773 cMult<<<grid, threads>>>(gradient_z_fft, sobel_z_gpu, gradient_z_fft);
777 cufftExecZ2Z(plan_3d, gradient_x_fft, gradient_x_fft, CUFFT_INVERSE);
778 cufftExecZ2Z(plan_3d, gradient_y_fft, gradient_y_fft, CUFFT_INVERSE);
780 cufftExecZ2Z(plan_3d, gradient_z_fft, gradient_z_fft, CUFFT_INVERSE);
784 l2_norm<<<grid, threads>>>(gradient_x_fft, gradient_y_fft, edges_gpu);
786 else if (dimnum == 3){
787 l2_norm<<<grid, threads>>>(gradient_x_fft, gradient_y_fft,
788 gradient_z_fft, edges_gpu);
791 // Copying edges back
792 cudaMemcpy(edges, edges_gpu, sizeof(double) * gSize,
793 cudaMemcpyDeviceToHost);
795 // Method to find edges based on window approach -- more efficient,
796 // but difficult to implement
798 par.store("density_d", density_d);
799 par.store("density_d2", density_d2);
800 par.store("edges_gpu", edges_gpu);
801 par.store("gradient_x_fft", gradient_x_fft);
802 par.store("gradient_y_fft", gradient_y_fft);
803 par.store("gradient_z_fft", gradient_z_fft);