GPUE  v1.0
GPU Gross-Pitaevskii Equation numerical solver for Bose-Einstein condensates
vortex_3d.cu
Go to the documentation of this file.
1 /*-------------vortex_3d.cu---------------------------------------------------//
2 *
3 * Purpose: This file intends to perform a convolution of 3d data for vortex
4 * recognition in the GPUE code
5 *
6 * Notes: We will be using the window method for convolutions because it has
7 * a slightly better complxity case for a separable filter
8 * (which the Sobel filter definitely is!)
9 *
10 *-----------------------------------------------------------------------------*/
11 
12 #include <algorithm>
13 #include "../include/ds.h"
14 #include "../include/vortex_3d.h"
15 #include "../include/split_op.h"
16 #include "../include/kernels.h"
17 
18 //We will need a few functions to deal with vortex skeletons
19 pos **find_skeletons(double* edges);
20 
21 // Function to find the sobel operators and transfer to GPU
22 void find_sobel(Grid &par){
23 
24  std::string conv_type = par.sval("conv_type");
25  int xDim, yDim, zDim;
26 
27  // There will be two cases to take into account here, one for fft
28  // convolution and another for window
29 
30  int index = 0;
31 
32  if (conv_type == "FFT"){
33  double2 *sobel_x, *sobel_y, *sobel_z;
34  xDim = par.ival("xDim");
35  yDim = par.ival("yDim");
36  zDim = par.ival("zDim");
37 
38  sobel_x = (double2 *) malloc(sizeof(double2) *xDim*yDim*zDim);
39  sobel_y = (double2 *) malloc(sizeof(double2) *xDim*yDim*zDim);
40  sobel_z = (double2 *) malloc(sizeof(double2) *xDim*yDim*zDim);
41 
42  // Now let's go ahead and pad these guys with 0's
43  for (int i = 0; i < xDim; ++i){
44  for (int j = 0; j < yDim; ++j){
45  for (int k = 0; k < zDim; ++k){
46  index = k + j * xDim + i * yDim * zDim;
47  sobel_x[index].x = 0;
48  sobel_y[index].x = 0;
49  sobel_z[index].x = 0;
50  }
51  }
52  }
53 
54  // There is clearly a better way to do this with matrix multiplication
55  // the sobel operator is separable, so we just need to mix
56  // Gradient and triangle filters, check:
57  // https://en.wikipedia.org/wiki/Sobel_operator
58 
59  // Starting with S_z
60  int factor;
61  for (int i = 0; i < 3; ++i){
62  for (int j = 0; j < 3; ++j){
63  for (int k = 0; k < 3; ++k){
64  index = k + zDim*j + zDim*yDim*i;
65  if (k == 0){
66  factor = -1;
67  }
68  if (k == 1){
69  factor = 0;
70  }
71  if (k == 2){
72  factor = 1;
73  }
74  if (i == 0 && j == 0){
75  sobel_z[index].x = factor * 1;
76  }
77  if (i == 0 && j == 1){
78  sobel_z[index].x = factor * 2;
79  }
80  if (i == 0 && j == 2){
81  sobel_z[index].x = factor * 1;
82  }
83  if (i == 1 && j == 0){
84  sobel_z[index].x = factor * 2;
85  }
86  if (i == 1 && j == 1){
87  sobel_z[index].x = factor * 4;
88  }
89  if (i == 1 && j == 2){
90  sobel_z[index].x = factor * 2;
91  }
92  if (i == 2 && j == 0){
93  sobel_z[index].x = factor * 1;
94  }
95  if (i == 2 && j == 1){
96  sobel_z[index].x = factor * 2;
97  }
98  if (i == 2 && j == 2){
99  sobel_z[index].x = factor * 1;
100  }
101  }
102  }
103  }
104 
105  // S_y
106  for (int i = 0; i < 3; ++i){
107  for (int j = 0; j < 3; ++j){
108  for (int k = 0; k < 3; ++k){
109  index = k + zDim*j + zDim*yDim*i;
110  if (j == 0){
111  factor = -1;
112  }
113  if (j == 1){
114  factor = 0;
115  }
116  if (j == 2){
117  factor = 1;
118  }
119  if (i == 0 && k == 0){
120  sobel_y[index].x = factor * 1;
121  }
122  if (i == 0 && k == 1){
123  sobel_y[index].x = factor * 2;
124  }
125  if (i == 0 && k == 2){
126  sobel_y[index].x = factor * 1;
127  }
128  if (i == 1 && k == 0){
129  sobel_y[index].x = factor * 2;
130  }
131  if (i == 1 && k == 1){
132  sobel_y[index].x = factor * 4;
133  }
134  if (i == 1 && k == 2){
135  sobel_y[index].x = factor * 2;
136  }
137  if (i == 2 && k == 0){
138  sobel_y[index].x = factor * 1;
139  }
140  if (i == 2 && k == 1){
141  sobel_y[index].x = factor * 2;
142  }
143  if (i == 2 && k == 2){
144  sobel_y[index].x = factor * 1;
145  }
146  }
147  }
148  }
149 
150  // Now for S_x
151  for (int i = 0; i < 3; ++i){
152  for (int j = 0; j < 3; ++j){
153  for (int k = 0; k < 3; ++k){
154  index = k + zDim*j + zDim*yDim*i;
155  if (i == 0){
156  factor = -1;
157  }
158  if (i == 1){
159  factor = 0;
160  }
161  if (i == 2){
162  factor = 1;
163  }
164  if (k == 0 && j == 0){
165  sobel_x[index].x = factor * 1;
166  }
167  if (k == 0 && j == 1){
168  sobel_x[index].x = factor * 2;
169  }
170  if (k == 0 && j == 2){
171  sobel_x[index].x = factor * 1;
172  }
173  if (k == 1 && j == 0){
174  sobel_x[index].x = factor * 2;
175  }
176  if (k == 1 && j == 1){
177  sobel_x[index].x = factor * 4;
178  }
179  if (k == 1 && j == 2){
180  sobel_x[index].x = factor * 2;
181  }
182  if (k == 2 && j == 0){
183  sobel_x[index].x = factor * 1;
184  }
185  if (k == 2 && j == 1){
186  sobel_x[index].x = factor * 2;
187  }
188  if (k == 2 && j == 2){
189  sobel_x[index].x = factor * 1;
190  }
191  }
192  }
193  }
194 
195 
196  par.store("sobel_x", sobel_x);
197  par.store("sobel_y", sobel_y);
198  par.store("sobel_z", sobel_z);
199 
200  }
201  else{
202  double *sobel_x, *sobel_y, *sobel_z;
203  xDim = 3;
204  yDim = 3;
205  zDim = 3;
206 
207  sobel_x = (double *) malloc(sizeof(double) *27);
208  sobel_y = (double *) malloc(sizeof(double) *27);
209  sobel_z = (double *) malloc(sizeof(double) *27);
210 
211  // There is clearly a better way to do this with matrix multiplication
212  // the sobel operator is separable, so we just need to mix
213  // Gradient and triangle filters, check:
214  // https://en.wikipedia.org/wiki/Sobel_operator
215 
216  // Starting with S_z
217  int factor;
218  for (int i = 0; i < 3; ++i){
219  for (int j = 0; j < 3; ++j){
220  for (int k = 0; k < 3; ++k){
221  index = k + 3*j + 9*i;
222  if (k == 0){
223  factor = -1;
224  }
225  if (k == 1){
226  factor = 0;
227  }
228  if (k == 2){
229  factor = 1;
230  }
231  if (i == 0 && j == 0){
232  sobel_z[index] = factor * 1;
233  }
234  if (i == 0 && j == 1){
235  sobel_z[index] = factor * 2;
236  }
237  if (i == 0 && j == 2){
238  sobel_z[index] = factor * 1;
239  }
240  if (i == 1 && j == 0){
241  sobel_z[index] = factor * 2;
242  }
243  if (i == 1 && j == 1){
244  sobel_z[index] = factor * 4;
245  }
246  if (i == 1 && j == 2){
247  sobel_z[index] = factor * 2;
248  }
249  if (i == 2 && j == 0){
250  sobel_z[index] = factor * 1;
251  }
252  if (i == 2 && j == 1){
253  sobel_z[index] = factor * 2;
254  }
255  if (i == 2 && j == 2){
256  sobel_z[index] = factor * 1;
257  }
258  }
259  }
260  }
261 
262  // S_y
263  for (int i = 0; i < 3; ++i){
264  for (int j = 0; j < 3; ++j){
265  for (int k = 0; k < 3; ++k){
266  index = k + 3*j + 9*i;
267  if (j == 0){
268  factor = -1;
269  }
270  if (j == 1){
271  factor = 0;
272  }
273  if (j == 2){
274  factor = 1;
275  }
276  if (i == 0 && k == 0){
277  sobel_y[index] = factor * 1;
278  }
279  if (i == 0 && k == 1){
280  sobel_y[index] = factor * 2;
281  }
282  if (i == 0 && k == 2){
283  sobel_y[index] = factor * 1;
284  }
285  if (i == 1 && k == 0){
286  sobel_y[index] = factor * 2;
287  }
288  if (i == 1 && k == 1){
289  sobel_y[index] = factor * 4;
290  }
291  if (i == 1 && k == 2){
292  sobel_y[index] = factor * 2;
293  }
294  if (i == 2 && k == 0){
295  sobel_y[index] = factor * 1;
296  }
297  if (i == 2 && k == 1){
298  sobel_y[index] = factor * 2;
299  }
300  if (i == 2 && k == 2){
301  sobel_y[index] = factor * 1;
302  }
303  }
304  }
305  }
306 
307  // Now for S_x
308  for (int i = 0; i < 3; ++i){
309  for (int j = 0; j < 3; ++j){
310  for (int k = 0; k < 3; ++k){
311  index = k + 3*j + 9*i;
312  if (i == 0){
313  factor = -1;
314  }
315  if (i == 1){
316  factor = 0;
317  }
318  if (i == 2){
319  factor = 1;
320  }
321  if (k == 0 && j == 0){
322  sobel_x[index] = factor * 1;
323  }
324  if (k == 0 && j == 1){
325  sobel_x[index] = factor * 2;
326  }
327  if (k == 0 && j == 2){
328  sobel_x[index] = factor * 1;
329  }
330  if (k == 1 && j == 0){
331  sobel_x[index] = factor * 2;
332  }
333  if (k == 1 && j == 1){
334  sobel_x[index] = factor * 4;
335  }
336  if (k == 1 && j == 2){
337  sobel_x[index] = factor * 2;
338  }
339  if (k == 2 && j == 0){
340  sobel_x[index] = factor * 1;
341  }
342  if (k == 2 && j == 1){
343  sobel_x[index] = factor * 2;
344  }
345  if (k == 2 && j == 2){
346  sobel_x[index] = factor * 1;
347  }
348  }
349  }
350  }
351 
352 
353 
354  par.store("sobel_x", sobel_x);
355  par.store("sobel_y", sobel_y);
356  par.store("sobel_z", sobel_z);
357  }
358 
359  transfer_sobel(par);
360 
361  par.store("found_sobel", true);
362 }
363 
364 void find_sobel_2d(Grid &par){
365 
366  std::string conv_type = par.sval("conv_type");
367  int xDim, yDim, zDim;
368 
369  // There will be two cases to take into account here, one for fft
370  // convolution and another for window
371 
372  int index = 0;
373 
374  if (conv_type == "FFT"){
375  double2 *sobel_x, *sobel_y, *sobel_z;
376  xDim = par.ival("xDim");
377  yDim = par.ival("yDim");
378 
379  sobel_x = (double2 *) malloc(sizeof(double2) *xDim*yDim);
380  sobel_y = (double2 *) malloc(sizeof(double2) *xDim*yDim);
381 
382  // Now let's go ahead and pad these guys with 0's
383  for (int i = 0; i < xDim; ++i){
384  for (int j = 0; j < yDim; ++j){
385  index = j + i * yDim;
386  sobel_x[index].x = 0;
387  sobel_y[index].x = 0;
388  }
389  }
390 
391  // There is clearly a better way to do this with matrix multiplication
392  // the sobel operator is separable, so we just need to mix
393  // Gradient and triangle filters, check:
394  // https://en.wikipedia.org/wiki/Sobel_operator
395 
396  // S_y
397  int factor;
398  for (int i = 0; i < 3; ++i){
399  for (int j = 0; j < 3; ++j){
400  index = j + yDim*i;
401  if (j == 0){
402  factor = -1;
403  }
404  if (j == 1){
405  factor = 0;
406  }
407  if (j == 2){
408  factor = 1;
409  }
410  if (j == 0 && i == 0){
411  sobel_y[index].x = factor * 1;
412  }
413  if (j == 0 && i == 1){
414  sobel_y[index].x = factor * 2;
415  }
416  if (j == 0 && i == 2){
417  sobel_y[index].x = factor * 1;
418  }
419  if (j == 2 && i == 0){
420  sobel_y[index].x = factor * 1;
421  }
422  if (j == 2 && i == 1){
423  sobel_y[index].x = factor * 2;
424  }
425  if (j == 2 && i == 2){
426  sobel_y[index].x = factor * 1;
427  }
428  }
429  }
430 
431  // Now for S_x
432  for (int i = 0; i < 3; ++i){
433  for (int j = 0; j < 3; ++j){
434  index = j + yDim*i;
435  if (i == 0){
436  factor = -1;
437  }
438  if (i == 1){
439  factor = 0;
440  }
441  if (i == 2){
442  factor = 1;
443  }
444  if (i == 0 && j == 0){
445  sobel_x[index].x = factor * 1;
446  }
447  if (i == 0 && j == 1){
448  sobel_x[index].x = factor * 2;
449  }
450  if (i == 0 && j == 2){
451  sobel_x[index].x = factor * 1;
452  }
453  if (i == 2 && j == 0){
454  sobel_x[index].x = factor * 1;
455  }
456  if (i == 2 && j == 1){
457  sobel_x[index].x = factor * 2;
458  }
459  if (i == 2 && j == 2){
460  sobel_x[index].x = factor * 1;
461  }
462  }
463  }
464 
465 
466  par.store("sobel_x", sobel_x);
467  par.store("sobel_y", sobel_y);
468  par.store("sobel_z", sobel_z);
469 
470  }
471  else{
472  double *sobel_x, *sobel_y, *sobel_z;
473  xDim = 3;
474  yDim = 3;
475 
476  sobel_x = (double *) malloc(sizeof(double) *9);
477  sobel_y = (double *) malloc(sizeof(double) *9);
478 
479  // There is clearly a better way to do this with matrix multiplication
480  // the sobel operator is separable, so we just need to mix
481  // Gradient and triangle filters, check:
482  // https://en.wikipedia.org/wiki/Sobel_operator
483 
484  int factor;
485  // S_y
486  for (int i = 0; i < 3; ++i){
487  for (int j = 0; j < 3; ++j){
488  index = j + 3*i;
489  if (j == 0){
490  factor = -1;
491  }
492  if (j == 1){
493  factor = 0;
494  }
495  if (j == 2){
496  factor = 1;
497  }
498  if (j == 0 && i == 0){
499  sobel_y[index] = factor * 1;
500  }
501  if (j == 0 && i == 1){
502  sobel_y[index] = factor * 2;
503  }
504  if (j == 0 && i == 2){
505  sobel_y[index] = factor * 1;
506  }
507  if (j == 2 && i == 0){
508  sobel_y[index] = factor * 1;
509  }
510  if (j == 2 && i == 1){
511  sobel_y[index] = factor * 2;
512  }
513  if (j == 2 && i == 2){
514  sobel_y[index] = factor * 1;
515  }
516  }
517  }
518 
519  // Now for S_x
520  for (int i = 0; i < 3; ++i){
521  for (int j = 0; j < 3; ++j){
522  index = j + 3*i;
523  if (i == 0){
524  factor = -1;
525  }
526  if (i == 1){
527  factor = 0;
528  }
529  if (i == 2){
530  factor = 1;
531  }
532  if (i == 0 && j == 0){
533  sobel_x[index] = factor * 1;
534  }
535  if (i == 0 && j == 1){
536  sobel_x[index] = factor * 2;
537  }
538  if (i == 0 && j == 2){
539  sobel_x[index] = factor * 1;
540  }
541  if (i == 2 && j == 0){
542  sobel_x[index] = factor * 1;
543  }
544  if (i == 2 && j == 1){
545  sobel_x[index] = factor * 2;
546  }
547  if (i == 2 && j == 2){
548  sobel_x[index] = factor * 1;
549  }
550  }
551  }
552 
553 
554 
555  par.store("sobel_x", sobel_x);
556  par.store("sobel_y", sobel_y);
557  par.store("sobel_z", sobel_z);
558  }
559 
560  transfer_sobel(par);
561 
562 }
563 
564 // Function to transfer 3d sobel operators for non-fft convolution
565 void transfer_sobel(Grid &par){
566 
567  // Grabbing necessary parameters
568  int dimnum = par.ival("dimnum");
569  int xDim = par.ival("xDim");
570  int yDim = par.ival("yDim");
571  int zDim = par.ival("zDim");
572  int gSize = xDim * yDim * zDim;
573  std::string conv_type = par.sval("conv_type");
574 
575  double2 *sobel_x_gpu, *sobel_y_gpu, *sobel_z_gpu;
576 
577  cufftHandle plan_nd;
578  if (dimnum == 2){
579  plan_nd = par.ival("plan_2d");
580  }
581  else if (dimnum == 3){
582  plan_nd = par.ival("plan_3d");
583  }
584 
585  // creating space on device for 2 separate cases
586  // Note that in the case of the FFT, the Sobel operators will be double2's
587  // while in the case of the window transform, they will be double
588  if (conv_type == "FFT"){
589  double2 *sobel_x = par.cufftDoubleComplexval("sobel_x");
590  double2 *sobel_y = par.cufftDoubleComplexval("sobel_y");
591  double2 *sobel_z = par.cufftDoubleComplexval("sobel_z");
592  cudaMalloc((void**) &sobel_x_gpu, sizeof(double2) *gSize);
593  cudaMalloc((void**) &sobel_y_gpu, sizeof(double2) *gSize);
594  if (dimnum == 3){
595  cudaMalloc((void**) &sobel_z_gpu, sizeof(double2) *gSize);
596  }
597 
598  // Transferring to device
599  cudaError_t err;
600 
601  // Sobel_x
602  err = cudaMemcpy(sobel_x_gpu, sobel_x, sizeof(double2)*gSize,
603  cudaMemcpyHostToDevice);
604  if (err != cudaSuccess){
605  std::cout << "ERROR: Could not copy sobel_x to device!" << '\n';
606  exit(1);
607  }
608 
609  // Sobel_y
610  err = cudaMemcpy(sobel_y_gpu, sobel_y, sizeof(double2)*gSize,
611  cudaMemcpyHostToDevice);
612  if (err != cudaSuccess){
613  std::cout << "ERROR: Could not copy sobel_y to device!" << '\n';
614  exit(1);
615  }
616 
617  if (dimnum == 3){
618  // Sobel_z
619  err = cudaMemcpy(sobel_z_gpu, sobel_z, sizeof(double2)*gSize,
620  cudaMemcpyHostToDevice);
621  if (err != cudaSuccess){
622  std::cout << "ERROR: Could not copy sobel_z to device!" << '\n';
623  exit(1);
624  }
625  }
626 
627  // We only need the FFT's of the sobel operators. Let's generate those
628  cufftExecZ2Z(plan_nd, sobel_x_gpu, sobel_x_gpu, CUFFT_FORWARD);
629  cufftExecZ2Z(plan_nd, sobel_y_gpu, sobel_y_gpu, CUFFT_FORWARD);
630  if (dimnum == 3){
631  cufftExecZ2Z(plan_nd, sobel_z_gpu, sobel_z_gpu, CUFFT_FORWARD);
632  }
633 
634  // Storing in set of parameters
635  par.store("sobel_x_gpu", sobel_x_gpu);
636  par.store("sobel_y_gpu", sobel_y_gpu);
637  par.store("sobel_z_gpu", sobel_z_gpu);
638  }
639  else{
640  double2 *sobel_x = par.cufftDoubleComplexval("sobel_x");
641  double2 *sobel_y = par.cufftDoubleComplexval("sobel_y");
642  double2 *sobel_z = par.cufftDoubleComplexval("sobel_z");
643  int size;
644  if (dimnum == 2){
645  size = 9;
646  }
647  else if (dimnum == 3){
648  size = 27;
649  }
650  cudaMalloc((void**) &sobel_x_gpu, sizeof(double) *size);
651  cudaMalloc((void**) &sobel_y_gpu, sizeof(double) *size);
652  cudaMalloc((void**) &sobel_z_gpu, sizeof(double) *size);
653  // Transferring to device
654  cudaError_t err;
655 
656  // Sobel_x
657  err = cudaMemcpy(sobel_x_gpu, sobel_x, sizeof(double)*gSize,
658  cudaMemcpyHostToDevice);
659  if (err != cudaSuccess){
660  std::cout << "ERROR: Could not copy sobel_x to device!" << '\n';
661  exit(1);
662  }
663 
664  // Sobel_y
665  err = cudaMemcpy(sobel_y_gpu, sobel_y, sizeof(double)*gSize,
666  cudaMemcpyHostToDevice);
667  if (err != cudaSuccess){
668  std::cout << "ERROR: Could not copy sobel_y to device!" << '\n';
669  exit(1);
670  }
671 
672  if (dimnum == 3){
673  // Sobel_z
674  err = cudaMemcpy(sobel_z_gpu, sobel_z, sizeof(double)*gSize,
675  cudaMemcpyHostToDevice);
676  if (err != cudaSuccess){
677  std::cout << "ERROR: Could not copy sobel_z to device!" << '\n';
678  exit(1);
679  }
680  }
681 
682  // Storing in set of parameters
683  par.store("sobel_x_gpu", sobel_x_gpu);
684  par.store("sobel_y_gpu", sobel_y_gpu);
685  par.store("sobel_z_gpu", sobel_z_gpu);
686 
687  }
688 
689  par.store("found_sobel",true);
690 
691 }
692 
693 // function to transform a wavefunction to a field of edges
694 void find_edges(Grid &par,
695  double2* wfc, double* edges){
696 
697  // for this, we simply need to take our sobel 3d sobel filter,
698  // FFT forward, multiply, FFT back.
699 
700  int dimnum = par.ival("dimnum");
701  dim3 grid = par.grid;
702  dim3 threads = par.threads;
703 
704  double2 *wfc_gpu = par.cufftDoubleComplexval("wfc_gpu");
705  int xDim = par.ival("xDim");
706  int yDim = par.ival("yDim");
707  int zDim = par.ival("zDim");
708  int gSize = xDim * yDim * zDim;
709 
710  double *density_d, *edges_gpu;
711  double2 *density_d2, *gradient_x_fft, *gradient_y_fft, *gradient_z_fft;
712 
713  // Now we need to grab the Sobel operators
714  if (par.bval("found_sobel") == false){
715  std::cout << "Finding sobel filters" << '\n';
716  if (dimnum == 2){
717  find_sobel_2d(par);
718  }
719  if (dimnum == 3){
720  find_sobel(par);
721  }
722  cudaMalloc((void**) &density_d, sizeof(double) * gSize);
723  cudaMalloc((void**) &gradient_x_fft, sizeof(double2) * gSize);
724  cudaMalloc((void**) &gradient_y_fft, sizeof(double2) * gSize);
725  if (dimnum == 3){
726  cudaMalloc((void**) &gradient_z_fft, sizeof(double2) * gSize);
727  }
728  cudaMalloc((void**) &density_d2, sizeof(double2) * gSize);
729  cudaMalloc((void**) &edges_gpu, sizeof(double) * gSize);
730  }
731  else{
732  density_d = par.dsval("density_d");
733  edges_gpu = par.dsval("edges_gpu");
734  gradient_x_fft = par.cufftDoubleComplexval("gradient_x_fft");
735  gradient_y_fft = par.cufftDoubleComplexval("gradient_y_fft");
736  if (dimnum == 3){
737  gradient_z_fft = par.cufftDoubleComplexval("gradient_z_fft");
738  }
739  density_d2 = par.cufftDoubleComplexval("density_d2");
740  }
741 
742  cufftHandle plan_3d = par.ival("plan_3d");
743 
744  // now to perform the complexMagnitudeSquared operation
745  complexMagnitudeSquared<<<grid,threads>>>(wfc_gpu, density_d);
746 
747  // Pulling operators from find_sobel(par)
748  double2 *sobel_x_gpu = par.cufftDoubleComplexval("sobel_x_gpu");
749  double2 *sobel_y_gpu = par.cufftDoubleComplexval("sobel_y_gpu");
750  double2 *sobel_z_gpu;
751  if (dimnum == 3){
752  sobel_z_gpu = par.cufftDoubleComplexval("sobel_z_gpu");
753  }
754 
755  // This should work in principle, but the D2Z transform plays tricks
756  // Generating plan for d2z in 3d
757  //cufftHandle plan_3d2z;
758  //cufftPlan3d(&plan_3d2z, xDim, yDim, zDim, CUFFT_D2Z);
759 
760  make_cufftDoubleComplex<<<grid, threads>>>(density_d, density_d2);
761 
762  // Now fft forward, multiply, fft back
763  cufftExecZ2Z(plan_3d, density_d2, gradient_x_fft, CUFFT_FORWARD);
764  cufftExecZ2Z(plan_3d, density_d2, gradient_y_fft, CUFFT_FORWARD);
765  if (dimnum == 3){
766  cufftExecZ2Z(plan_3d, density_d2, gradient_z_fft, CUFFT_FORWARD);
767  }
768 
769  // Now to perform the multiplication
770  cMult<<<grid, threads>>>(gradient_x_fft, sobel_x_gpu, gradient_x_fft);
771  cMult<<<grid, threads>>>(gradient_y_fft, sobel_y_gpu, gradient_y_fft);
772  if (dimnum == 3){
773  cMult<<<grid, threads>>>(gradient_z_fft, sobel_z_gpu, gradient_z_fft);
774  }
775 
776  // FFT back
777  cufftExecZ2Z(plan_3d, gradient_x_fft, gradient_x_fft, CUFFT_INVERSE);
778  cufftExecZ2Z(plan_3d, gradient_y_fft, gradient_y_fft, CUFFT_INVERSE);
779  if (dimnum == 3){
780  cufftExecZ2Z(plan_3d, gradient_z_fft, gradient_z_fft, CUFFT_INVERSE);
781  }
782 
783  if (dimnum == 2){
784  l2_norm<<<grid, threads>>>(gradient_x_fft, gradient_y_fft, edges_gpu);
785  }
786  else if (dimnum == 3){
787  l2_norm<<<grid, threads>>>(gradient_x_fft, gradient_y_fft,
788  gradient_z_fft, edges_gpu);
789  }
790 
791  // Copying edges back
792  cudaMemcpy(edges, edges_gpu, sizeof(double) * gSize,
793  cudaMemcpyDeviceToHost);
794 
795  // Method to find edges based on window approach -- more efficient,
796  // but difficult to implement
797 
798  par.store("density_d", density_d);
799  par.store("density_d2", density_d2);
800  par.store("edges_gpu", edges_gpu);
801  par.store("gradient_x_fft", gradient_x_fft);
802  par.store("gradient_y_fft", gradient_y_fft);
803  par.store("gradient_z_fft", gradient_z_fft);
804 }