GPUE  v1.0
GPU Gross-Pitaevskii Equation numerical solver for Bose-Einstein condensates
dynamic.cu
Go to the documentation of this file.
1 #include <algorithm>
2 #include <limits>
3 #include <stack>
4 #include <cufft.h>
5 #include <fstream>
6 
7 #include "../include/dynamic.h"
8 #include "../include/ds.h"
9 #include "../include/kernels.h"
10 
11 __device__ double factorial(double val){
12  double ret_val = val;
13  if (val < 1){
14  return 1;
15  }
16  for (int i = floor(val) - 1; i > 0; --i){
17  ret_val *= i;
18  }
19  return ret_val;
20 }
21 
22 // Simple functions to subtract, add, multiply and divide
23 double ast_subtract(double a, double b){
24  return a-b;
25 }
26 
27 __device__ double subtract_gpu(double a, double b){
28  return a - b;
29 }
30 
31 double ast_add(double a, double b){
32  return a+b;
33 }
34 
35 __device__ double add_gpu(double a, double b){
36  return a + b;
37 }
38 
39 double ast_multiply(double a, double b){
40  return a*b;
41 }
42 
43 __device__ double multiply_gpu(double a, double b){
44  return a * b;
45 }
46 
47 double ast_divide(double a, double b){
48  return a/b;
49 }
50 
51 __device__ double divide_gpu(double a, double b){
52  return a/b;
53 }
54 
55 double ast_cos(double a, double b){
56  return cos(a);
57 }
58 
59 __device__ double cos_gpu(double a, double b){
60  return cos(a);
61 }
62 
63 double ast_sin(double a, double b){
64  return sin(a);
65 }
66 
67 __device__ double sin_gpu(double a, double b){
68  return sin(a);
69 }
70 
71 double ast_tan(double a, double b){
72  return tan(a);
73 }
74 
75 __device__ double tan_gpu(double a, double b){
76  return tan(a);
77 }
78 
79 double ast_sqrt(double a, double b){
80  return sqrt(a);
81 }
82 
83 __device__ double sqrt_gpu(double a, double b){
84  return sqrt(a);
85 }
86 
87 
88 __device__ double pow_gpu(double a, double b){
89  return pow(a, (int)b);
90 }
91 
92 
93 double ast_exp(double a, double b){
94  return exp(a);
95 }
96 
97 __device__ double exp_gpu(double a, double b){
98  return exp(a);
99 }
100 
101 __device__ double poly_j(int v, double x, int n){
102  double jval = 0;
103  double sigma, b;
104  for (int i = 0; i < n; ++i){
105  b = pow(-1,i)*(factorial(n+i-1) * pow(n,1-2*i))/
106  (factorial(i)*factorial(n-i)*factorial(v+i));
107  sigma = b*pow(x*0.5,2*i+v);
108  jval += sigma;
109  }
110  return jval;
111 }
112 
113 __device__ double2 poly_i(int v, double2 x, int n){
114  double2 ival, sigma;
115  double b;
116  x = mult({0,1},x);
117  for (int i = 0; i < n; ++i){
118  b = pow(-1,i)*(factorial(n+i-1) * pow(n,1-2*i))/
119  (factorial(i)*factorial(n-i)*factorial(v+i));
120  sigma = mult(pow(mult(x,0.5),2*i+v),b);
121  ival = add(ival, sigma);
122  }
123  return ival;
124 }
125 
126 __device__ double2 poly_k(int v, double2 x, int n){
127  return mult(subtract(poly_i(-v, x, n), poly_i(v, x, n)),
128  M_PI/(2*sin(n*M_PI)));
129 }
130 
131 __device__ double k2n_gpu(double a, double b){
132  double2 x = {b,0};
133  double2 val = poly_k((int)a, x, 30);
134  return mult(val, val).x;
135 }
136 
137 
138 __device__ double jn_gpu(double a, double b){
139  if ((int)a == 0){
140  return j0(b);
141  }
142  if ((int)a == 1){
143  return j1(b);
144  }
145  else{
146  return jn(a,b);
147  }
148 }
149 
150 __device__ double yn_gpu(double a, double b){
151  if ((int)a == 0){
152  return y0(b);
153  }
154  if ((int)a == 1){
155  return y1(b);
156  }
157  else{
158  return yn(a,b);
159  }
160 }
161 
162 void allocate_eqn(Grid &par, std::string val_string, std::string eqn_string){
163  par.store(val_string + "_time", false);
164  EqnNode eqn_tree = parse_eqn(par, eqn_string, val_string);
165  EqnNode_gpu *eqn_gpu, *eqn_cpu;
166 
167  int num = 0;
168  find_element_num(eqn_tree, num);
169  int element_num = num;
170  std::cout << "final element_num is: " << element_num << '\n';
171 
172  eqn_cpu = (EqnNode_gpu *)malloc(sizeof(EqnNode_gpu)*element_num);
173 
174  num = 0;
175  tree_to_array(eqn_tree, eqn_cpu, num);
176 
177  cudaMalloc((void**)&eqn_gpu,sizeof(EqnNode_gpu)*element_num);
178  cudaMemcpy(eqn_gpu, eqn_cpu,sizeof(EqnNode_gpu)*element_num,
179  cudaMemcpyHostToDevice);
180 
181  par.store(val_string, eqn_gpu);
182  par.store(val_string, eqn_tree);
183  free(eqn_cpu);
184 
185 }
186 
187 void parse_param_file(Grid &par){
188 
189  // First, we need to open the file and read it in
190  std::string line, eqn_string, val_string;
191  std::ifstream file (par.sval("param_file"));
192 
193  if(file.is_open()){
194  while(getline(file, line)){
195  line.erase(remove_if(line.begin(), line.end(), isspace),
196  line.end());
197  int it = line.find("=");
198  if (it < 0){
199  //std::cout << "No equals sign!" << '\n';
200  eqn_string += line.substr(0,line.size());
201  //std::cout << val_string << '\t' << eqn_string << '\n';
202  }
203  else{
204  if (val_string != ""){
205  if (val_string == "V"){
206  eqn_string+="+0.5*mass*(Ax*Ax+Ay*Ay+Az*Az)";
207  }
208  allocate_eqn(par, val_string, eqn_string);
209  val_string = "";
210  eqn_string = "";
211  }
212  val_string = line.substr(0, it);
213  eqn_string = line.substr(it+1, line.size() - it-1);
214  //std::cout << val_string << '\t' << eqn_string << '\n';
215  }
216 
217  //std::cout << line << '\n';
218  }
219  allocate_eqn(par, val_string, eqn_string);
220  file.close();
221  }
222 }
223 
224 
225 // We assume that we have already removed unnecessary spaces and such from
226 // our eqn_string
227 EqnNode parse_eqn(Grid &par, std::string eqn_string, std::string val_str){
228  //std::cout << eqn_string << '\n';
229 
230  // Because this will be called recursively, we need to return if the string
231  // length is 0
232  if (eqn_string.length() == 0){
233  std::cout << "There's nothing here!" << '\n';
234  exit(1);
235  }
236 
237  // vector of all possibe mathematical operators (not including functions)
238  std::vector<std::string> moperators(5);
239  moperators = {
240  "-", "+", "/", "*", "^"
241  };
242 
243  // we need a map for these operators
244  std::unordered_map<char, int> moperator_map;
245  moperator_map['+'] = 1;
246  moperator_map['-'] = 2;
247  moperator_map['*'] = 3;
248  moperator_map['/'] = 4;
249  moperator_map['^'] = 5;
250 
251  // And another vector for brackets of various types which indicate recursive
252  // parsing of the equation
253  std::vector<char> mbrackets;
254  mbrackets = {
255  '(', '[', ']', ')'
256  };
257 
258  // vector of all possible mathematical functions... more to come
259  std::vector<std::string> mfunctions(6);
260  mfunctions = {
261  "sin", "cos", "exp", "tan", "sqrt", "pow"
262  };
263 
264  // We also need a specific map for the functions above
265  std::unordered_map<std::string, int> mfunctions_map;
266  mfunctions_map["cos"] = 6;
267  mfunctions_map["sin"] = 7;
268  mfunctions_map["tan"] = 8;
269  mfunctions_map["sqrt"] = 9;
270  mfunctions_map["exp"] = 10;
271  //mfunctions_map["erf"] = ;
272  mfunctions_map["pow"] = 5;
273  mfunctions_map["jn"] = 11;
274  mfunctions_map["yn"] = 12;
275  mfunctions_map["k2n"] = 13;
276 
277  // first, we need to parse the equation string and remove parentheses
278  // Then we'll sort according to the math operators (mops)
279  int half = mbrackets.size() / 2;
280  std::stack<int> open_bra;
281  std::vector<int> ignored_positions;
282  for (int i = 0; i < eqn_string.size(); ++i){
283  for (int j = 0; j < mbrackets.size() / 2; ++j){
284  if (eqn_string[i] == mbrackets[j]){
285  open_bra.push(i);
286  }
287  }
288 
289  // Now we need to look for the closing bracket
290  for (int j = mbrackets.size()/2; j < mbrackets.size(); ++j){
291  if (eqn_string[i] == mbrackets[j]){
292  ignored_positions.push_back(open_bra.top());
293  ignored_positions.push_back(i);
294  open_bra.pop();
295  }
296  }
297  }
298 
299 
300  // If parentheses cover the entire expression, we
301  // 1. Remove the parentheses
302  // 2. subtract 1 from bra_positions
303  std::string temp_string = eqn_string;
304  if (ignored_positions.size() > 0){
305  if (ignored_positions[ignored_positions.size()-1]
306  == temp_string.size() - 1 &&
307  ignored_positions[ignored_positions.size()-2] == 0){
308  ignored_positions.erase(ignored_positions.end()-1);
309  ignored_positions.erase(ignored_positions.end()-1);
310 
311  eqn_string = eqn_string.substr(1, eqn_string.size() - 2);
312 
313  for (int i = 0; i < ignored_positions.size(); ++i){
314  --ignored_positions[i];
315  }
316  }
317  temp_string = eqn_string;
318 
319  // Now we remove the parentheses from the eqn_string
320  std::vector<int> temp_positions = ignored_positions;
321  for (int i = 0; i < temp_positions.size(); i += 2){
322  temp_string.erase(temp_positions[i],
323  temp_positions[i+1] - temp_positions[i]+1);
324  for (int j = i+2; j < temp_positions.size(); ++j){
325  if (temp_positions[j] > temp_positions[i]){
326  temp_positions[j]
327  -= temp_positions[i+1] - temp_positions[i] + 1;
328  }
329  }
330  }
331  }
332 
333  // Creating the EqnNode
334  EqnNode eqn_tree;
335 
336  bool only_nums =
337  (temp_string.find_first_not_of("0123456789.")
338  == std::string::npos);
339  if (only_nums){
340  eqn_tree.val = atof(temp_string.c_str());
341  return eqn_tree;
342  }
343  else if(temp_string.size() == 1){
344  if(temp_string[0] == 'x'){
345  eqn_tree.is_dynamic = true;
346  eqn_tree.var = 'x';
347  return eqn_tree;
348  }
349  else if(temp_string[0] == 'y'){
350  eqn_tree.is_dynamic = true;
351  eqn_tree.var = 'y';
352  return eqn_tree;
353  }
354  else if(temp_string[0] == 'z'){
355  eqn_tree.is_dynamic = true;
356  eqn_tree.var = 'z';
357  return eqn_tree;
358  }
359  else if(temp_string[0] == 't'){
360  eqn_tree.is_dynamic = true;
361  par.store(val_str + "_time", true);
362  eqn_tree.var = 't';
363  return eqn_tree;
364  }
365  }
366 
367  //std::cout << temp_string << '\n';
368 
369  // We'll need to parse the equation string in reverse PEMDAS
370  // So we go through the moperators, then mbrackets / mfunctions
371  bool mop_found = false;
372  int mop_point = 0;
373  while (!mop_found){
374  for (auto &mop : moperators){
375  if (temp_string.find(mop)<temp_string.size() && !mop_found){
376  mop_point = temp_string.find(mop);
377  mop_found = true;
378  }
379  }
380  if (!mop_found){
381  if(auto it = mfunctions_map.find(temp_string)
382  != mfunctions_map.end()){
383 
384  mop_point = temp_string.size()-1;
385 
386  // Check for commas
387  std::string check_string = eqn_string.substr(mop_point+1,
388  eqn_string.size() - mop_point-1);
389  if(check_string.find(",") < check_string.size()){
390  eqn_tree.op_num = mfunctions_map[temp_string];
391  eqn_tree.has_op = true;
392  eqn_tree.left = (EqnNode *)malloc(sizeof(EqnNode));
393  eqn_tree.right = (EqnNode *)malloc(sizeof(EqnNode));
394 
395  int comma_loc = check_string.find(",");
396  eqn_tree.left[0] = parse_eqn(par,
397  check_string.substr(1, comma_loc - 1), val_str);
398  eqn_tree.right[0] = parse_eqn(par,
399  check_string.substr(comma_loc+1,
400  check_string.size()-comma_loc-2),
401  val_str);
402  }
403  else{
404  eqn_tree.op_num = mfunctions_map[temp_string];
405  eqn_tree.has_op = true;
406  eqn_tree.left = (EqnNode *)malloc(sizeof(EqnNode));
407  eqn_tree.left[0] = parse_eqn(par, check_string, val_str);
408  }
409 
410  }
411  else{
412  if (par.is_ast_cpu(temp_string)){
413  //std::cout << "found ast" << '\n';
414  eqn_tree = par.ast_cpuval(temp_string);
415  par.store(val_str + "_time", true);
416  }
417  else if(par.is_double(temp_string)){
418  //std::cout << "found double " << temp_string << "!"<< '\n';
419  eqn_tree.val = par.dval(temp_string);
420  }
421  else{
422  std::cout << "No value " << temp_string << " found!\n";
423  exit(1);
424  }
425  }
426  return eqn_tree;
427 
428  }
429  }
430 
431 /*
432  std::cout << "ignored positions are: " << '\n';
433  for (int &pos : ignored_positions){
434  std::cout << pos << '\n';
435  }
436 */
437 
438  // Now we need to find the mop_point position in the eqn_string
439  // We know the temp_string and how many positions we removed and where.
440  if (ignored_positions.size() > 0){
441  int count = 0;
442  for (int i = 0; i <= mop_point; ++i){
443  for (int j = 0; j < ignored_positions.size(); j += 2){
444  if (ignored_positions[j] == count){
445  count += ignored_positions[j+1] - ignored_positions[j] + 1;
446  }
447  }
448  count++;
449  }
450 
451  mop_point = count-1;
452  }
453 
454  //std::cout << "eqn_string is: " << eqn_string << '\n';
455  //std::cout << "mop point is: " << mop_point << '\n';
456 
457  // Now we need to store the operator into the eqn_tree
458  eqn_tree.op_num = moperator_map[eqn_string[mop_point]];
459  eqn_tree.has_op = true;
460 
461  // Now we need to parse the left and right banches...
462  eqn_tree.left = (EqnNode *)malloc(sizeof(EqnNode));
463  eqn_tree.left[0] = parse_eqn(par, eqn_string.substr(0, mop_point), val_str);
464 
465  eqn_tree.right = (EqnNode *)malloc(sizeof(EqnNode));
466  eqn_tree.right[0] = parse_eqn(par, eqn_string.substr(mop_point+1,
467  eqn_string.size() - mop_point-1), val_str);
468 
469  return eqn_tree;
470 }
471 
472 void tree_to_array(EqnNode eqn, EqnNode_gpu *eqn_array, int &element_num){
473 
474  eqn_array[element_num].val = eqn.val;
475  eqn_array[element_num].var = eqn.var;
476  eqn_array[element_num].is_dynamic = eqn.is_dynamic;
477 
478  // variable defining difference between functions that use 1 or 2 inputs.
479  int function_thresh = 6;
480 
481  bool only_left = false;
482  eqn_array[element_num].op_num = eqn.op_num;
483  if (eqn.op_num >= function_thresh){
484  only_left = true;
485  }
486 
487  if (eqn.has_op == false){
488  eqn_array[element_num].left = -1;
489  eqn_array[element_num].right = -1;
490  return;
491  }
492  else{
493  int temp = element_num;
494  element_num++;
495  eqn_array[temp].left = element_num;
496  tree_to_array(eqn.left[0], eqn_array, element_num);
497 
498  if (!only_left){
499  element_num++;
500  eqn_array[temp].right = element_num;
501  tree_to_array(eqn.right[0], eqn_array, element_num);
502  }
503  else{
504  eqn_array[temp].right = -1;
505  }
506  }
507 }
508 
509 void find_element_num(EqnNode eqn_tree, int &element_num){
510 
511  element_num++;
512  if (eqn_tree.has_op == false){
513  return;
514  }
515  else{
516  find_element_num(eqn_tree.left[0], element_num);
517  // For single operators, the right node doesn't exist...
518  if(eqn_tree.op_num < 6 ){
519  find_element_num(eqn_tree.right[0], element_num);
520  }
521  }
522 }
523 
524 /*----------------------------------------------------------------------------//
525 * GPU KERNELS
526 *-----------------------------------------------------------------------------*/
527 __device__ double evaluate_eqn_gpu(EqnNode_gpu *eqn, double x, double y,
528  double z, double time, int element_num){
529 
530  if (eqn[element_num].right < 0 &&
531  eqn[element_num].left < 0){
532  if (eqn[element_num].is_dynamic){
533  if(eqn[element_num].var == 'x'){
534  return x;
535  }
536  if(eqn[element_num].var == 'y'){
537  return y;
538  }
539  if(eqn[element_num].var == 'z'){
540  return z;
541  }
542  if(eqn[element_num].var == 't'){
543  return time;
544  }
545  }
546  else{
547  return eqn[element_num].val;
548  }
549  }
550 
551  double val1;
552  double val2;
553  if (eqn[element_num].left > 0){
554  val1 = evaluate_eqn_gpu(eqn, x, y, z, time, eqn[element_num].left);
555  }
556  else{
557  val1 = 0;
558  }
559 
560  if (eqn[element_num].right > 0){
561  val2 = evaluate_eqn_gpu(eqn, x, y, z, time, eqn[element_num].right);
562  }
563  else{
564  val2 = 0;
565  }
566 
567  //return add_gpu(val1, val2);
568  switch(eqn[element_num].op_num){
569  case 0:
570  {
571  printf("GPU kernel failure! Improper equation tree!");
572  break;
573  }
574  case 1:
575  return add_gpu(val1, val2);
576  case 2:
577  return subtract_gpu(val1, val2);
578  case 3:
579  return multiply_gpu(val1, val2);
580  case 4:
581  return divide_gpu(val1, val2);
582  case 5:
583  return pow_gpu(val1, val2);
584  case 6:
585  return cos_gpu(val1, val2);
586  case 7:
587  return sin_gpu(val1, val2);
588  case 8:
589  return tan_gpu(val1, val2);
590  case 9:
591  return sqrt_gpu(val1, val2);
592  case 10:
593  return exp_gpu(val1, val2);
594  case 11:
595  return jn_gpu(val1, val2);
596  case 12:
597  return yn_gpu(val1, val2);
598  case 13:
599  return k2n_gpu(val1, val2);
600  }
601  return 0;
602 
603 }
604 
605 __global__ void find_field(double *field, double dx, double dy, double dz,
606  double xMax, double yMax, double zMax,
607  double time, EqnNode_gpu *eqn){
608  int gid = getGid3d3d();
609  int xid = blockIdx.x*blockDim.x + threadIdx.x;
610  int yid = blockIdx.y*blockDim.y + threadIdx.y;
611  int zid = blockIdx.z*blockDim.z + threadIdx.z;
612 
613  field[gid] = evaluate_eqn_gpu(eqn, dx*xid - xMax,
614  dy*yid - yMax,
615  dz*zid - zMax, time, 0);
616 }
617 
618 __global__ void zeros(double *field, int n){
619  int xid = blockDim.x*blockIdx.x + threadIdx.x;
620 
621  if (xid < n){
622  field[xid] = 0;
623  }
624 
625 }