7 #include "../include/dynamic.h"
8 #include "../include/ds.h"
9 #include "../include/kernels.h"
11 __device__ double factorial(double val){
16 for (int i = floor(val) - 1; i > 0; --i){
22 // Simple functions to subtract, add, multiply and divide
23 double ast_subtract(double a, double b){
27 __device__ double subtract_gpu(double a, double b){
31 double ast_add(double a, double b){
35 __device__ double add_gpu(double a, double b){
39 double ast_multiply(double a, double b){
43 __device__ double multiply_gpu(double a, double b){
47 double ast_divide(double a, double b){
51 __device__ double divide_gpu(double a, double b){
55 double ast_cos(double a, double b){
59 __device__ double cos_gpu(double a, double b){
63 double ast_sin(double a, double b){
67 __device__ double sin_gpu(double a, double b){
71 double ast_tan(double a, double b){
75 __device__ double tan_gpu(double a, double b){
79 double ast_sqrt(double a, double b){
83 __device__ double sqrt_gpu(double a, double b){
88 __device__ double pow_gpu(double a, double b){
89 return pow(a, (int)b);
93 double ast_exp(double a, double b){
97 __device__ double exp_gpu(double a, double b){
101 __device__ double poly_j(int v, double x, int n){
104 for (int i = 0; i < n; ++i){
105 b = pow(-1,i)*(factorial(n+i-1) * pow(n,1-2*i))/
106 (factorial(i)*factorial(n-i)*factorial(v+i));
107 sigma = b*pow(x*0.5,2*i+v);
113 __device__ double2 poly_i(int v, double2 x, int n){
117 for (int i = 0; i < n; ++i){
118 b = pow(-1,i)*(factorial(n+i-1) * pow(n,1-2*i))/
119 (factorial(i)*factorial(n-i)*factorial(v+i));
120 sigma = mult(pow(mult(x,0.5),2*i+v),b);
121 ival = add(ival, sigma);
126 __device__ double2 poly_k(int v, double2 x, int n){
127 return mult(subtract(poly_i(-v, x, n), poly_i(v, x, n)),
128 M_PI/(2*sin(n*M_PI)));
131 __device__ double k2n_gpu(double a, double b){
133 double2 val = poly_k((int)a, x, 30);
134 return mult(val, val).x;
138 __device__ double jn_gpu(double a, double b){
150 __device__ double yn_gpu(double a, double b){
162 void allocate_eqn(Grid &par, std::string val_string, std::string eqn_string){
163 par.store(val_string + "_time", false);
164 EqnNode eqn_tree = parse_eqn(par, eqn_string, val_string);
165 EqnNode_gpu *eqn_gpu, *eqn_cpu;
168 find_element_num(eqn_tree, num);
169 int element_num = num;
170 std::cout << "final element_num is: " << element_num << '\n';
172 eqn_cpu = (EqnNode_gpu *)malloc(sizeof(EqnNode_gpu)*element_num);
175 tree_to_array(eqn_tree, eqn_cpu, num);
177 cudaMalloc((void**)&eqn_gpu,sizeof(EqnNode_gpu)*element_num);
178 cudaMemcpy(eqn_gpu, eqn_cpu,sizeof(EqnNode_gpu)*element_num,
179 cudaMemcpyHostToDevice);
181 par.store(val_string, eqn_gpu);
182 par.store(val_string, eqn_tree);
187 void parse_param_file(Grid &par){
189 // First, we need to open the file and read it in
190 std::string line, eqn_string, val_string;
191 std::ifstream file (par.sval("param_file"));
194 while(getline(file, line)){
195 line.erase(remove_if(line.begin(), line.end(), isspace),
197 int it = line.find("=");
199 //std::cout << "No equals sign!" << '\n';
200 eqn_string += line.substr(0,line.size());
201 //std::cout << val_string << '\t' << eqn_string << '\n';
204 if (val_string != ""){
205 if (val_string == "V"){
206 eqn_string+="+0.5*mass*(Ax*Ax+Ay*Ay+Az*Az)";
208 allocate_eqn(par, val_string, eqn_string);
212 val_string = line.substr(0, it);
213 eqn_string = line.substr(it+1, line.size() - it-1);
214 //std::cout << val_string << '\t' << eqn_string << '\n';
217 //std::cout << line << '\n';
219 allocate_eqn(par, val_string, eqn_string);
225 // We assume that we have already removed unnecessary spaces and such from
227 EqnNode parse_eqn(Grid &par, std::string eqn_string, std::string val_str){
228 //std::cout << eqn_string << '\n';
230 // Because this will be called recursively, we need to return if the string
232 if (eqn_string.length() == 0){
233 std::cout << "There's nothing here!" << '\n';
237 // vector of all possibe mathematical operators (not including functions)
238 std::vector<std::string> moperators(5);
240 "-", "+", "/", "*", "^"
243 // we need a map for these operators
244 std::unordered_map<char, int> moperator_map;
245 moperator_map['+'] = 1;
246 moperator_map['-'] = 2;
247 moperator_map['*'] = 3;
248 moperator_map['/'] = 4;
249 moperator_map['^'] = 5;
251 // And another vector for brackets of various types which indicate recursive
252 // parsing of the equation
253 std::vector<char> mbrackets;
258 // vector of all possible mathematical functions... more to come
259 std::vector<std::string> mfunctions(6);
261 "sin", "cos", "exp", "tan", "sqrt", "pow"
264 // We also need a specific map for the functions above
265 std::unordered_map<std::string, int> mfunctions_map;
266 mfunctions_map["cos"] = 6;
267 mfunctions_map["sin"] = 7;
268 mfunctions_map["tan"] = 8;
269 mfunctions_map["sqrt"] = 9;
270 mfunctions_map["exp"] = 10;
271 //mfunctions_map["erf"] = ;
272 mfunctions_map["pow"] = 5;
273 mfunctions_map["jn"] = 11;
274 mfunctions_map["yn"] = 12;
275 mfunctions_map["k2n"] = 13;
277 // first, we need to parse the equation string and remove parentheses
278 // Then we'll sort according to the math operators (mops)
279 int half = mbrackets.size() / 2;
280 std::stack<int> open_bra;
281 std::vector<int> ignored_positions;
282 for (int i = 0; i < eqn_string.size(); ++i){
283 for (int j = 0; j < mbrackets.size() / 2; ++j){
284 if (eqn_string[i] == mbrackets[j]){
289 // Now we need to look for the closing bracket
290 for (int j = mbrackets.size()/2; j < mbrackets.size(); ++j){
291 if (eqn_string[i] == mbrackets[j]){
292 ignored_positions.push_back(open_bra.top());
293 ignored_positions.push_back(i);
300 // If parentheses cover the entire expression, we
301 // 1. Remove the parentheses
302 // 2. subtract 1 from bra_positions
303 std::string temp_string = eqn_string;
304 if (ignored_positions.size() > 0){
305 if (ignored_positions[ignored_positions.size()-1]
306 == temp_string.size() - 1 &&
307 ignored_positions[ignored_positions.size()-2] == 0){
308 ignored_positions.erase(ignored_positions.end()-1);
309 ignored_positions.erase(ignored_positions.end()-1);
311 eqn_string = eqn_string.substr(1, eqn_string.size() - 2);
313 for (int i = 0; i < ignored_positions.size(); ++i){
314 --ignored_positions[i];
317 temp_string = eqn_string;
319 // Now we remove the parentheses from the eqn_string
320 std::vector<int> temp_positions = ignored_positions;
321 for (int i = 0; i < temp_positions.size(); i += 2){
322 temp_string.erase(temp_positions[i],
323 temp_positions[i+1] - temp_positions[i]+1);
324 for (int j = i+2; j < temp_positions.size(); ++j){
325 if (temp_positions[j] > temp_positions[i]){
327 -= temp_positions[i+1] - temp_positions[i] + 1;
333 // Creating the EqnNode
337 (temp_string.find_first_not_of("0123456789.")
338 == std::string::npos);
340 eqn_tree.val = atof(temp_string.c_str());
343 else if(temp_string.size() == 1){
344 if(temp_string[0] == 'x'){
345 eqn_tree.is_dynamic = true;
349 else if(temp_string[0] == 'y'){
350 eqn_tree.is_dynamic = true;
354 else if(temp_string[0] == 'z'){
355 eqn_tree.is_dynamic = true;
359 else if(temp_string[0] == 't'){
360 eqn_tree.is_dynamic = true;
361 par.store(val_str + "_time", true);
367 //std::cout << temp_string << '\n';
369 // We'll need to parse the equation string in reverse PEMDAS
370 // So we go through the moperators, then mbrackets / mfunctions
371 bool mop_found = false;
374 for (auto &mop : moperators){
375 if (temp_string.find(mop)<temp_string.size() && !mop_found){
376 mop_point = temp_string.find(mop);
381 if(auto it = mfunctions_map.find(temp_string)
382 != mfunctions_map.end()){
384 mop_point = temp_string.size()-1;
387 std::string check_string = eqn_string.substr(mop_point+1,
388 eqn_string.size() - mop_point-1);
389 if(check_string.find(",") < check_string.size()){
390 eqn_tree.op_num = mfunctions_map[temp_string];
391 eqn_tree.has_op = true;
392 eqn_tree.left = (EqnNode *)malloc(sizeof(EqnNode));
393 eqn_tree.right = (EqnNode *)malloc(sizeof(EqnNode));
395 int comma_loc = check_string.find(",");
396 eqn_tree.left[0] = parse_eqn(par,
397 check_string.substr(1, comma_loc - 1), val_str);
398 eqn_tree.right[0] = parse_eqn(par,
399 check_string.substr(comma_loc+1,
400 check_string.size()-comma_loc-2),
404 eqn_tree.op_num = mfunctions_map[temp_string];
405 eqn_tree.has_op = true;
406 eqn_tree.left = (EqnNode *)malloc(sizeof(EqnNode));
407 eqn_tree.left[0] = parse_eqn(par, check_string, val_str);
412 if (par.is_ast_cpu(temp_string)){
413 //std::cout << "found ast" << '\n';
414 eqn_tree = par.ast_cpuval(temp_string);
415 par.store(val_str + "_time", true);
417 else if(par.is_double(temp_string)){
418 //std::cout << "found double " << temp_string << "!"<< '\n';
419 eqn_tree.val = par.dval(temp_string);
422 std::cout << "No value " << temp_string << " found!\n";
432 std::cout << "ignored positions are: " << '\n';
433 for (int &pos : ignored_positions){
434 std::cout << pos << '\n';
438 // Now we need to find the mop_point position in the eqn_string
439 // We know the temp_string and how many positions we removed and where.
440 if (ignored_positions.size() > 0){
442 for (int i = 0; i <= mop_point; ++i){
443 for (int j = 0; j < ignored_positions.size(); j += 2){
444 if (ignored_positions[j] == count){
445 count += ignored_positions[j+1] - ignored_positions[j] + 1;
454 //std::cout << "eqn_string is: " << eqn_string << '\n';
455 //std::cout << "mop point is: " << mop_point << '\n';
457 // Now we need to store the operator into the eqn_tree
458 eqn_tree.op_num = moperator_map[eqn_string[mop_point]];
459 eqn_tree.has_op = true;
461 // Now we need to parse the left and right banches...
462 eqn_tree.left = (EqnNode *)malloc(sizeof(EqnNode));
463 eqn_tree.left[0] = parse_eqn(par, eqn_string.substr(0, mop_point), val_str);
465 eqn_tree.right = (EqnNode *)malloc(sizeof(EqnNode));
466 eqn_tree.right[0] = parse_eqn(par, eqn_string.substr(mop_point+1,
467 eqn_string.size() - mop_point-1), val_str);
472 void tree_to_array(EqnNode eqn, EqnNode_gpu *eqn_array, int &element_num){
474 eqn_array[element_num].val = eqn.val;
475 eqn_array[element_num].var = eqn.var;
476 eqn_array[element_num].is_dynamic = eqn.is_dynamic;
478 // variable defining difference between functions that use 1 or 2 inputs.
479 int function_thresh = 6;
481 bool only_left = false;
482 eqn_array[element_num].op_num = eqn.op_num;
483 if (eqn.op_num >= function_thresh){
487 if (eqn.has_op == false){
488 eqn_array[element_num].left = -1;
489 eqn_array[element_num].right = -1;
493 int temp = element_num;
495 eqn_array[temp].left = element_num;
496 tree_to_array(eqn.left[0], eqn_array, element_num);
500 eqn_array[temp].right = element_num;
501 tree_to_array(eqn.right[0], eqn_array, element_num);
504 eqn_array[temp].right = -1;
509 void find_element_num(EqnNode eqn_tree, int &element_num){
512 if (eqn_tree.has_op == false){
516 find_element_num(eqn_tree.left[0], element_num);
517 // For single operators, the right node doesn't exist...
518 if(eqn_tree.op_num < 6 ){
519 find_element_num(eqn_tree.right[0], element_num);
524 /*----------------------------------------------------------------------------//
526 *-----------------------------------------------------------------------------*/
527 __device__ double evaluate_eqn_gpu(EqnNode_gpu *eqn, double x, double y,
528 double z, double time, int element_num){
530 if (eqn[element_num].right < 0 &&
531 eqn[element_num].left < 0){
532 if (eqn[element_num].is_dynamic){
533 if(eqn[element_num].var == 'x'){
536 if(eqn[element_num].var == 'y'){
539 if(eqn[element_num].var == 'z'){
542 if(eqn[element_num].var == 't'){
547 return eqn[element_num].val;
553 if (eqn[element_num].left > 0){
554 val1 = evaluate_eqn_gpu(eqn, x, y, z, time, eqn[element_num].left);
560 if (eqn[element_num].right > 0){
561 val2 = evaluate_eqn_gpu(eqn, x, y, z, time, eqn[element_num].right);
567 //return add_gpu(val1, val2);
568 switch(eqn[element_num].op_num){
571 printf("GPU kernel failure! Improper equation tree!");
575 return add_gpu(val1, val2);
577 return subtract_gpu(val1, val2);
579 return multiply_gpu(val1, val2);
581 return divide_gpu(val1, val2);
583 return pow_gpu(val1, val2);
585 return cos_gpu(val1, val2);
587 return sin_gpu(val1, val2);
589 return tan_gpu(val1, val2);
591 return sqrt_gpu(val1, val2);
593 return exp_gpu(val1, val2);
595 return jn_gpu(val1, val2);
597 return yn_gpu(val1, val2);
599 return k2n_gpu(val1, val2);
605 __global__ void find_field(double *field, double dx, double dy, double dz,
606 double xMax, double yMax, double zMax,
607 double time, EqnNode_gpu *eqn){
608 int gid = getGid3d3d();
609 int xid = blockIdx.x*blockDim.x + threadIdx.x;
610 int yid = blockIdx.y*blockDim.y + threadIdx.y;
611 int zid = blockIdx.z*blockDim.z + threadIdx.z;
613 field[gid] = evaluate_eqn_gpu(eqn, dx*xid - xMax,
615 dz*zid - zMax, time, 0);
618 __global__ void zeros(double *field, int n){
619 int xid = blockDim.x*blockIdx.x + threadIdx.x;