[a953943] | 1 | #ifdef __OPENCL_VERSION__ |
---|
| 2 | # define USE_OPENCL |
---|
| 3 | #endif |
---|
| 4 | |
---|
| 5 | // If opencl is not available |
---|
| 6 | #ifndef USE_OPENCL |
---|
| 7 | # include <math.h> |
---|
| 8 | # define REAL(x) (x) |
---|
| 9 | # define real double |
---|
| 10 | # define global |
---|
| 11 | # define local |
---|
| 12 | # define kernel extern "C" |
---|
| 13 | # include "NR_BessJ1.cpp" |
---|
| 14 | #endif |
---|
| 15 | |
---|
| 16 | #ifndef M_PI |
---|
| 17 | # define M_PI REAL(3.141592653589793) |
---|
| 18 | #endif |
---|
| 19 | #define RADIANS REAL(0.017453292519943295) |
---|
| 20 | #define PI_E8 REAL(3.141592653589793e8) |
---|
| 21 | |
---|
| 22 | real f(real qx, real qy, real subone, real subtwo, real radius, real length, real thickness, real weight, real axis_theta, real axis_phi); |
---|
| 23 | real f(real qx, real qy, real subone, real subtwo, real radius, real length, real thickness, real weight, real axis_theta, real axis_phi) |
---|
| 24 | { |
---|
| 25 | const real q = sqrt(qx*qx+qy*qy); |
---|
| 26 | const real theta = axis_theta*RADIANS; |
---|
| 27 | real alpha = acos(cos(theta)*cos(axis_phi*RADIANS)*qx/q + sin(theta)*qy/q); |
---|
| 28 | |
---|
| 29 | if (alpha == 0.0){ |
---|
| 30 | alpha = 1.0e-26; |
---|
| 31 | } |
---|
| 32 | |
---|
| 33 | real si1=0; real si2=0; real be1=0; real be2=0; |
---|
| 34 | |
---|
| 35 | const real vol2 = M_PI*(radius+thickness)*(radius+thickness)*(length+2.0*thickness); |
---|
| 36 | |
---|
| 37 | const real besarg1 = q*radius*sin(alpha); |
---|
| 38 | const real besarg2 = q*(radius+thickness)*sin(alpha); |
---|
| 39 | const real sinarg1 = q*length/2.0*cos(alpha); |
---|
| 40 | const real sinarg2 = q*(length/2.0+thickness)*cos(alpha); |
---|
| 41 | |
---|
| 42 | if (besarg1 == 0.0){be1 = 0.5;} |
---|
| 43 | else{be1 = NR_BessJ1(besarg1)/besarg1;} |
---|
| 44 | |
---|
| 45 | if (besarg2 == 0.0){be2 = 0.5;} |
---|
| 46 | else{be2 = NR_BessJ1(besarg2)/besarg2;} |
---|
| 47 | |
---|
| 48 | if (sinarg1 == 0.0){si1 = 1.0;} |
---|
| 49 | else{si1 = sin(sinarg1)/sinarg1;} |
---|
| 50 | |
---|
| 51 | if (sinarg2 == 0.0){si2 = 1.0;} |
---|
| 52 | else{si2 = sin(sinarg2)/sinarg2;} |
---|
| 53 | |
---|
| 54 | const real tt = 2.0*vol2*(subone)*si2*be2+2.0*(M_PI*radius*radius*(length))*(subtwo)*si1*be1; |
---|
| 55 | |
---|
| 56 | real answer = (tt*tt)*sin(alpha)/fabs(sin(alpha)); |
---|
| 57 | //answer *= answer/(PI_E8*(radius+thickness)*(radius+thickness)*(length+2.0*thickness)); |
---|
| 58 | |
---|
| 59 | return weight*answer*answer/PI_E8; |
---|
| 60 | |
---|
| 61 | } |
---|
| 62 | |
---|
| 63 | kernel void CoreShellCylinderKernel(global const real *qx, global const real *qy, global real *result, |
---|
| 64 | #ifdef USE_OPENCL |
---|
| 65 | global real *loops_g, |
---|
| 66 | #else |
---|
| 67 | const int Nq, |
---|
| 68 | #endif |
---|
| 69 | local real *loops, const real cutoff, |
---|
| 70 | const real scale, const real background, |
---|
| 71 | const real subone, const real subtwo, |
---|
| 72 | const int Nradius, const int Nlength, const int Nthick, const int Ntheta, const int Nphi) |
---|
| 73 | { |
---|
| 74 | #ifdef USE_OPENCL |
---|
| 75 | // copy loops info to local memory |
---|
| 76 | event_t e = async_work_group_copy(loops, loops_g, (Nradius+Nlength+Nthick+Ntheta+Nphi)*2, 0); |
---|
| 77 | wait_group_events(1, &e); |
---|
| 78 | |
---|
| 79 | int i = get_global_id(0); |
---|
| 80 | int count = get_global_size(0); |
---|
| 81 | |
---|
| 82 | if(i < count) |
---|
| 83 | #else |
---|
| 84 | #pragma omp parallel for |
---|
| 85 | for (int i=0; i<Nq; i++) |
---|
| 86 | #endif |
---|
| 87 | { |
---|
| 88 | const real qxi=qx[i]; |
---|
| 89 | const real qyi=qy[i]; |
---|
| 90 | real ret=REAL(0.0), norm=REAL(0.0), norm_vol=REAL(0.0), vol=REAL(0.0); |
---|
| 91 | for (int ri=0; ri < Nradius; ri++) { |
---|
| 92 | const real rv = loops[2*ri]; |
---|
| 93 | const real rw = loops[2*ri+1]; |
---|
| 94 | for (int li=0; li < Nlength; li++) { |
---|
| 95 | const real lv = loops[2*(li+Nradius)]; |
---|
| 96 | const real lw = loops[2*(li+Nradius)+1]; |
---|
| 97 | for (int ti=0; ti < Nthick; ti++) { |
---|
| 98 | const real tv = loops[2*(ti+Nradius+Nlength)]; ////////////////////////// |
---|
| 99 | const real tw = loops[2*(ti+Nradius+Nlength)+1]; |
---|
| 100 | for (int thi=0; thi < Ntheta; thi++) { |
---|
| 101 | const real thv = loops[2*(thi+Nradius+Nlength+Nthick)]; |
---|
| 102 | const real thw = loops[2*(thi+Nradius+Nlength+Nthick)+1]; |
---|
| 103 | // #pragma unroll |
---|
| 104 | for (int phi=0; phi < Nphi; phi++) { |
---|
| 105 | const real phv = loops[2*(phi+Nradius+Nlength+Ntheta+Nthick)]; |
---|
| 106 | const real phw = loops[2*(phi+Nradius+Nlength+Ntheta+Nthick)+1]; |
---|
| 107 | |
---|
| 108 | const real weight = rw*lw*tw*thw*phw; |
---|
| 109 | //ret += qxi + qyi + sub + rv + lv + weight + thv + phv; |
---|
| 110 | if (weight > cutoff) { |
---|
| 111 | ret += f(qxi, qyi, subone, subtwo, rv, lv, tv, weight, thv, phv); |
---|
| 112 | norm += weight; |
---|
| 113 | vol += rw*lw*tw*(rv+tv)*(rv+tv)*(lv+2.0*tv); |
---|
| 114 | norm_vol += rw*lw*tw; |
---|
| 115 | } |
---|
| 116 | } |
---|
| 117 | } |
---|
| 118 | } |
---|
| 119 | } |
---|
| 120 | } |
---|
| 121 | //if (Ntheta>1) norm = norm/(M_PI/2); |
---|
| 122 | if (vol != REAL(0.0) && norm_vol != REAL(0.0)) { |
---|
| 123 | ret *= norm_vol/vol; |
---|
| 124 | } |
---|
| 125 | result[i] = scale*ret/norm+background; |
---|
| 126 | |
---|
| 127 | } |
---|
| 128 | } |
---|