[ae7d639] | 1 | #ifdef __OPENCL_VERSION__ |
---|
| 2 | # define USE_OPENCL |
---|
| 3 | #endif |
---|
| 4 | |
---|
| 5 | // If opencl is not available |
---|
| 6 | #ifndef USE_OPENCL |
---|
| 7 | # include <math.h> |
---|
| 8 | # define REAL(x) (x) |
---|
| 9 | # define real double |
---|
| 10 | # define global |
---|
| 11 | # define local |
---|
| 12 | # define kernel extern "C" |
---|
| 13 | #endif |
---|
| 14 | |
---|
| 15 | #ifndef M_PI |
---|
| 16 | # define M_PI REAL(3.141592653589793) |
---|
| 17 | #endif |
---|
| 18 | #define RADIANS REAL(0.017453292519943295) |
---|
| 19 | #define PI_E8 REAL(3.141592653589793e8) |
---|
| 20 | |
---|
| 21 | real f(real qx, real qy, real sub, real radius_a, real radius_b, real scale, real axis_theta, real axis_phi); |
---|
| 22 | real f(real qx, real qy, real sub, real radius_a, real radius_b, real scale, real axis_theta, real axis_phi) |
---|
| 23 | { |
---|
| 24 | real ret = 0; |
---|
| 25 | const real q = sqrt(qx*qx + qy*qy); |
---|
| 26 | const real theta = axis_theta*RADIANS; |
---|
| 27 | const real cos_val = cos(theta)*cos(axis_phi*RADIANS)*(qx/q) + sin(theta)*(qy/q); |
---|
| 28 | |
---|
| 29 | const real arg = q*radius_b*sqrt(1.0+(cos_val*cos_val*(((radius_a*radius_a/(radius_b*radius_b))-1.0)))); |
---|
| 30 | if(arg == 0.0){ |
---|
| 31 | ret = 1.0/3.0; |
---|
| 32 | } |
---|
| 33 | else{ |
---|
| 34 | ret = (sin(arg)-arg*cos(arg))/(arg*arg*arg); |
---|
| 35 | } |
---|
| 36 | ret*=ret*9.0*sub*sub*4.0/3.0*acos(-1.0)*radius_b*radius_b*radius_a*(1.0e8); |
---|
| 37 | |
---|
| 38 | return radius_a*scale*ret*pow(radius_b, 2); |
---|
| 39 | } |
---|
| 40 | |
---|
| 41 | kernel void EllipsoidKernel(global const real *qx, global const real *qy, global real *result, |
---|
| 42 | #ifdef USE_OPENCL |
---|
| 43 | global real *loops_g, |
---|
| 44 | #else |
---|
| 45 | const int Nq, |
---|
| 46 | #endif |
---|
| 47 | local real *loops, const real cutoff, |
---|
| 48 | const real scale, const real background, |
---|
| 49 | const real sub, |
---|
| 50 | const int Nradius_a, const int Nradius_b, const int Ntheta, const int Nphi) |
---|
| 51 | { |
---|
| 52 | #ifdef USE_OPENCL |
---|
| 53 | // copy loops info to local memory |
---|
| 54 | event_t e = async_work_group_copy(loops, loops_g, (Nradius_a+Nradius_b+Ntheta+Nphi)*2, 0); |
---|
| 55 | wait_group_events(1, &e); |
---|
| 56 | |
---|
| 57 | int i = get_global_id(0); |
---|
| 58 | int count = get_global_size(0); |
---|
| 59 | |
---|
| 60 | if(i < count) |
---|
| 61 | #else |
---|
| 62 | #pragma omp parallel for |
---|
| 63 | for (int i=0; i<Nq; i++) |
---|
| 64 | #endif |
---|
| 65 | { |
---|
| 66 | const real qxi=qx[i]; |
---|
| 67 | const real qyi=qy[i]; |
---|
| 68 | real ret=REAL(0.0), norm=REAL(0.0), norm_vol=REAL(0.0), vol=REAL(0.0); |
---|
| 69 | for (int ai=0; ai < Nradius_a; ai++) { |
---|
| 70 | const real rav = loops[2*ai]; |
---|
| 71 | const real raw = loops[2*ai+1]; |
---|
| 72 | for (int bi=0; bi < Nradius_b; bi++) { |
---|
| 73 | const real rbv = loops[2*(bi+Nradius_a)]; |
---|
| 74 | const real rbw = loops[2*(bi+Nradius_a)+1]; |
---|
| 75 | for (int thi=0; thi < Ntheta; thi++) { |
---|
| 76 | const real thv = loops[2*(thi+Nradius_a+Nradius_b)]; |
---|
| 77 | const real thw = loops[2*(thi+Nradius_a+Nradius_b)+1]; |
---|
| 78 | // #pragma unroll |
---|
| 79 | for (int phi=0; phi < Nphi; phi++) { |
---|
| 80 | const real phv = loops[2*(phi+Nradius_a+Nradius_b+Ntheta)]; |
---|
| 81 | const real phw = loops[2*(phi+Nradius_a+Nradius_b+Ntheta)+1]; |
---|
| 82 | |
---|
| 83 | const real weight = raw*rbw*thw*phw; |
---|
| 84 | //ret += qxi + qyi + sub + rav + rbv + weight + thv + phv; |
---|
| 85 | if (weight > cutoff) { |
---|
| 86 | ret += f(qxi, qyi, sub, rav, rbv, weight, thv, phv); |
---|
| 87 | norm += weight; |
---|
[a953943] | 88 | vol += raw*rbw*rav*rbv*rbv; |
---|
| 89 | norm_vol += raw*rbw; |
---|
[ae7d639] | 90 | } |
---|
| 91 | } |
---|
| 92 | } |
---|
| 93 | } |
---|
| 94 | } |
---|
| 95 | //if (Ntheta>1) norm = norm/(M_PI/2); |
---|
| 96 | if (vol != REAL(0.0) && norm_vol != REAL(0.0)) { |
---|
| 97 | ret *= norm_vol/vol; |
---|
| 98 | } |
---|
| 99 | result[i] = scale*ret/norm+background; |
---|
| 100 | } |
---|
| 101 | } |
---|