1 | #ifdef __OPENCL_VERSION__ |
---|
2 | # define USE_OPENCL |
---|
3 | #endif |
---|
4 | |
---|
5 | // If opencl is not available |
---|
6 | #ifndef USE_OPENCL |
---|
7 | # include <math.h> |
---|
8 | # define REAL(x) (x) |
---|
9 | # define real double |
---|
10 | # define global |
---|
11 | # define local |
---|
12 | # define kernel extern "C" |
---|
13 | # include "NR_BessJ1.cpp" |
---|
14 | #endif |
---|
15 | |
---|
16 | #ifndef M_PI |
---|
17 | # define M_PI REAL(3.141592653589793) |
---|
18 | #endif |
---|
19 | #define RADIANS REAL(0.017453292519943295) |
---|
20 | #define PI_E8 REAL(3.141592653589793e8) |
---|
21 | real f(real qx, real qy, real sub, real rr, real h, real scale, real cyl_theta, real cyl_phi); |
---|
22 | real f(real qx, real qy, real sub, real rr, real h, real scale, real cyl_theta, real cyl_phi) |
---|
23 | { |
---|
24 | const real qq = sqrt(qx*qx+qy*qy); |
---|
25 | |
---|
26 | const real theta = cyl_theta*RADIANS; |
---|
27 | const real cos_val = cos(theta)*cos(cyl_phi*RADIANS)*(qx/qq) + sin(theta)*(qy/qq); |
---|
28 | |
---|
29 | real alpha = acos(cos_val); |
---|
30 | if(alpha == REAL(0.0)) { |
---|
31 | alpha = REAL(1.0e-26); |
---|
32 | } |
---|
33 | const real sin_alpha = sin(alpha); |
---|
34 | const real besarg = qq*rr*sin_alpha; |
---|
35 | const real siarg = qq*h/2*cos(alpha); |
---|
36 | |
---|
37 | real be, si; |
---|
38 | if (besarg == REAL(0.0)) { |
---|
39 | be = sin_alpha; |
---|
40 | } else { |
---|
41 | const real bj = NR_BessJ1(besarg)/besarg; |
---|
42 | be = REAL(4.0)*bj*bj*sin_alpha; |
---|
43 | } |
---|
44 | if (siarg == REAL(0.0)) { |
---|
45 | si = REAL(1.0); |
---|
46 | } else { |
---|
47 | si = sin(siarg)/siarg; |
---|
48 | } |
---|
49 | const real form = be*si*si/sin_alpha; |
---|
50 | return PI_E8*scale*form*sub*sub*rr*rr*rr*rr*h*h; |
---|
51 | } |
---|
52 | |
---|
53 | kernel void CylinderKernel(global const real *qx, global const real *qy, global real *result, |
---|
54 | #ifdef USE_OPENCL |
---|
55 | global real *loops_g, |
---|
56 | #else |
---|
57 | const int Nq, |
---|
58 | #endif |
---|
59 | local real *loops, const real cutoff, |
---|
60 | const real scale, const real background, |
---|
61 | const real sub, |
---|
62 | const int Nradius, const int Nlength, const int Ntheta, const int Nphi) |
---|
63 | { |
---|
64 | #ifdef USE_OPENCL |
---|
65 | // copy loops info to local memory |
---|
66 | event_t e = async_work_group_copy(loops, loops_g, (Nradius+Nlength+Ntheta+Nphi)*2, 0); |
---|
67 | wait_group_events(1, &e); |
---|
68 | |
---|
69 | int i = get_global_id(0); |
---|
70 | int count = get_global_size(0); |
---|
71 | |
---|
72 | if(i < count) |
---|
73 | #else |
---|
74 | #pragma omp parallel for |
---|
75 | for (int i=0; i<Nq; i++) |
---|
76 | #endif |
---|
77 | { |
---|
78 | const real qxi=qx[i]; |
---|
79 | const real qyi=qy[i]; |
---|
80 | real ret=REAL(0.0), norm=REAL(0.0), norm_vol=REAL(0.0), vol=REAL(0.0); |
---|
81 | for (int ri=0; ri < Nradius; ri++) { |
---|
82 | const real rv = loops[2*ri]; |
---|
83 | const real rw = loops[2*ri+1]; |
---|
84 | for (int li=0; li < Nlength; li++) { |
---|
85 | const real lv = loops[2*(li+Nradius)]; |
---|
86 | const real lw = loops[2*(li+Nradius)+1]; |
---|
87 | for (int thi=0; thi < Ntheta; thi++) { |
---|
88 | const real thv = loops[2*(thi+Nradius+Nlength)]; |
---|
89 | const real thw = loops[2*(thi+Nradius+Nlength)+1]; |
---|
90 | // #pragma unroll |
---|
91 | for (int phi=0; phi < Nphi; phi++) { |
---|
92 | const real phv = loops[2*(phi+Nradius+Nlength+Ntheta)]; |
---|
93 | const real phw = loops[2*(phi+Nradius+Nlength+Ntheta)+1]; |
---|
94 | |
---|
95 | const real weight = rw*lw*thw*phw; |
---|
96 | //ret += qxi + qyi + sub + rv + lv + weight + thv + phv; |
---|
97 | if (weight > cutoff) { |
---|
98 | ret += f(qxi, qyi, sub, rv, lv, weight, thv, phv); |
---|
99 | norm += weight; |
---|
100 | vol += rw*lw*rv*rv*lv; |
---|
101 | norm_vol += rw*lw; |
---|
102 | } |
---|
103 | } |
---|
104 | } |
---|
105 | } |
---|
106 | } |
---|
107 | //if (Ntheta>1) norm = norm/(M_PI/2); |
---|
108 | if (vol != REAL(0.0) && norm_vol != REAL(0.0)) { |
---|
109 | ret *= norm_vol/vol; |
---|
110 | } |
---|
111 | result[i] = scale*ret/norm+background; |
---|
112 | } |
---|
113 | } |
---|