1 | #ifdef __OPENCL_VERSION__ |
---|
2 | # define USE_OPENCL |
---|
3 | #endif |
---|
4 | |
---|
5 | // If opencl is not available |
---|
6 | #ifndef USE_OPENCL |
---|
7 | # include <math.h> |
---|
8 | # define REAL(x) (x) |
---|
9 | # define real double |
---|
10 | # define global |
---|
11 | # define local |
---|
12 | # define kernel extern "C" |
---|
13 | # include "NR_BessJ1.cpp" |
---|
14 | #endif |
---|
15 | |
---|
16 | #ifndef M_PI |
---|
17 | # define M_PI REAL(3.141592653589793) |
---|
18 | #endif |
---|
19 | #define RADIANS REAL(0.017453292519943295) |
---|
20 | #define PI_E8 REAL(3.141592653589793e8) |
---|
21 | |
---|
22 | real f(real qx, real qy, real subone, real subtwo, real radius, real length, real thickness, real weight, real axis_theta, real axis_phi); |
---|
23 | real f(real qx, real qy, real subone, real subtwo, real radius, real length, real thickness, real weight, real axis_theta, real axis_phi) |
---|
24 | { |
---|
25 | const real q = sqrt(qx*qx+qy*qy); |
---|
26 | const real theta = axis_theta*RADIANS; |
---|
27 | real alpha = acos(cos(theta)*cos(axis_phi*RADIANS)*qx/q + sin(theta)*qy/q); |
---|
28 | |
---|
29 | if (alpha == 0.0){ |
---|
30 | alpha = 1.0e-26; |
---|
31 | } |
---|
32 | |
---|
33 | real si1=0; real si2=0; real be1=0; real be2=0; |
---|
34 | |
---|
35 | const real vol2 = M_PI*(radius+thickness)*(radius+thickness)*(length+2.0*thickness); |
---|
36 | |
---|
37 | const real besarg1 = q*radius*sin(alpha); |
---|
38 | const real besarg2 = q*(radius+thickness)*sin(alpha); |
---|
39 | const real sinarg1 = q*length/2.0*cos(alpha); |
---|
40 | const real sinarg2 = q*(length/2.0+thickness)*cos(alpha); |
---|
41 | |
---|
42 | if (besarg1 == 0.0){be1 = 0.5;} |
---|
43 | else{be1 = NR_BessJ1(besarg1)/besarg1;} |
---|
44 | |
---|
45 | if (besarg2 == 0.0){be2 = 0.5;} |
---|
46 | else{be2 = NR_BessJ1(besarg2)/besarg2;} |
---|
47 | |
---|
48 | if (sinarg1 == 0.0){si1 = 1.0;} |
---|
49 | else{si1 = sin(sinarg1)/sinarg1;} |
---|
50 | |
---|
51 | if (sinarg2 == 0.0){si2 = 1.0;} |
---|
52 | else{si2 = sin(sinarg2)/sinarg2;} |
---|
53 | |
---|
54 | const real tt = 2.0*vol2*(subone)*si2*be2+2.0*(M_PI*radius*radius*(length))*(subtwo)*si1*be1; |
---|
55 | |
---|
56 | real answer = (tt*tt)*sin(alpha)/fabs(sin(alpha)); |
---|
57 | //answer *= answer/(PI_E8*(radius+thickness)*(radius+thickness)*(length+2.0*thickness)); |
---|
58 | |
---|
59 | return weight*answer*answer/PI_E8; |
---|
60 | |
---|
61 | } |
---|
62 | |
---|
63 | kernel void CoreShellCylinderKernel(global const real *qx, global const real *qy, global real *result, |
---|
64 | #ifdef USE_OPENCL |
---|
65 | global real *loops_g, |
---|
66 | #else |
---|
67 | const int Nq, |
---|
68 | #endif |
---|
69 | local real *loops, const real cutoff, |
---|
70 | const real scale, const real background, |
---|
71 | const real subone, const real subtwo, |
---|
72 | const int Nradius, const int Nlength, const int Nthick, const int Ntheta, const int Nphi) |
---|
73 | { |
---|
74 | #ifdef USE_OPENCL |
---|
75 | // copy loops info to local memory |
---|
76 | event_t e = async_work_group_copy(loops, loops_g, (Nradius+Nlength+Nthick+Ntheta+Nphi)*2, 0); |
---|
77 | wait_group_events(1, &e); |
---|
78 | |
---|
79 | int i = get_global_id(0); |
---|
80 | int count = get_global_size(0); |
---|
81 | |
---|
82 | if(i < count) |
---|
83 | #else |
---|
84 | #pragma omp parallel for |
---|
85 | for (int i=0; i<Nq; i++) |
---|
86 | #endif |
---|
87 | { |
---|
88 | const real qxi=qx[i]; |
---|
89 | const real qyi=qy[i]; |
---|
90 | real ret=REAL(0.0), norm=REAL(0.0), norm_vol=REAL(0.0), vol=REAL(0.0); |
---|
91 | for (int ri=0; ri < Nradius; ri++) { |
---|
92 | const real rv = loops[2*ri]; |
---|
93 | const real rw = loops[2*ri+1]; |
---|
94 | for (int li=0; li < Nlength; li++) { |
---|
95 | const real lv = loops[2*(li+Nradius)]; |
---|
96 | const real lw = loops[2*(li+Nradius)+1]; |
---|
97 | for (int ti=0; ti < Nthick; ti++) { |
---|
98 | const real tv = loops[2*(ti+Nradius+Nlength)]; ////////////////////////// |
---|
99 | const real tw = loops[2*(ti+Nradius+Nlength)+1]; |
---|
100 | for (int thi=0; thi < Ntheta; thi++) { |
---|
101 | const real thv = loops[2*(thi+Nradius+Nlength+Nthick)]; |
---|
102 | const real thw = loops[2*(thi+Nradius+Nlength+Nthick)+1]; |
---|
103 | // #pragma unroll |
---|
104 | for (int phi=0; phi < Nphi; phi++) { |
---|
105 | const real phv = loops[2*(phi+Nradius+Nlength+Ntheta+Nthick)]; |
---|
106 | const real phw = loops[2*(phi+Nradius+Nlength+Ntheta+Nthick)+1]; |
---|
107 | |
---|
108 | const real weight = rw*lw*tw*thw*phw; |
---|
109 | //ret += qxi + qyi + sub + rv + lv + weight + thv + phv; |
---|
110 | if (weight > cutoff) { |
---|
111 | ret += f(qxi, qyi, subone, subtwo, rv, lv, tv, weight, thv, phv); |
---|
112 | norm += weight; |
---|
113 | vol += rw*lw*tw*(rv+tv)*(rv+tv)*(lv+2.0*tv); |
---|
114 | norm_vol += rw*lw*tw; |
---|
115 | } |
---|
116 | } |
---|
117 | } |
---|
118 | } |
---|
119 | } |
---|
120 | } |
---|
121 | //if (Ntheta>1) norm = norm/(M_PI/2); |
---|
122 | if (vol != REAL(0.0) && norm_vol != REAL(0.0)) { |
---|
123 | ret *= norm_vol/vol; |
---|
124 | } |
---|
125 | result[i] = scale*ret/norm+background; |
---|
126 | |
---|
127 | } |
---|
128 | } |
---|