Changeset 8a21ba3 in sasmodels
- Timestamp:
- Jul 11, 2014 3:40:30 PM (11 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 09e15be
- Parents:
- 496b252
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
Kernel-CapCyl.cpp
r2de9a5e r8a21ba3 1 __kernel void CapCylinderKernel(__global const float *qx, __global const float *qy, __global float *_ptvalue, __global float*vol_i,2 const float rad_cyl, const float rad_cap, const float length, const float thet, const float ph, const floatsub,3 const float scale, const float phi_weight, const float theta_float, const float rad_cap_weight, const floatrad_cyl_weight,4 const float length_weight, const int total, const int size, __const float Gauss76Wt, __const floatGauss76Z)1 __kernel void CapCylinderKernel(__global const real *qx, __global const real *qy, __global real *_ptvalue, __global real *vol_i, 2 const real rad_cyl, const real rad_cap, const real length, const real thet, const real ph, const real sub, 3 const real scale, const real phi_weight, const real theta_float, const real rad_cap_weight, const real rad_cyl_weight, 4 const real length_weight, const int total, const int size, __const real Gauss76Wt, __const real Gauss76Z) 5 5 //ph is phi, sub is sldc-slds, thet is theta 6 6 { … … 8 8 if(i < total) 9 9 { 10 floatq = sqrt(qx[i]*qx[i] + qy[i]*qy[i]);11 floatpi = 4.0*atan(1.0);12 floattheta = thet*pi/180.0;13 floatphi = ph*pi/180.0;14 floatcyl_x = cos(theta)*cos(phi);15 floatcyl_y = sin(theta);16 floatcos_val = cyl_x*qx[i]/q + cyl_y*qy[i]/q;17 floatalpha = acos(cos_val);18 float yyy=0; float ans1=0; float ans2=0; float y=0; float xx=0; float ans=0; float zij=0; float be=0; floatsummj=0;10 real q = sqrt(qx[i]*qx[i] + qy[i]*qy[i]); 11 real pi = 4.0*atan(1.0); 12 real theta = thet*pi/180.0; 13 real phi = ph*pi/180.0; 14 real cyl_x = cos(theta)*cos(phi); 15 real cyl_y = sin(theta); 16 real cos_val = cyl_x*qx[i]/q + cyl_y*qy[i]/q; 17 real alpha = acos(cos_val); 18 real yyy=0; real ans1=0; real ans2=0; real y=0; real xx=0; real ans=0; real zij=0; real be=0; real summj=0; 19 19 20 floathDist = -1.0*sqrt(fabs(rad_cap*rad_cap-rad_cyl*rad_cyl));20 real hDist = -1.0*sqrt(fabs(rad_cap*rad_cap-rad_cyl*rad_cyl)); 21 21 vol_i[i] = pi*rad_cyl*rad_cyl*length+2.0*pi/3.0*((rad_cap-hDist)*(rad_cap-hDist)*(2*rad_cap+hDist)); 22 floatvaj = -1.0*hDist/rad_cap;22 real vaj = -1.0*hDist/rad_cap; 23 23 24 24 for(int j=0;j<76;j++) //the 76 corresponds to the Gauss constants … … 28 28 summj += yyy; 29 29 } 30 floatinner = (1.0-vaj)/2.0*summj*4.0*pi*rad_cap*rad_cap*rad_cap;31 floatarg1 = q*length/2.0*cos(alpha);32 floatarg2 = q*rad_cyl*sin(alpha);30 real inner = (1.0-vaj)/2.0*summj*4.0*pi*rad_cap*rad_cap*rad_cap; 31 real arg1 = q*length/2.0*cos(alpha); 32 real arg2 = q*rad_cyl*sin(alpha); 33 33 yyy = inner; 34 34 … … 44 44 yyy += pi*rad_cyl*rad_cyl*length*sin(arg1)/arg1*2.0*be; 45 45 } 46 floatanswer=yyy*yyy*1.0e8*sub*sub*scale/pi*rad_cyl*rad_cyl*length+2.0*pi*(2.0*rad_cap*rad_cap*rad_cap/3.0+rad_cap*rad_cap*hDist-hDist*hDist*hDist/3.0);46 real answer=yyy*yyy*1.0e8*sub*sub*scale/pi*rad_cyl*rad_cyl*length+2.0*pi*(2.0*rad_cap*rad_cap*rad_cap/3.0+rad_cap*rad_cap*hDist-hDist*hDist*hDist/3.0); 47 47 answer/=sin(alpha); 48 48 -
Kernel-Ellipse.cpp
r5378e40 r8a21ba3 1 __kernel void EllipsoidKernel(const float radius_a_weight, const float radius_b_weight, const floataxis_theta_weight,2 const float axis_phi_weight, const float scale, const float radius_a, const float radius_b, const float sub, const float background, const float axis_theta, const float axis_phi, __global const float*qx,3 __global const float *qy, __global float*_ptvalue, const int length, const int size)1 __kernel void EllipsoidKernel(const real radius_a_weight, const real radius_b_weight, const real axis_theta_weight, 2 const real axis_phi_weight, const real scale, const real radius_a, const real radius_b, const real sub, const real axis_theta, const real axis_phi, __global const real *qx, 3 __global const real *qy, __global real *_ptvalue, const int length, const int size) 4 4 { 5 5 int i = get_global_id(0); 6 6 if(i < length){ 7 float ret = 0; 8 float q = sqrt(qx[i]*qx[i] + qy[i]*qy[i]); 9 float pi = 4.0*atan(1.0); 10 float theta = axis_theta*pi/180.0; 11 float h = axis_phi*pi/180.0; 12 float cyl_x = cos(theta)*cos(h); 13 float cyl_y = sin(theta); 14 float cos_val = cyl_x*(qx[i]/q) + cyl_y*(qy[i]/q); 7 real ret = 0; 8 real q = sqrt(qx[i]*qx[i] + qy[i]*qy[i]); 9 real pi = 4.0*atan(1.0); 10 real theta = axis_theta*pi/180.0; 11 real cyl_x = cos(theta)*cos(axis_phi*pi/180.0); 12 real cyl_y = sin(theta); 13 real cos_val = cyl_x*(qx[i]/q) + cyl_y*(qy[i]/q); 15 14 16 float nu = radius_a/radius_b; 17 float arg = q*radius_b*sqrt(1.0+(cos_val*cos_val*((nu*nu)-1.0))); 15 real arg = q*radius_b*sqrt(1.0+(cos_val*cos_val*(((radius_a*radius_a/(radius_b*radius_b))-1.0)))); 18 16 if(arg == 0.0){ 19 17 ret = 1.0/3.0; … … 22 20 ret = (sin(arg)-arg*cos(arg))/(arg*arg*arg); 23 21 } 24 ret*=ret*9.0*sub*sub; 25 ret*=(4.0/3.0*acos(-1.0)*radius_b*radius_b*radius_a)*scale*(1.0e8); 26 ret+=background; 22 ret*=ret*9.0*sub*sub*4.0/3.0*acos(-1.0)*radius_b*radius_b*radius_a*scale*(1.0e8); 27 23 28 24 _ptvalue[i] = radius_a_weight*radius_b_weight*axis_theta_weight*radius_a*axis_phi_weight*ret*pow(radius_b, 2); 29 30 25 if(size > 1){ 31 26 _ptvalue[i] *= fabs(cos(axis_theta*pi/180.0)); -
code_capcyl.py
r2de9a5e r8a21ba3 66 66 sum, norm, norm_vol, vol = 0.0, 0.0, 0.0, 0.0 67 67 size = len(theta.weight) 68 sub = pars['sld_capcyl']-np.float32(['sld_solv']) 68 sub = pars['sld_capcyl']-pars['sld_solv'] 69 real = np.float32 if self.qx.dtype == np.dtype('float32') else np.float64 69 70 70 71 for i in xrange(len(rad_cyl.weight)): … … 75 76 76 77 self.prg.CapCylinderKernel(queue, self.qx.shape, None, self.qx_b, self.qy_b, self.res_b, 77 self.vol_b, np.float32(rad_cyl.value[i]), np.float32(rad_cap.value[m]), np.float32(length.value[j]),78 np.float32(theta.value[k]), np.float32(phi.value[l]), np.float32(sub), np.float32(pars['scale']),79 np.float32(phi.weight[l]), np.float32(theta.weight[k]), np.float32(rad_cap.weight[m]),80 np.float32(rad_cyl.weight[i]), np.float32(length.weight[j]), np.uint32(self.qx.size), np.uint32(size),78 self.vol_b, real(rad_cyl.value[i]), real(rad_cap.value[m]), real(length.value[j]), 79 real(theta.value[k]), real(phi.value[l]), real(sub), real(pars['scale']), 80 real(phi.weight[l]), real(theta.weight[k]), real(rad_cap.weight[m]), 81 real(rad_cyl.weight[i]), real(length.weight[j]), np.uint32(self.qx.size), np.uint32(size), 81 82 self.Gauss76W_b, self.Gauss76Z_b) 82 83 -
code_cylinder.py
r2de9a5e r8a21ba3 68 68 for k in xrange(len(cyl_theta.weight)): 69 69 for l in xrange(len(cyl_phi.weight)): 70 71 72 70 self.prg.CylinderKernel(queue, self.qx.shape, None, self.qx_b, self.qy_b, self.res_b, real(sub), 73 71 real(radius.value[i]), real(length.value[j]), real(pars['scale']), -
code_ellipse.py
r2de9a5e r8a21ba3 6 6 import pyopencl as cl 7 7 from weights import GaussianDispersion 8 from sasmodel import card 8 9 10 def set_precision(src, qx, qy, dtype): 11 qx = np.ascontiguousarray(qx, dtype=dtype) 12 qy = np.ascontiguousarray(qy, dtype=dtype) 13 if np.dtype(dtype) == np.dtype('float32'): 14 header = """\ 15 #define real float 16 """ 17 else: 18 header = """\ 19 #pragma OPENCL EXTENSION cl_khr_fp64: enable 20 #define real double 21 """ 22 return header+src, qx, qy 9 23 10 24 class GpuEllipse(object): … … 13 27 } 14 28 PD_PARS = ['radius_a', 'radius_b', 'axis_theta', 'axis_phi'] 15 def __init__(self, qx, qy):16 29 17 self.qx = np.asarray(qx, np.float32)18 self.qy = np.asarray(qy, np.float32) 19 #create context, queue, and build program20 s elf.ctx = cl.create_some_context()21 self. queue = cl.CommandQueue(self.ctx)22 self. prg = cl.Program(self.ctx, open('Kernel-Ellipse.cpp').read()).build()30 def __init__(self, qx, qy, dtype='float32'): 31 32 ctx,_queue = card() 33 src, qx, qy = set_precision(open('Kernel-Ellipse.cpp').read(), qx, qy, dtype=dtype) 34 self.prg = cl.Program(ctx, src).build() 35 self.qx, self.qy = qx, qy 23 36 24 37 #buffers 25 38 mf = cl.mem_flags 26 self.qx_b = cl.Buffer( self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qx)27 self.qy_b = cl.Buffer( self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qy)28 self.res_b = cl.Buffer( self.ctx, mf.WRITE_ONLY, qx.nbytes)39 self.qx_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qx) 40 self.qy_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qy) 41 self.res_b = cl.Buffer(ctx, mf.WRITE_ONLY, qx.nbytes) 29 42 self.res = np.empty_like(self.qx) 30 43 31 44 def eval(self, pars): 32 45 #b_n = radius_b # want, a_n = radius_a # want, etc 46 _ctx,queue = card() 33 47 radius_a, radius_b, axis_theta, axis_phi = \ 34 48 [GaussianDispersion(int(pars[base+'_pd_n']), pars[base+'_pd'], pars[base+'_pd_nsigma']) … … 43 57 sum, norm, norm_vol, vol = 0.0, 0.0, 0.0, 0.0 44 58 size = len(axis_theta.weight) 45 sub = pars['sldEll'] - pars['sldSolv'] 59 sub = pars['sldEll'] - pars['sldSolv'] 60 real = np.float32 if self.qx.dtype == np.dtype('float32') else np.float64 46 61 47 62 #Loop over radius weight points … … 54 69 for l in xrange(len(axis_phi.weight)): 55 70 #call the kernel 56 self.prg.EllipsoidKernel( self.queue, self.qx.shape, None, np.float32(radius_a.weight[i]),57 np.float32(radius_b.weight[j]), np.float32(axis_theta.weight[k]),58 np.float32(axis_phi.weight[l]), np.float32(pars['scale']), np.float32(radius_a.value[i]),59 np.float32(radius_b.value[j]), np.float32(sub),np.float32(pars['background']),60 np.float32(axis_theta.value[k]), np.float32(axis_phi.value[l]), self.qx_b, self.qy_b,61 self.res_b,np.uint32(self.qx.size), np.uint32(len(axis_theta.weight)))71 self.prg.EllipsoidKernel(queue, self.qx.shape, None, real(radius_a.weight[i]), 72 real(radius_b.weight[j]), real(axis_theta.weight[k]), 73 real(axis_phi.weight[l]), real(pars['scale']), real(radius_a.value[i]), 74 real(radius_b.value[j]), real(sub), real(axis_theta.value[k]), 75 real(axis_phi.value[l]), self.qx_b, self.qy_b, self.res_b, 76 np.uint32(self.qx.size), np.uint32(len(axis_theta.weight))) 62 77 #copy result back from buffer 63 cl.enqueue_copy( self.queue, self.res, self.res_b)78 cl.enqueue_copy(queue, self.res, self.res_b) 64 79 sum += self.res 65 80 vol += radius_a.weight[i]*radius_b.weight[j]*pow(radius_b.value[j], 2)*radius_a.value[i] -
compare.py
r8a20be5 r8a21ba3 38 38 return theory 39 39 40 def demo(N=1):40 def cyl(N=1): 41 41 import sys 42 42 import matplotlib.pyplot as plt 43 import numpy as np44 43 45 44 if len(sys.argv) > 1: … … 61 60 cpu_time = toc()*1000./N 62 61 63 from c ylcodeimport GpuCylinder62 from code_cylinder import GpuCylinder 64 63 model = SasModel(data, GpuCylinder, dtype='f', **pars) 64 tic() 65 for i in range(N): 66 gpu = model.theory() 67 gpu_time = toc()*1000./N 68 69 relerr = (gpu - cpu)/cpu 70 print "max(|(ocl-omp)/ocl|)", max(abs(relerr)) 71 print "omp t=%.1f ms"%cpu_time 72 print "ocl t=%.1f ms"%gpu_time 73 74 plt.subplot(131); plot_data(data, cpu); plt.title("omp t=%.1f ms"%cpu_time) 75 plt.subplot(132); plot_data(data, gpu); plt.title("ocl t=%.1f ms"%gpu_time) 76 plt.subplot(133); plot_data(data, 1e8*relerr); plt.title("relerr x 10^8"); plt.colorbar() 77 plt.show() 78 79 def ellipse(N=1): 80 import sys 81 import matplotlib.pyplot as plt 82 83 if len(sys.argv) > 1: 84 N = int(sys.argv[1]) 85 data = load_data('JUN03289.DAT') 86 set_beam_stop(data, 0.004) 87 88 pars = dict(scale=.027, radius_a=60, radius_b=180, sldEll=.297e-6, sldSolv=5.773e-6, background=4.9, 89 axis_theta=0, axis_phi=90, radius_a_pd=0.1, radius_a_pd_n=10, radius_a_pd_nsigma=3, radius_b_pd=0.1, radius_b_pd_n=10, 90 radius_b_pd_nsigma=3, axis_theta_pd=0.1, axis_theta_pd_n=6, axis_theta_pd_nsigma=3, axis_phi_pd=0.1, 91 axis_phi_pd_n=6, axis_phi_pd_nsigma=3,) 92 93 model = sasview_model('ellipsoid', **pars) 94 tic() 95 for i in range(N): 96 cpu = sasview_eval(model, data) 97 cpu_time = toc()*1000./N 98 99 from code_ellipse import GpuEllipse 100 model = SasModel(data, GpuEllipse, dtype='f', **pars) 65 101 tic() 66 102 for i in range(N): … … 80 116 81 117 if __name__ == "__main__": 82 demo()118 ellipse() 83 119 84 120 -
fit.py
r496b252 r8a21ba3 25 25 """ 26 26 27 """ 27 28 28 model = SasModel(data, GpuEllipse, scale=.027, radius_a=60, radius_b=180, sldEll=.297e-6, sldSolv=5.773e-6, background=4.9, 29 29 axis_theta=0, axis_phi=90, radius_a_pd=0.1, radius_a_pd_n=10, radius_a_pd_nsigma=3, radius_b_pd=0.1, radius_b_pd_n=10, 30 30 radius_b_pd_nsigma=3, axis_theta_pd=0.1, axis_theta_pd_n=6, axis_theta_pd_nsigma=3, axis_phi_pd=0.1, 31 31 axis_phi_pd_n=6, axis_phi_pd_nsigma=3, dtype='float') 32 """ 32 33 33 34 34 """ … … 37 37 """ 38 38 39 39 """ 40 40 model = SasModel(data, GpuCoreShellCylinder, scale=1, radius=64.1, thickness=1, length=266.96, core_sld=1e-6, shell_sld=4e-6, 41 41 solvent_sld=1e-6, background=0, axis_theta=0, axis_phi=0, radius_pd=0.1, radius_pd_n=10, radius_pd_nsigma=3, … … 43 43 axis_theta_pd=0.1, axis_theta_pd_n=2, axis_theta_pd_nsigma=3, axis_phi_pd=0.1, axis_phi_pd_n=2, 44 44 axis_phi_pd_nsigma=3, dtype='float') 45 45 """ 46 46 47 47
Note: See TracChangeset
for help on using the changeset viewer.