-                      r8a20be5
+                      r8faffcd
 import pyopencl as cl
 from weights import GaussianDispersion
+from sasmodel import card
+def set_precision(src, qx, qy, dtype):  # Prepend a `real` type header chosen from dtype to the kernel source; returns (header+src, qx, qy).
+    qx = np.ascontiguousarray(qx, dtype=dtype)  # qx as a C-contiguous array converted to the requested dtype
+    qy = np.ascontiguousarray(qy, dtype=dtype)  # qy gets the same conversion as qx
+    if np.dtype(dtype) == np.dtype('float32'):  # single precision: `real` is defined as float
+        header = """\
+#define real float
+"""
+    else:  # every dtype other than float32 takes the double header, which also enables cl_khr_fp64
+        header = """\
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
+#define real double
+"""
+    return header+src, qx, qy  # src itself is not modified; the header is only prefixed to it
 class GpuCapCylinder(object):
 …
     PD_PARS = ['rad_cyl', 'length', 'rad_cap', 'theta', 'phi']
     def __init__(self, qx, qy):
+    def __init__(self, qx, qy, dtype='float32'):
-        self.qx = np.asarray(qx, np.float32)
-        self.qy = np.asarray(qy, np.float32)
         #create context, queue, and build program
+        self.ctx = cl.create_some_context()
+        self.queue = cl.CommandQueue(self.ctx)
+        ctx,_queue = card()
+        trala = open('NR_BessJ1.cpp').read()+"\n"+open('Capcyl_Kfun.cpp').read()+"\n"+open('Kernel-Cylinder.cpp').read()
+        src, qx, qy = set_precision(trala, qx, qy, dtype=dtype)
         self.prg = cl.Program(self.ctx, open('Kernel-CapCyl.cpp').read()).build()
+        self.prg = cl.Program(ctx, open('Kernel-CapCyl.cpp').read()).build()
         #buffers
         mf = cl.mem_flags
         self.qx_b = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qx)
         self.qy_b = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qy)
         self.res_b = cl.Buffer(self.ctx, mf.WRITE_ONLY, qx.nbytes)
+        self.qx_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qx)
+        self.qy_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qy)
+        self.res_b = cl.Buffer(ctx, mf.WRITE_ONLY, qx.nbytes)
         self.res = np.empty_like(self.qx)
         self.vol_i = float(0.0)
         self.vol_b = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.vol_i.nbytes)
+        self.vol_b = cl.Buffer(ctx, mf.WRITE_ONLY, self.vol_i.nbytes)
     def eval(self, pars):
+        _ctx,queue = card()
         rad_cyl,length,rad_cap,theta,phi = \
             [GaussianDispersion(int(pars[base+'_pd_n']), pars[base+'_pd'], pars[base+'_pd_nsigma'])
 …
                         for l in xrange(len(phi.weight)):
                             self.prg.CapCylinderKernel(self.queue, self.qx.shape, None, self.qx_b, self.qy_b, self.res_b,
+                            self.prg.CapCylinderKernel(queue, self.qx.shape, None, self.qx_b, self.qy_b, self.res_b,
                                         self.vol_b, np.float32(rad_cyl.value[i]), np.float32(rad_cap.value[m]), np.float32(length.value[j]),
                                         np.float32(theta.value[k]), np.float32(phi.value[l]), np.float32(sub), np.float32(pars['scale']),
 …
                                         np.float32(rad_cyl.weight[i]), np.float32(length.weight[j]), np.uint32(self.qx.size), np.uint32(size))
                             cl.enqueue_copy(self.queue, self.res, self.res_b)
                             cl.enqueue_copy(self.queue, self.vol_i, self.vol_b)
+                            cl.enqueue_copy(queue, self.res, self.res_b)
+                            cl.enqueue_copy(queue, self.vol_i, self.vol_b)
                             sum += self.res

Note: See TracChangeset for help on using the changeset viewer.

SasView

Changeset 8faffcd in sasmodels for capcylcope.py

Legend:

capcylcope.py

Download in other formats: