[a953943] | 1 | #!/usr/bin/env python |
---|
| 2 | # -*- coding: utf-8 -*- |
---|
| 3 | |
---|
| 4 | |
---|
| 5 | import ctypes |
---|
| 6 | from ctypes import c_int, c_double, c_void_p |
---|
| 7 | import numpy as np |
---|
| 8 | import pyopencl as cl |
---|
| 9 | from pyopencl import mem_flags as mf |
---|
| 10 | |
---|
| 11 | from weights import GaussianDispersion |
---|
| 12 | from sasmodel import card, set_precision, set_precision_1d, tic, toc |
---|
| 13 | |
---|
| 14 | class GpuCoreShellCylinder(object): |
---|
| 15 | PARS = {'scale':1, 'radius':1, 'thickness':1, 'length':1, 'core_sld':1e-6, 'shell_sld':-1e-6, 'solvent_sld':0, |
---|
| 16 | 'background':0, 'axis_theta':0, 'axis_phi':0} |
---|
| 17 | PD_PARS = ['radius', 'length', 'thickness', 'axis_phi', 'axis_theta'] |
---|
| 18 | |
---|
| 19 | def __init__(self, qx, qy, dtype='float32', cutoff=1e-5): |
---|
| 20 | #create context, queue, and build program |
---|
| 21 | ctx,_queue = card() |
---|
| 22 | src, qx, qy = set_precision(open('Kernel/NR_BessJ1.cpp').read()+"\n"+open('Kernel/Kernel-CoreShellCylinder_f.cpp').read(), qx, qy, dtype=dtype) |
---|
| 23 | self.prg = cl.Program(ctx, src).build() |
---|
| 24 | self.qx, self.qy = qx, qy |
---|
| 25 | self.cutoff = cutoff |
---|
| 26 | |
---|
| 27 | self.qx_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qx) |
---|
| 28 | self.qy_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qy) |
---|
| 29 | self.res_b = cl.Buffer(ctx, mf.READ_WRITE, self.qx.nbytes) |
---|
| 30 | self.res = np.empty_like(self.qx) |
---|
| 31 | |
---|
| 32 | def eval(self, pars): |
---|
| 33 | tic() |
---|
| 34 | |
---|
| 35 | ctx,queue = card() |
---|
| 36 | real = np.float32 if self.qx.dtype == np.dtype('float32') else np.float64 |
---|
| 37 | loops, loop_lengths = make_loops(pars, dtype=self.qx.dtype) |
---|
| 38 | loops_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) |
---|
| 39 | loops_l = cl.LocalMemory(len(loops.data)) |
---|
| 40 | |
---|
| 41 | self.prg.CoreShellCylinderKernel(queue, self.qx.shape, None, |
---|
| 42 | self.qx_b, self.qy_b, self.res_b, |
---|
| 43 | loops_b, loops_l, real(self.cutoff), |
---|
| 44 | real(pars['scale']), real(pars['background']), |
---|
| 45 | real(pars['shell_sld']-pars['solvent_sld']), |
---|
| 46 | real(pars['core_sld']-pars['shell_sld']), |
---|
| 47 | *[np.uint32(pn) for pn in loop_lengths]) |
---|
| 48 | |
---|
| 49 | cl.enqueue_copy(queue, self.res, self.res_b) |
---|
| 50 | print toc()*1000, self.qx.shape[0] |
---|
| 51 | |
---|
| 52 | return self.res |
---|
| 53 | |
---|
| 54 | class CPUCoreShellCylinder(GpuCoreShellCylinder): |
---|
| 55 | def __init__(self, qx, qy, dtype='float32', cutoff=1e-5): |
---|
| 56 | self.qx, self.qy = [np.ascontiguousarray(v,'d') for v in qx,qy] |
---|
| 57 | self.cutoff = cutoff |
---|
| 58 | self.res = np.empty_like(self.qx) |
---|
| 59 | self.dll = ctypes.CDLL('Kernel/coreshellcylinder.so') |
---|
| 60 | self.fn = self.dll['CoreShellCylinderKernel'] |
---|
| 61 | self.fn.argtypes = [ |
---|
| 62 | c_void_p, c_void_p, c_void_p, c_int, |
---|
| 63 | c_void_p, c_double, c_double, c_double, c_double, c_double, |
---|
| 64 | c_int, c_int, c_int, c_int, c_int |
---|
| 65 | ] |
---|
| 66 | def eval(self, pars): |
---|
| 67 | loops, loop_lengths = make_loops(pars, dtype=self.qx.dtype) |
---|
| 68 | self.fn(self.qx.ctypes.data, self.qy.ctypes.data, self.res.ctypes.data, len(self.qx), |
---|
| 69 | loops.ctypes.data, self.cutoff, pars['scale'], pars['background'], |
---|
| 70 | pars['shell_sld']-pars['solvent_sld'], pars['core_sld']-pars['shell-sld'], *loop_lengths) |
---|
| 71 | |
---|
| 72 | return self.res |
---|
| 73 | |
---|
| 74 | |
---|
| 75 | def make_loops(pars, dtype='double'): |
---|
| 76 | # 0.2 ms on sparkle to form the final loops |
---|
| 77 | radius, length, thickness, axis_theta, axis_phi = \ |
---|
| 78 | [GaussianDispersion(int(pars[base+'_pd_n']), pars[base+'_pd'], pars[base+'_pd_nsigma']) |
---|
| 79 | for base in GpuCoreShellCylinder.PD_PARS] |
---|
| 80 | parts = [ |
---|
| 81 | radius.get_weights(pars['radius'], 0, 10000, True), |
---|
| 82 | length.get_weights(pars['length'], 0, 10000, True), |
---|
| 83 | thickness.get_weights(pars['thickness'], 0, 10000, True), |
---|
| 84 | axis_theta.get_weights(pars['axis_theta'], -np.inf, np.inf, False), |
---|
| 85 | axis_phi.get_weights(pars['axis_phi'], -np.inf, np.inf, False), |
---|
| 86 | ] |
---|
| 87 | # Make sure that weights are normalized to peaks at 1 so that |
---|
| 88 | # the tolerance term can be used properly on truncated distributions |
---|
| 89 | loops = np.hstack((v,w/w.max()) for v,w in parts) |
---|
| 90 | #loops = np.hstack(parts) |
---|
| 91 | loops = np.ascontiguousarray(loops.T, dtype).flatten() |
---|
| 92 | return loops, [len(p[0]) for p in parts] |
---|