Changeset a42fec0 in sasmodels for Models/code_cylinder.py


Ignore:
Timestamp:
Aug 4, 2014 5:20:07 PM (10 years ago)
Author:
HMP1 <helen.park@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
8cdb9f1
Parents:
099e053
Message:

Speed-up of 3X, compare.py working

File:
1 edited

Legend:

Unmodified
Added
Removed
  • Models/code_cylinder.py

    rca6c007 ra42fec0  
    66 
    77from weights import GaussianDispersion 
    8 from sasmodel import card 
     8from sasmodel import card, set_precision, set_precision_1d 
    99 
    10  
    11 def set_precision(src, qx, qy, dtype): 
    12     qx = np.ascontiguousarray(qx, dtype=dtype) 
    13     qy = np.ascontiguousarray(qy, dtype=dtype) 
    14     if np.dtype(dtype) == np.dtype('float32'): 
    15         header = """\ 
    16 #define real float 
    17 """ 
    18     else: 
    19         header = """\ 
    20 #pragma OPENCL EXTENSION cl_khr_fp64: enable 
    21 #define real double 
    22 """ 
    23     return header+src, qx, qy 
    24  
    25 def set_precision_1d(src, q, dtype): 
    26     q = np.ascontiguousarray(q, dtype=dtype) 
    27     if np.dtype(dtype) == np.dtype('float32'): 
    28         header = """\ 
    29 #define real float 
    30 """ 
    31     else: 
    32         header = """\ 
    33 #pragma OPENCL EXTENSION cl_khr_fp64: enable 
    34 #define real double 
    35 """ 
    36     return header+src, q 
    3710 
    3811class GpuCylinder(object): 
     
    5528        self.qx_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qx) 
    5629        self.qy_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.qy) 
    57         self.res_b = cl.Buffer(ctx, mf.WRITE_ONLY, qx.nbytes) 
     30        self.res_b = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, self.qx.nbytes) 
    5831        self.res = np.empty_like(self.qx) 
    5932 
     
    6134 
    6235        _ctx,queue = card() 
     36        self.res[:] = 0 
     37        cl.enqueue_copy(queue, self.res_b, self.res) 
    6338        radius, length, cyl_theta, cyl_phi = \ 
    6439            [GaussianDispersion(int(pars[base+'_pd_n']), pars[base+'_pd'], pars[base+'_pd_nsigma']) 
     
    6843        radius.value, radius.weight = radius.get_weights(pars['radius'], 0, 10000, True) 
    6944        length.value, length.weight = length.get_weights(pars['length'], 0, 10000, True) 
    70         cyl_theta.value, cyl_theta.weight = cyl_theta.get_weights(pars['cyl_theta'], -90, 180, False) 
    71         cyl_phi.value, cyl_phi.weight = cyl_phi.get_weights(pars['cyl_phi'], -90, 180, False) 
     45        cyl_theta.value, cyl_theta.weight = cyl_theta.get_weights(pars['cyl_theta'], -np.inf, np.inf, False) 
     46        cyl_phi.value, cyl_phi.weight = cyl_phi.get_weights(pars['cyl_phi'], -np.inf, np.inf, False) 
    7247 
    7348        #Perform the computation, with all weight points 
     
    8762                                           real(cyl_phi.weight[l]), real(cyl_theta.value[k]), real(cyl_phi.value[l]), 
    8863                                           np.uint32(self.qx.size), np.uint32(size)) 
    89                         cl.enqueue_copy(queue, self.res, self.res_b) 
    90                         sum += self.res 
     64 
    9165                        vol += radius.weight[i]*length.weight[j]*pow(radius.value[i], 2)*length.value[j] 
    9266                        norm_vol += radius.weight[i]*length.weight[j] 
     
    9569       # if size > 1: 
    9670        #    norm /= math.asin(1.0) 
     71        cl.enqueue_copy(queue, self.res, self.res_b) 
     72        sum = self.res 
    9773        if vol != 0.0 and norm_vol != 0.0: 
    9874            sum *= norm_vol/vol 
Note: See TracChangeset for help on using the changeset viewer.