Changeset 8fb2a94 in sasmodels for explore


Ignore:
Timestamp:
Jan 11, 2018 1:51:11 PM (7 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
c11d09f
Parents:
97be877
Message:

play with numba JIT compiler

File:
1 edited

Legend:

Unmodified
Added
Removed
  • explore/realspace.py

    r97be877 r8fb2a94  
    244244        return values, self._adjust(points) 
    245245 
    246 def calc_Iqxy(qx, qy, rho, points, volume=1, view=(0, 0, 0)): 
    247     x, y, z = points.T 
     246NUMBA = False 
     247if NUMBA: 
     248    from numba import njit 
     249    @njit("f8[:](f8[:],f8[:],f8[:],f8[:],f8[:],f8[:],f8[:])") 
     250    def _Iqxy(values, x, y, z, qa, qb, qc): 
     251        Iq = np.zeros_like(qa) 
     252        for j in range(len(Iq)): 
     253            total = 0. + 0j 
     254            for k in range(len(Iq)): 
     255                total += values[k]*np.exp(1j*(qa[j]*x[k] + qb[j]*y[k] + qc[j]*z[k])) 
     256            Iq[j] = abs(total)**2 
     257        return Iq 
     258 
     259def calc_Iqxy(qx, qy, rho, points, volume=1.0, view=(0, 0, 0)): 
    248260    qx, qy = np.broadcast_arrays(qx, qy) 
    249261    qa, qb, qc = invert_view(qx, qy, view) 
    250262    rho, volume = np.broadcast_arrays(rho, volume) 
    251263    values = rho*volume 
     264    x, y, z = points.T 
    252265 
    253266    # I(q) = |sum V(r) rho(r) e^(1j q.r)|^2 / sum V(r) 
    254     Iq = [abs(np.sum(values*np.exp(1j*(qa_k*x + qb_k*y + qc_k*z))))**2 
    255             for qa_k, qb_k, qc_k in zip(qa.flat, qb.flat, qc.flat)] 
    256     return np.array(Iq).reshape(qx.shape) / np.sum(volume) 
     267    if NUMBA: 
     268        Iq = _Iqxy(values, x, y, z, qa.flatten(), qb.flatten(), qc.flatten()) 
     269    else: 
     270        Iq = [abs(np.sum(values*np.exp(1j*(qa_k*x + qb_k*y + qc_k*z))))**2 
     271              for qa_k, qb_k, qc_k in zip(qa.flat, qb.flat, qc.flat)] 
     272    return np.asarray(Iq).reshape(qx.shape) / np.sum(volume) 
    257273 
    258274def _calc_Pr_nonuniform(r, rho, points): 
     
    275291            print("processing %d of %d"%(k, len(rho)-1)) 
    276292    Pr = extended_Pr[1:-1] 
    277     return Pr / Pr.max() 
    278  
    279 def calc_Pr(r, rho, points): 
    280     # P(r) with uniform steps in r is 3x faster; check if we are uniform 
    281     # before continuing 
    282     if np.max(np.abs(np.diff(r) - r[0])) > r[0]*0.01: 
    283         return _calc_Pr_nonuniform(r, rho, points) 
    284  
     293    return Pr 
     294 
     295def _calc_Pr_uniform(r, rho, points): 
    285296    # Make Pr a little be bigger than necessary so that only distances 
    286297    # min < d < max end up in Pr 
    287     n_max = len(r) 
     298    dr, n_max = r[0], len(r) 
    288299    extended_Pr = np.zeros(n_max+1, 'd') 
    289300    t0 = time.clock() 
    290301    t_next = t0 + 3 
    291     row_zero = np.zeros(len(rho), 'i') 
    292302    for k, rho_k in enumerate(rho[:-1]): 
    293303        distances = sqrt(np.sum((points[k] - points[k+1:])**2, axis=1)) 
    294304        weights = rho_k * rho[k+1:] 
    295         index = np.minimum(np.asarray(distances/r[0], 'i'), n_max) 
     305        index = np.minimum(np.asarray(distances/dr, 'i'), n_max) 
    296306        # Note: indices may be duplicated, so "Pr[index] += w" will not work!! 
    297307        extended_Pr += np.bincount(index, weights, n_max+1) 
     
    305315    # we are only accumulating the upper triangular distances. 
    306316    #Pr = Pr * 2 / len(rho)**2 
    307     return Pr / Pr.max() 
     317    return Pr 
    308318 
    309319    # Can get an additional 2x by going to C.  Cuda/OpenCL will allow even 
     
    327337} 
    328338""" 
     339 
     340if NUMBA: 
     341    @njit("f8[:](f8[:], f8[:], f8[:,:])") 
     342    def _calc_Pr_uniform_jit(r, rho, points): 
     343        dr = r[0] 
     344        n_max = len(r) 
     345        Pr = np.zeros_like(r) 
     346        for j in range(len(rho) - 1): 
     347            x, y, z = points[j, 0], points[j, 1], points[j, 2] 
     348            for k in range(j+1, len(rho)): 
     349                distance = sqrt((x - points[k, 0])**2 
     350                                + (y - points[k, 1])**2 
     351                                + (z - points[k, 2])**2) 
     352                index = int(distance/dr) 
     353                if index < n_max: 
     354                    Pr[index] += rho[j] * rho[k] 
     355        # Make Pr independent of sampling density.  The factor of 2 comes because 
     356        # we are only accumulating the upper triangular distances. 
     357        #Pr = Pr * 2 / len(rho)**2 
     358        return Pr 
     359 
     360 
     361def calc_Pr(r, rho, points): 
     362    # P(r) with uniform steps in r is 3x faster; check if we are uniform 
     363    # before continuing 
     364    if np.max(np.abs(np.diff(r) - r[0])) > r[0]*0.01: 
     365        Pr = _calc_Pr_nonuniform(r, rho, points) 
     366    else: 
     367        if NUMBA: 
     368            Pr = _calc_Pr_uniform_jit(r, rho, points) 
     369        else: 
     370            Pr = _calc_Pr_uniform(r, rho, points) 
     371    return Pr / Pr.max() 
     372 
    329373 
    330374def j0(x): 
     
    404448    q = np.logspace(-3, 0, 200) 
    405449    r = shape.r_bins(q, r_step=0.01) 
    406     sampling_density = 50000 / shape.volume() 
     450    sampling_density = 6*5000 / shape.volume() 
    407451    rho, points = shape.sample(sampling_density) 
     452    t0 = time.time() 
    408453    Pr = calc_Pr(r, rho-rho_solvent, points) 
     454    print("calc Pr time", time.time() - t0) 
    409455    Iq = calc_Iq(q, r, Pr) 
    410456    theory = (q, fn(q)) if fn is not None else None 
     
    586632    def fn(q): 
    587633        return csbox_Iq(q, a, b, c, da, db, dc, slda, sldb, sldc, sld_core) 
    588     check_shape(shape, fn) 
     634    #check_shape(shape, fn) 
    589635 
    590636    view = (20, 30, 40) 
     
    592638        return csbox_Iqxy(qx, qy, a, b, c, da, db, dc, 
    593639                          slda, sldb, sldc, sld_core, view=view) 
    594     #check_shape_2d(shape, fn_xy, view=view) 
     640    check_shape_2d(shape, fn_xy, view=view) 
    595641 
    596642if __name__ == "__main__": 
Note: See TracChangeset for help on using the changeset viewer.