Changeset 4f1f876 in sasmodels


Ignore:
Timestamp:
Jul 28, 2016 6:39:28 PM (8 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
58210db
Parents:
0f00d95
Message:

Intel GPU wants data vectors to follow cache alignment

Location:
sasmodels
Files:
2 edited

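Legend:

Unmodified (line shows both the old and the new line number, run together, e.g. 176176)
Added (line shows only the new line number, in the second column)
Removed (line shows only the old line number, in the first column)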
  • sasmodels/details.py

    r9eb3632 r4f1f876  
    176176    if all(len(w)==1 for w in weights): 
    177177        call_details = mono_details(kernel.info) 
    178         data = np.array(scalars+scalars+[1]*len(scalars), dtype=kernel.dtype) 
     178        # Pad value array to a 32 value boundary 
     179        data_len = 3*len(scalars) 
     180        extra = ((data_len+31)//32)*32 - data_len 
     181        data = np.array(scalars+scalars+[1.]*len(scalars)+[0.]*extra, dtype=kernel.dtype) 
    179182    else: 
    180183        call_details = poly_details(kernel.info, weights) 
    181         data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype) 
     184        # Pad value array to a 32 value boundary 
     185        data_len = len(scalars) + 2*sum(len(v) for v in values) 
     186        extra = ((data_len+31)//32)*32 - data_len 
     187        data = np.hstack(scalars+list(values)+list(weights)+[0.]*extra).astype(kernel.dtype) 
    182188    is_magnetic = convert_magnetism(kernel.info.parameters, data) 
    183189    #call_details.show() 
  • sasmodels/kernel_iq.cl

    r0f00d95 r4f1f876  
    2828} ProblemDetails; 
    2929 
     30// Intel HD 4000 needs private arrays to be a multiple of 4 long 
    3031typedef struct { 
    3132    PARAMETER_TABLE 
     33} ParameterTable; 
     34typedef union { 
     35    ParameterTable table; 
     36    double vector[4*((NUM_PARS+3)/4)]; 
    3237} ParameterBlock; 
    3338#endif // _PAR_BLOCK_ 
     
    8792  // walk the polydispersity cube.  local_values will be aliased to pvec. 
    8893  ParameterBlock local_values; 
    89   double *pvec = (double *)&local_values; 
    9094 
    9195  // Fill in the initial variables 
    9296  for (int i=0; i < NUM_PARS; i++) { 
    93     pvec[i] = values[2+i]; 
    94 //if (q_index==0) printf("p%d = %g\n",i, pvec[i]); 
     97    local_values.vector[i] = values[2+i]; 
     98//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]); 
    9599  } 
    96100 
    97101#if defined(MAGNETIC) && NUM_MAGNETIC>0 
    98   // Location of the sld parameters in the parameter pvec. 
     102  // Location of the sld parameters in the parameter vector. 
    99103  // These parameters are updated with the effective sld due to magnetism. 
    100104  #if NUM_MAGNETIC > 3 
     
    183187  const double weight5 = 1.0; 
    184188  while (i4 < n4) { 
    185     pvec[p4] = v4[i4]; 
     189    local_values.vector[p4] = v4[i4]; 
    186190    double weight4 = w4[i4] * weight5; 
    187 //if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, pvec[p4], weight4); 
     191//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4); 
    188192#elif MAX_PD>3 
    189193    const double weight4 = 1.0; 
     
    191195#if MAX_PD>3 
    192196  while (i3 < n3) { 
    193     pvec[p3] = v3[i3]; 
     197    local_values.vector[p3] = v3[i3]; 
    194198    double weight3 = w3[i3] * weight4; 
    195 //if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, pvec[p3], weight3); 
     199//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3); 
    196200#elif MAX_PD>2 
    197201    const double weight3 = 1.0; 
     
    199203#if MAX_PD>2 
    200204  while (i2 < n2) { 
    201     pvec[p2] = v2[i2]; 
     205    local_values.vector[p2] = v2[i2]; 
    202206    double weight2 = w2[i2] * weight3; 
    203 //if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, pvec[p2], weight2); 
     207//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2); 
    204208#elif MAX_PD>1 
    205209    const double weight2 = 1.0; 
     
    207211#if MAX_PD>1 
    208212  while (i1 < n1) { 
    209     pvec[p1] = v1[i1]; 
     213    local_values.vector[p1] = v1[i1]; 
    210214    double weight1 = w1[i1] * weight2; 
    211 //if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, pvec[p1], weight1); 
     215//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1); 
    212216#elif MAX_PD>0 
    213217    const double weight1 = 1.0; 
     
    215219#if MAX_PD>0 
    216220  if (slow_theta) { // Theta is not in inner loop 
    217     spherical_correction = fmax(fabs(cos(M_PI_180*pvec[theta_par])), 1.e-6); 
     221    spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[theta_par])), 1.e-6); 
    218222  } 
    219223  while(i0 < n0) { 
    220     pvec[p0] = v0[i0]; 
     224    local_values.vector[p0] = v0[i0]; 
    221225    double weight0 = w0[i0] * weight1; 
    222 //if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, pvec[p0], weight0); 
     226//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0); 
    223227    if (fast_theta) { // Theta is in inner loop 
    224       spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0])), 1.e-6); 
     228      spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[p0])), 1.e-6); 
    225229    } 
    226230#else 
     
    228232#endif 
    229233 
    230 //if (q_index == 0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, pvec[i]); printf("\n"); } 
     234//if (q_index == 0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n"); } 
    231235//if (q_index == 0) printf("sphcor: %g\n", spherical_correction); 
    232236 
    233237    #ifdef INVALID 
    234     if (!INVALID(local_values)) 
     238    if (!INVALID(local_values.table)) 
    235239    #endif 
    236240    { 
     
    241245        // would be problems looking at models with theta=90. 
    242246        const double weight = weight0 * spherical_correction; 
    243         pd_norm += weight * CALL_VOLUME(local_values); 
     247        pd_norm += weight * CALL_VOLUME(local_values.table); 
    244248 
    245249#if defined(MAGNETIC) && NUM_MAGNETIC > 0 
     
    267271                #define M3 NUM_PARS+13 
    268272                #define SLD(_M_offset, _sld_offset) \ 
    269                     pvec[_sld_offset] = xs * (axis \ 
     273                    local_values.vector[_sld_offset] = xs * (axis \ 
    270274                    ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 
    271275                    : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 
     
    285289                } 
    286290                #endif 
    287                 scattering += CALL_IQ(q, q_index, local_values); 
     291                scattering += CALL_IQ(q, q_index, local_values.table); 
    288292              } 
    289293            } 
     
    291295        } 
    292296#else  // !MAGNETIC 
    293         const double scattering = CALL_IQ(q, q_index, local_values); 
     297        const double scattering = CALL_IQ(q, q_index, local_values.table); 
    294298#endif // !MAGNETIC 
    295299        this_result += weight * scattering; 
Note: See TracChangeset for help on using the changeset viewer.