Changeset 07646b6 in sasmodels for sasmodels/kernel_iq.c


Ignore:
Timestamp:
Oct 25, 2018 1:43:01 PM (6 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
149eb53
Parents:
31fc4ad (diff), d5ce7fa (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
git-author:
Paul Kienzle <pkienzle@…> (10/25/18 12:41:48)
git-committer:
Paul Kienzle <pkienzle@…> (10/25/18 13:43:01)
Message:

Merge branch 'cuda-test' into beta_approx

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernel_iq.c

    re44432d r07646b6  
    277277kernel 
    278278void KERNEL_NAME( 
    279     int32_t nq,                 // number of q values 
    280     const int32_t pd_start,     // where we are in the dispersity loop 
    281     const int32_t pd_stop,      // where we are stopping in the dispersity loop 
    282     global const ProblemDetails *details, 
    283     global const double *values, 
    284     global const double *q, // nq q values, with padding to boundary 
    285     global double *result,  // nq+1 return values, again with padding 
    286     const double cutoff,     // cutoff in the dispersity weight product 
     279    int32_t nq,                   // number of q values 
     280    const int32_t pd_start,       // where we are in the dispersity loop 
     281    const int32_t pd_stop,        // where we are stopping in the dispersity loop 
     282    pglobal const ProblemDetails *details, 
     283    pglobal const double *values, // parameter values and distributions 
     284    pglobal const double *q,      // nq q values, with padding to boundary 
     285    pglobal double *result,       // nq+1 return values, again with padding 
     286    const double cutoff,          // cutoff in the dispersity weight product 
    287287    int32_t effective_radius_type // which effective radius to compute 
    288288    ) 
    289289{ 
    290 #ifdef USE_OPENCL 
     290#if defined(USE_GPU) 
    291291  // who we are and what element we are working with 
     292  #if defined(USE_OPENCL) 
    292293  const int q_index = get_global_id(0); 
     294  #else // USE_CUDA 
     295  const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 
     296  #endif 
    293297  if (q_index >= nq) return; 
    294298#else 
     
    341345  // 
    342346  // The code differs slightly between opencl and dll since opencl is only 
    343   // seeing one q value (stored in the variable "this_result") while the dll 
     347  // seeing one q value (stored in the variable "this_F2") while the dll 
    344348  // version must loop over all q. 
    345   #ifdef USE_OPENCL 
     349  #if defined(CALL_FQ) 
     350    double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 
     351    double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 
     352    double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 
     353    double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 
     354  #else 
     355    double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     356    double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 
     357    double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 
     358    double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 
     359  #endif 
     360  #if defined(USE_GPU) 
    346361    #if defined(CALL_FQ) 
    347       double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 
    348       double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 
    349       double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 
    350       double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 
    351362      double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]); 
    352363      double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]); 
    353364    #else 
    354       double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    355       double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 
    356       double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 
    357       double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 
    358       double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 
     365      double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]); 
    359366    #endif 
    360   #else // !USE_OPENCL 
    361     #if defined(CALL_FQ) 
    362       double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 
    363       double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 
    364       double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 
    365       double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 
    366     #else 
    367       double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    368       double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 
    369       double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 
    370       double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 
    371     #endif 
     367  #else // !USE_GPU 
    372368    if (pd_start == 0) { 
    373369      #ifdef USE_OPENMP 
     
    381377      #endif 
    382378    } 
    383     //if (q_index==0) printf("start %d %g %g\n", pd_start, weighted_shell, result[0]); 
    384 #endif // !USE_OPENCL 
     379    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
     380#endif // !USE_GPU 
    385381 
    386382 
     
    405401  const int n4 = pd_length[4]; 
    406402  const int p4 = pd_par[4]; 
    407   global const double *v4 = pd_value + pd_offset[4]; 
    408   global const double *w4 = pd_weight + pd_offset[4]; 
     403  pglobal const double *v4 = pd_value + pd_offset[4]; 
     404  pglobal const double *w4 = pd_weight + pd_offset[4]; 
    409405  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start 
    410406 
     
    441437          FETCH_Q         // set qx,qy from the q input vector 
    442438          APPLY_ROTATION  // convert qx,qy to qa,qb,qc 
    443           CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...) 
     439          CALL_KERNEL     // F2 = Iqxy(qa, qb, qc, p1, p2, ...) 
    444440 
    445441      ++step;  // increment counter representing position in dispersity mesh 
     
    613609  const int n##_LOOP = details->pd_length[_LOOP]; \ 
    614610  const int p##_LOOP = details->pd_par[_LOOP]; \ 
    615   global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
    616   global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
     611  pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
     612  pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
    617613  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 
    618614 
     
    638634// Pointers to the start of the dispersity and weight vectors, if needed. 
    639635#if MAX_PD>0 
    640   global const double *pd_value = values + NUM_VALUES; 
    641   global const double *pd_weight = pd_value + details->num_weights; 
     636  pglobal const double *pd_value = values + NUM_VALUES; 
     637  pglobal const double *pd_weight = pd_value + details->num_weights; 
    642638#endif 
    643639 
     
    706702      BUILD_ROTATION(); 
    707703 
    708 #ifndef USE_OPENCL 
     704#if !defined(USE_GPU) 
    709705      // DLL needs to explicitly loop over the q values. 
    710706      #ifdef USE_OPENMP 
     
    712708      #endif 
    713709      for (q_index=0; q_index<nq; q_index++) 
    714 #endif // !USE_OPENCL 
     710#endif // !USE_GPU 
    715711      { 
    716712 
     
    721717        #if defined(MAGNETIC) && NUM_MAGNETIC > 0 
    722718          // Compute the scattering from the magnetic cross sections. 
    723           double scattering = 0.0; 
     719          double F2 = 0.0; 
    724720          const double qsq = qx*qx + qy*qy; 
    725721          if (qsq > 1.e-16) { 
     
    746742//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz); 
    747743                } 
    748                 scattering += xs_weight * CALL_KERNEL(); 
     744                F2 += xs_weight * CALL_KERNEL(); 
    749745              } 
    750746            } 
     
    754750            CALL_KERNEL(); // sets F1 and F2 by reference 
    755751          #else 
    756             const double scattering = CALL_KERNEL(); 
     752            const double F2 = CALL_KERNEL(); 
    757753          #endif 
    758754        #endif // !MAGNETIC 
    759 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 
    760  
    761         #ifdef USE_OPENCL 
     755//printf("q_index:%d %g %g %g %g\n", q_index, F2, weight0); 
     756 
     757        #if defined(USE_GPU) 
    762758          #if defined(CALL_FQ) 
    763759            this_F2 += weight * F2; 
    764760            this_F1 += weight * F1; 
    765761          #else 
    766             this_result += weight * scattering; 
     762            this_F2 += weight * F2; 
    767763          #endif 
    768764        #else // !USE_OPENCL 
     
    771767            result[2*q_index+1] += weight * F1; 
    772768          #else 
    773             result[q_index] += weight * scattering; 
     769            result[q_index] += weight * F2; 
    774770          #endif 
    775771        #endif // !USE_OPENCL 
     
    795791#endif 
    796792 
    797 // Remember the current result and the updated norm. 
    798 #ifdef USE_OPENCL 
     793// Remember the results and the updated norm. 
     794#if defined(USE_GPU) 
    799795  #if defined(CALL_FQ) 
    800     result[2*q_index+0] = this_F2; 
    801     result[2*q_index+1] = this_F1; 
    802     if (q_index == 0) { 
    803       result[2*nq+0] = weight_norm; 
    804       result[2*nq+1] = weighted_form; 
    805       result[2*nq+3] = weighted_shell; 
    806       result[2*nq+3] = weighted_radius; 
    807     } 
     796  result[2*q_index+0] = this_F2; 
     797  result[2*q_index+1] = this_F1; 
    808798  #else 
    809     result[q_index] = this_result; 
    810     if (q_index == 0) { 
    811       result[nq+0] = weight_norm; 
    812       result[nq+1] = weighted_form; 
    813       result[nq+2] = weighted_shell; 
    814       result[nq+3] = weighted_radius; 
    815     } 
     799  result[q_index] = this_F2; 
    816800  #endif 
    817  
    818 //if (q_index == 0) printf("res: %g/%g\n", result[0], weighted_shell); 
    819 #else // !USE_OPENCL 
    820   #if defined(CALL_FQ) 
     801  if (q_index == 0) 
     802#endif 
     803  { 
     804#if defined(CALL_FQ) 
    821805    result[2*nq] = weight_norm; 
    822806    result[2*nq+1] = weighted_form; 
    823807    result[2*nq+2] = weighted_shell; 
    824808    result[2*nq+3] = weighted_radius; 
    825   #else 
     809#else 
    826810    result[nq] = weight_norm; 
    827811    result[nq+1] = weighted_form; 
    828812    result[nq+2] = weighted_shell; 
    829813    result[nq+3] = weighted_radius; 
    830   #endif 
    831 //printf("res: %g/%g\n", result[0], weighted_shell); 
    832 #endif // !USE_OPENCL 
     814#endif 
     815  } 
    833816 
    834817// ** clear the macros in preparation for the next kernel ** 
Note: See TracChangeset for help on using the changeset viewer.