Changeset 508475a in sasmodels for sasmodels/kernel_iq.c


Ignore:
Timestamp:
Oct 25, 2018 2:50:35 PM (6 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
c11d09f
Parents:
8b31efa (diff), 2a12d8d8 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
git-author:
Paul Kienzle <pkienzle@…> (10/25/18 14:20:45)
git-committer:
Paul Kienzle <pkienzle@…> (10/25/18 14:50:35)
Message:

Merge branch 'ticket-1015-gpu-mem-error' into cuda-test

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernel_iq.c

    r70530778 r74e9b5f  
    278278    const int32_t pd_start,     // where we are in the dispersity loop 
    279279    const int32_t pd_stop,      // where we are stopping in the dispersity loop 
    280     global const ProblemDetails *details, 
    281     global const double *values, 
    282     global const double *q, // nq q values, with padding to boundary 
    283     global double *result,  // nq+1 return values, again with padding 
     280    pglobal const ProblemDetails *details, 
     281    pglobal const double *values, 
     282    pglobal const double *q, // nq q values, with padding to boundary 
     283    pglobal double *result,  // nq+1 return values, again with padding 
    284284    const double cutoff     // cutoff in the dispersity weight product 
    285285    ) 
    286286{ 
    287 #ifdef USE_OPENCL 
     287#if defined(USE_GPU) 
    288288  // who we are and what element we are working with 
     289  #if defined(USE_OPENCL) 
    289290  const int q_index = get_global_id(0); 
     291  #else // USE_CUDA 
     292  const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 
     293  #endif 
    290294  if (q_index >= nq) return; 
    291295#else 
     
    340344  // seeing one q value (stored in the variable "this_result") while the dll 
    341345  // version must loop over all q. 
    342   #ifdef USE_OPENCL 
     346  #if defined(USE_GPU) 
    343347    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    344348    double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 
    345   #else // !USE_OPENCL 
     349  #else // !USE_GPU 
    346350    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    347351    if (pd_start == 0) { 
     
    352356    } 
    353357    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
    354 #endif // !USE_OPENCL 
     358#endif // !USE_GPU 
    355359 
    356360 
     
    375379  const int n4 = pd_length[4]; 
    376380  const int p4 = pd_par[4]; 
    377   global const double *v4 = pd_value + pd_offset[4]; 
    378   global const double *w4 = pd_weight + pd_offset[4]; 
     381  pglobal const double *v4 = pd_value + pd_offset[4]; 
     382  pglobal const double *w4 = pd_weight + pd_offset[4]; 
    379383  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start 
    380384 
     
    562566  const int n##_LOOP = details->pd_length[_LOOP]; \ 
    563567  const int p##_LOOP = details->pd_par[_LOOP]; \ 
    564   global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
    565   global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
     568  pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
     569  pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
    566570  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 
    567571 
     
    587591// Pointers to the start of the dispersity and weight vectors, if needed. 
    588592#if MAX_PD>0 
    589   global const double *pd_value = values + NUM_VALUES; 
    590   global const double *pd_weight = pd_value + details->num_weights; 
     593  pglobal const double *pd_value = values + NUM_VALUES; 
     594  pglobal const double *pd_weight = pd_value + details->num_weights; 
    591595#endif 
    592596 
     
    648652      BUILD_ROTATION(); 
    649653 
    650 #ifndef USE_OPENCL 
     654#if !defined(USE_GPU) 
    651655      // DLL needs to explicitly loop over the q values. 
    652656      #ifdef USE_OPENMP 
     
    654658      #endif 
    655659      for (q_index=0; q_index<nq; q_index++) 
    656 #endif // !USE_OPENCL 
     660#endif // !USE_GPU 
    657661      { 
    658662 
     
    697701//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 
    698702 
    699         #ifdef USE_OPENCL 
     703        #if defined(USE_GPU) 
    700704          this_result += weight * scattering; 
    701         #else // !USE_OPENCL 
     705        #else // !USE_GPU 
    702706          result[q_index] += weight * scattering; 
    703         #endif // !USE_OPENCL 
     707        #endif // !USE_GPU 
    704708      } 
    705709    } 
     
    725729 
    726730// Remember the current result and the updated norm. 
    727 #ifdef USE_OPENCL 
     731#if defined(USE_GPU) 
    728732  result[q_index] = this_result; 
    729733  if (q_index == 0) result[nq] = pd_norm; 
    730734//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 
    731 #else // !USE_OPENCL 
     735#else // !USE_GPU 
    732736  result[nq] = pd_norm; 
    733737//printf("res: %g/%g\n", result[0], pd_norm); 
    734 #endif // !USE_OPENCL 
     738#endif // !USE_GPU 
    735739 
    736740// ** clear the macros in preparation for the next kernel ** 
Note: See TracChangeset for help on using the changeset viewer.