Changeset 0db7dbd in sasmodels for sasmodels/kernel_iq.c


Ignore:
Timestamp:
Feb 16, 2018 7:10:04 PM (6 years ago)
Author:
pkienzle
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
47fb816
Parents:
aa90015
Message:

cuda support: allow cylinder model to run under CUDA as well as OpenCL

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernel_iq.c

    raadec17 r0db7dbd  
    6767 
    6868// Return value restricted between low and high 
     69__device__ 
    6970static double clip(double value, double low, double high) 
    7071{ 
     
    7980//     du * (m_sigma_y + 1j*m_sigma_z); 
    8081// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag 
     82__device__ 
    8183static void set_spin_weights(double in_spin, double out_spin, double spins[4]) 
    8284{ 
     
    9294 
    9395// Compute the magnetic sld 
     96__device__ 
    9497static double mag_sld( 
    9598  const unsigned int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag 
     
    140143// jitter angles (dtheta, dphi).  This matrix can be applied to all of the 
    141144// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 
     145__device__ 
    142146static void 
    143147qac_rotation( 
     
    173177// Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 
    174178// returning R*[qx,qy]' = [qa,qc]' 
    175 static double 
     179__device__ 
     180static void 
    176181qac_apply( 
    177182    QACRotation *rotation, 
     
    200205// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the 
    201206// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 
     207__device__ 
    202208static void 
    203209qabc_rotation( 
     
    246252// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 
    247253// returning R*[qx,qy]' = [qa,qb,qc]' 
    248 static double 
     254__device__ 
     255static void 
    249256qabc_apply( 
    250257    QABCRotation *rotation, 
     
    267274    const int32_t pd_start,     // where we are in the dispersity loop 
    268275    const int32_t pd_stop,      // where we are stopping in the dispersity loop 
    269     global const ProblemDetails *details, 
    270     global const double *values, 
    271     global const double *q, // nq q values, with padding to boundary 
    272     global double *result,  // nq+1 return values, again with padding 
     276    global_par const ProblemDetails *details, 
     277    global_par const double *values, 
     278    global_par const double *q, // nq q values, with padding to boundary 
     279    global_par double *result,  // nq+1 return values, again with padding 
    273280    const double cutoff     // cutoff in the dispersity weight product 
    274281    ) 
    275282{ 
    276 #ifdef USE_OPENCL 
     283#if defined(USE_GPU) 
    277284  // who we are and what element we are working with 
     285  #if defined(USE_OPENCL) 
    278286  const int q_index = get_global_id(0); 
     287  #else // USE_CUDA 
     288  const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 
     289  #endif 
    279290  if (q_index >= nq) return; 
    280291#else 
     
    329340  // seeing one q value (stored in the variable "this_result") while the dll 
    330341  // version must loop over all q. 
    331   #ifdef USE_OPENCL 
     342  #if defined(USE_GPU) 
    332343    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    333344    double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 
    334   #else // !USE_OPENCL 
     345  #else // !USE_GPU 
    335346    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    336347    if (pd_start == 0) { 
     
    341352    } 
    342353    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
    343 #endif // !USE_OPENCL 
     354#endif // !USE_GPU 
    344355 
    345356 
     
    364375  const int n4 = pd_length[4]; 
    365376  const int p4 = pd_par[4]; 
    366   global const double *v4 = pd_value + pd_offset[4]; 
    367   global const double *w4 = pd_weight + pd_offset[4]; 
     377  global_var const double *v4 = pd_value + pd_offset[4]; 
     378  global_var const double *w4 = pd_weight + pd_offset[4]; 
    368379  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start 
    369380 
     
    551562  const int n##_LOOP = details->pd_length[_LOOP]; \ 
    552563  const int p##_LOOP = details->pd_par[_LOOP]; \ 
    553   global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
    554   global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
     564  global_var const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
     565  global_var const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
    555566  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 
    556567 
     
    576587// Pointers to the start of the dispersity and weight vectors, if needed. 
    577588#if MAX_PD>0 
    578   global const double *pd_value = values + NUM_VALUES; 
    579   global const double *pd_weight = pd_value + details->num_weights; 
     589  global_var const double *pd_value = values + NUM_VALUES; 
     590  global_var const double *pd_weight = pd_value + details->num_weights; 
    580591#endif 
    581592 
     
    637648      BUILD_ROTATION(); 
    638649 
    639 #ifndef USE_OPENCL 
     650#if !defined(USE_GPU) 
    640651      // DLL needs to explicitly loop over the q values. 
    641652      #ifdef USE_OPENMP 
     
    643654      #endif 
    644655      for (q_index=0; q_index<nq; q_index++) 
    645 #endif // !USE_OPENCL 
     656#endif // !USE_GPU 
    646657      { 
    647658 
     
    684695//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 
    685696 
    686         #ifdef USE_OPENCL 
     697        #if defined(USE_GPU) 
    687698          this_result += weight * scattering; 
    688         #else // !USE_OPENCL 
     699        #else // !USE_GPU 
    689700          result[q_index] += weight * scattering; 
    690         #endif // !USE_OPENCL 
     701        #endif // !USE_GPU 
    691702      } 
    692703    } 
     
    712723 
    713724// Remember the current result and the updated norm. 
    714 #ifdef USE_OPENCL 
     725#if defined(USE_GPU) 
    715726  result[q_index] = this_result; 
    716727  if (q_index == 0) result[nq] = pd_norm; 
    717728//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 
    718 #else // !USE_OPENCL 
     729#else // !USE_GPU 
    719730  result[nq] = pd_norm; 
    720731//printf("res: %g/%g\n", result[0], pd_norm); 
    721 #endif // !USE_OPENCL 
     732#endif // !USE_GPU 
    722733 
    723734// ** clear the macros in preparation for the next kernel ** 
Note: See TracChangeset for help on using the changeset viewer.