Changeset 149eb53 in sasmodels for sasmodels/kernel_iq.c


Ignore:
Timestamp:
Oct 25, 2018 1:43:27 PM (6 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
6d5601c
Parents:
07646b6 (diff), 508475a (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'cuda-test' into beta_approx

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernel_iq.c

    r74e9b5f r07646b6  
    2626//  MAGNETIC_PARS : a comma-separated list of indices to the sld 
    2727//      parameters in the parameter table. 
    28 //  CALL_VOLUME(table) : call the form volume function 
     28//  CALL_VOLUME(form, shell, table) : assign form and shell values 
     29//  CALL_EFFECTIVE_RADIUS(type, table) : call the R_eff function 
    2930//  CALL_IQ(q, table) : call the Iq function for 1D calcs. 
    3031//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data. 
     32//  CALL_FQ(q, F1, F2, table) : call the Fq function for 1D calcs. 
     33//  CALL_FQ_A(q, F1, F2, table) : call the Iq function with |q| for 2D data. 
    3134//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes 
    3235//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes 
     
    3639//  PROJECTION : equirectangular=1, sinusoidal=2 
    3740//      see explore/jitter.py for definitions. 
     41 
    3842 
    3943#ifndef _PAR_BLOCK_ // protected block so we can include this code twice. 
     
    270274#endif // _QABC_SECTION 
    271275 
    272  
    273276// ==================== KERNEL CODE ======================== 
    274  
    275277kernel 
    276278void KERNEL_NAME( 
    277     int32_t nq,                 // number of q values 
    278     const int32_t pd_start,     // where we are in the dispersity loop 
    279     const int32_t pd_stop,      // where we are stopping in the dispersity loop 
     279    int32_t nq,                   // number of q values 
     280    const int32_t pd_start,       // where we are in the dispersity loop 
     281    const int32_t pd_stop,        // where we are stopping in the dispersity loop 
    280282    pglobal const ProblemDetails *details, 
    281     pglobal const double *values, 
    282     pglobal const double *q, // nq q values, with padding to boundary 
    283     pglobal double *result,  // nq+1 return values, again with padding 
    284     const double cutoff     // cutoff in the dispersity weight product 
     283    pglobal const double *values, // parameter values and distributions 
     284    pglobal const double *q,      // nq q values, with padding to boundary 
     285    pglobal double *result,       // nq+1 return values, again with padding 
     286    const double cutoff,          // cutoff in the dispersity weight product 
     287    int32_t effective_radius_type // which effective radius to compute 
    285288    ) 
    286289{ 
     
    342345  // 
    343346  // The code differs slightly between opencl and dll since opencl is only 
    344   // seeing one q value (stored in the variable "this_result") while the dll 
     347  // seeing one q value (stored in the variable "this_F2") while the dll 
    345348  // version must loop over all q. 
     349  #if defined(CALL_FQ) 
     350    double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 
     351    double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 
     352    double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 
     353    double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 
     354  #else 
     355    double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     356    double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 
     357    double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 
     358    double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 
     359  #endif 
    346360  #if defined(USE_GPU) 
    347     double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    348     double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 
     361    #if defined(CALL_FQ) 
     362      double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]); 
     363      double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]); 
     364    #else 
     365      double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]); 
     366    #endif 
    349367  #else // !USE_GPU 
    350     double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    351368    if (pd_start == 0) { 
    352369      #ifdef USE_OPENMP 
    353370      #pragma omp parallel for 
    354371      #endif 
    355       for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     372      #if defined(CALL_FQ) 
     373          // 2*nq for F^2,F pairs 
     374          for (int q_index=0; q_index < 2*nq; q_index++) result[q_index] = 0.0; 
     375      #else 
     376          for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     377      #endif 
    356378    } 
    357379    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
     
    415437          FETCH_Q         // set qx,qy from the q input vector 
    416438          APPLY_ROTATION  // convert qx,qy to qa,qb,qc 
    417           CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...) 
     439          CALL_KERNEL     // F2 = Iqxy(qa, qb, qc, p1, p2, ...) 
    418440 
    419441      ++step;  // increment counter representing position in dispersity mesh 
     
    446468// inner loop and defines the macros that use them. 
    447469 
    448 #if defined(CALL_IQ) 
    449   // unoriented 1D 
     470 
     471#if defined(CALL_FQ) // COMPUTE_F1_F2 is true 
     472  // unoriented 1D returning <F> and <F^2> 
     473  double qk; 
     474  double F1, F2; 
     475  #define FETCH_Q() do { qk = q[q_index]; } while (0) 
     476  #define BUILD_ROTATION() do {} while(0) 
     477  #define APPLY_ROTATION() do {} while(0) 
     478  #define CALL_KERNEL() CALL_FQ(qk,F1,F2,local_values.table) 
     479 
     480#elif defined(CALL_FQ_A) 
     481  // unoriented 2D return <F> and <F^2> 
     482  double qx, qy; 
     483  double F1, F2; 
     484  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     485  #define BUILD_ROTATION() do {} while(0) 
     486  #define APPLY_ROTATION() do {} while(0) 
     487  #define CALL_KERNEL() CALL_FQ_A(sqrt(qx*qx+qy*qy),F1,F2,local_values.table) 
     488 
     489#elif defined(CALL_IQ) 
     490  // unoriented 1D return <F^2> 
    450491  double qk; 
    451492  #define FETCH_Q() do { qk = q[q_index]; } while (0) 
    452493  #define BUILD_ROTATION() do {} while(0) 
    453494  #define APPLY_ROTATION() do {} while(0) 
    454   #define CALL_KERNEL() CALL_IQ(qk, local_values.table) 
     495  #define CALL_KERNEL() CALL_IQ(qk,local_values.table) 
    455496 
    456497#elif defined(CALL_IQ_A) 
     
    486527  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc) 
    487528  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table) 
     529 
    488530#elif defined(CALL_IQ_XY) 
    489531  // direct call to qx,qy calculator 
     
    499541// logic in the IQ_AC and IQ_ABC branches.  This will become more important 
    500542// if we implement more projections, or more complicated projections. 
    501 #if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation 
     543#if defined(CALL_IQ) || defined(CALL_IQ_A) || defined(CALL_FQ) || defined(CALL_FQ_A) 
     544  // no orientation 
    502545  #define APPLY_PROJECTION() const double weight=weight0 
    503546#elif defined(CALL_IQ_XY) // pass orientation to the model 
     
    649692    // Note: weight==0 must always be excluded 
    650693    if (weight > cutoff) { 
    651       pd_norm += weight * CALL_VOLUME(local_values.table); 
     694      double form, shell; 
     695      CALL_VOLUME(form, shell, local_values.table); 
     696      weight_norm += weight; 
     697      weighted_form += weight * form; 
     698      weighted_shell += weight * shell; 
     699      if (effective_radius_type != 0) { 
     700        weighted_radius += weight * CALL_EFFECTIVE_RADIUS(effective_radius_type, local_values.table); 
     701      } 
    652702      BUILD_ROTATION(); 
    653703 
     
    667717        #if defined(MAGNETIC) && NUM_MAGNETIC > 0 
    668718          // Compute the scattering from the magnetic cross sections. 
    669           double scattering = 0.0; 
     719          double F2 = 0.0; 
    670720          const double qsq = qx*qx + qy*qy; 
    671721          if (qsq > 1.e-16) { 
     
    692742//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz); 
    693743                } 
    694                 scattering += xs_weight * CALL_KERNEL(); 
     744                F2 += xs_weight * CALL_KERNEL(); 
    695745              } 
    696746            } 
    697747          } 
    698748        #else  // !MAGNETIC 
    699           const double scattering = CALL_KERNEL(); 
     749          #if defined(CALL_FQ) 
     750            CALL_KERNEL(); // sets F1 and F2 by reference 
     751          #else 
     752            const double F2 = CALL_KERNEL(); 
     753          #endif 
    700754        #endif // !MAGNETIC 
    701 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 
     755//printf("q_index:%d %g %g %g %g\n", q_index, F2, weight0); 
    702756 
    703757        #if defined(USE_GPU) 
    704           this_result += weight * scattering; 
    705         #else // !USE_GPU 
    706           result[q_index] += weight * scattering; 
    707         #endif // !USE_GPU 
     758          #if defined(CALL_FQ) 
     759            this_F2 += weight * F2; 
     760            this_F1 += weight * F1; 
     761          #else 
     762            this_F2 += weight * F2; 
     763          #endif 
     764        #else // !USE_OPENCL 
     765          #if defined(CALL_FQ) 
     766            result[2*q_index+0] += weight * F2; 
     767            result[2*q_index+1] += weight * F1; 
     768          #else 
     769            result[q_index] += weight * F2; 
     770          #endif 
     771        #endif // !USE_OPENCL 
    708772      } 
    709773    } 
    710774  } 
    711  
    712775// close nested loops 
    713776++step; 
     
    728791#endif 
    729792 
    730 // Remember the current result and the updated norm. 
     793// Remember the results and the updated norm. 
    731794#if defined(USE_GPU) 
    732   result[q_index] = this_result; 
    733   if (q_index == 0) result[nq] = pd_norm; 
    734 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 
    735 #else // !USE_GPU 
    736   result[nq] = pd_norm; 
    737 //printf("res: %g/%g\n", result[0], pd_norm); 
    738 #endif // !USE_GPU 
     795  #if defined(CALL_FQ) 
     796  result[2*q_index+0] = this_F2; 
     797  result[2*q_index+1] = this_F1; 
     798  #else 
     799  result[q_index] = this_F2; 
     800  #endif 
     801  if (q_index == 0) 
     802#endif 
     803  { 
     804#if defined(CALL_FQ) 
     805    result[2*nq] = weight_norm; 
     806    result[2*nq+1] = weighted_form; 
     807    result[2*nq+2] = weighted_shell; 
     808    result[2*nq+3] = weighted_radius; 
     809#else 
     810    result[nq] = weight_norm; 
     811    result[nq+1] = weighted_form; 
     812    result[nq+2] = weighted_shell; 
     813    result[nq+3] = weighted_radius; 
     814#endif 
     815  } 
    739816 
    740817// ** clear the macros in preparation for the next kernel ** 
Note: See TracChangeset for help on using the changeset viewer.