Changeset 0db7dbd in sasmodels for sasmodels/kernel_iq.c
- Timestamp:
- Feb 16, 2018 7:10:04 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 47fb816
- Parents:
- aa90015
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
sasmodels/kernel_iq.c
raadec17 r0db7dbd 67 67 68 68 // Return value restricted between low and high 69 __device__ 69 70 static double clip(double value, double low, double high) 70 71 { … … 79 80 // du * (m_sigma_y + 1j*m_sigma_z); 80 81 // weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag 82 __device__ 81 83 static void set_spin_weights(double in_spin, double out_spin, double spins[4]) 82 84 { … … 92 94 93 95 // Compute the magnetic sld 96 __device__ 94 97 static double mag_sld( 95 98 const unsigned int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag … … 140 143 // jitter angles (dtheta, dphi). This matrix can be applied to all of the 141 144 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 145 __device__ 142 146 static void 143 147 qac_rotation( … … 173 177 // Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 174 178 // returning R*[qx,qy]' = [qa,qc]' 175 static double 179 __device__ 180 static void 176 181 qac_apply( 177 182 QACRotation *rotation, … … 200 205 // jitter angles (dtheta, dphi, dpsi). 
This matrix can be applied to all of the 201 206 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 207 __device__ 202 208 static void 203 209 qabc_rotation( … … 246 252 // Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 247 253 // returning R*[qx,qy]' = [qa,qb,qc]' 248 static double 254 __device__ 255 static void 249 256 qabc_apply( 250 257 QABCRotation *rotation, … … 267 274 const int32_t pd_start, // where we are in the dispersity loop 268 275 const int32_t pd_stop, // where we are stopping in the dispersity loop 269 global const ProblemDetails *details,270 global const double *values,271 global const double *q, // nq q values, with padding to boundary272 global double *result, // nq+1 return values, again with padding276 global_par const ProblemDetails *details, 277 global_par const double *values, 278 global_par const double *q, // nq q values, with padding to boundary 279 global_par double *result, // nq+1 return values, again with padding 273 280 const double cutoff // cutoff in the dispersity weight product 274 281 ) 275 282 { 276 #if def USE_OPENCL283 #if defined(USE_GPU) 277 284 // who we are and what element we are working with 285 #if defined(USE_OPENCL) 278 286 const int q_index = get_global_id(0); 287 #else // USE_CUDA 288 const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 289 #endif 279 290 if (q_index >= nq) return; 280 291 #else … … 329 340 // seeing one q value (stored in the variable "this_result") while the dll 330 341 // version must loop over all q. 331 #if def USE_OPENCL342 #if defined(USE_GPU) 332 343 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 333 344 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 334 #else // !USE_ OPENCL345 #else // !USE_GPU 335 346 double pd_norm = (pd_start == 0 ? 
0.0 : result[nq]); 336 347 if (pd_start == 0) { … … 341 352 } 342 353 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 343 #endif // !USE_ OPENCL354 #endif // !USE_GPU 344 355 345 356 … … 364 375 const int n4 = pd_length[4]; 365 376 const int p4 = pd_par[4]; 366 global const double *v4 = pd_value + pd_offset[4];367 global const double *w4 = pd_weight + pd_offset[4];377 global_var const double *v4 = pd_value + pd_offset[4]; 378 global_var const double *w4 = pd_weight + pd_offset[4]; 368 379 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 369 380 … … 551 562 const int n##_LOOP = details->pd_length[_LOOP]; \ 552 563 const int p##_LOOP = details->pd_par[_LOOP]; \ 553 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \554 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \564 global_var const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 565 global_var const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 555 566 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 556 567 … … 576 587 // Pointers to the start of the dispersity and weight vectors, if needed. 577 588 #if MAX_PD>0 578 global const double *pd_value = values + NUM_VALUES;579 global const double *pd_weight = pd_value + details->num_weights;589 global_var const double *pd_value = values + NUM_VALUES; 590 global_var const double *pd_weight = pd_value + details->num_weights; 580 591 #endif 581 592 … … 637 648 BUILD_ROTATION(); 638 649 639 #if ndef USE_OPENCL650 #if !defined(USE_GPU) 640 651 // DLL needs to explicitly loop over the q values. 
641 652 #ifdef USE_OPENMP … … 643 654 #endif 644 655 for (q_index=0; q_index<nq; q_index++) 645 #endif // !USE_ OPENCL656 #endif // !USE_GPU 646 657 { 647 658 … … 684 695 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 685 696 686 #if def USE_OPENCL697 #if defined(USE_GPU) 687 698 this_result += weight * scattering; 688 #else // !USE_ OPENCL699 #else // !USE_GPU 689 700 result[q_index] += weight * scattering; 690 #endif // !USE_ OPENCL701 #endif // !USE_GPU 691 702 } 692 703 } … … 712 723 713 724 // Remember the current result and the updated norm. 714 #if def USE_OPENCL725 #if defined(USE_GPU) 715 726 result[q_index] = this_result; 716 727 if (q_index == 0) result[nq] = pd_norm; 717 728 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 718 #else // !USE_ OPENCL729 #else // !USE_GPU 719 730 result[nq] = pd_norm; 720 731 //printf("res: %g/%g\n", result[0], pd_norm); 721 #endif // !USE_ OPENCL732 #endif // !USE_GPU 722 733 723 734 // ** clear the macros in preparation for the next kernel **
Note: See TracChangeset
for help on using the changeset viewer.