Changeset 508475a in sasmodels for sasmodels/kernel_iq.c
- Timestamp:
- Oct 25, 2018 2:50:35 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- c11d09f
- Parents:
- 8b31efa (diff), 2a12d8d8 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - git-author:
- Paul Kienzle <pkienzle@…> (10/25/18 14:20:45)
- git-committer:
- Paul Kienzle <pkienzle@…> (10/25/18 14:50:35)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_iq.c
r70530778 r74e9b5f 278 278 const int32_t pd_start, // where we are in the dispersity loop 279 279 const int32_t pd_stop, // where we are stopping in the dispersity loop 280 global const ProblemDetails *details,281 global const double *values,282 global const double *q, // nq q values, with padding to boundary283 global double *result, // nq+1 return values, again with padding280 pglobal const ProblemDetails *details, 281 pglobal const double *values, 282 pglobal const double *q, // nq q values, with padding to boundary 283 pglobal double *result, // nq+1 return values, again with padding 284 284 const double cutoff // cutoff in the dispersity weight product 285 285 ) 286 286 { 287 #if def USE_OPENCL287 #if defined(USE_GPU) 288 288 // who we are and what element we are working with 289 #if defined(USE_OPENCL) 289 290 const int q_index = get_global_id(0); 291 #else // USE_CUDA 292 const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 293 #endif 290 294 if (q_index >= nq) return; 291 295 #else … … 340 344 // seeing one q value (stored in the variable "this_result") while the dll 341 345 // version must loop over all q. 342 #if def USE_OPENCL346 #if defined(USE_GPU) 343 347 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 344 348 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 345 #else // !USE_ OPENCL349 #else // !USE_GPU 346 350 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 347 351 if (pd_start == 0) { … … 352 356 } 353 357 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 354 #endif // !USE_ OPENCL358 #endif // !USE_GPU 355 359 356 360 … … 375 379 const int n4 = pd_length[4]; 376 380 const int p4 = pd_par[4]; 377 global const double *v4 = pd_value + pd_offset[4];378 global const double *w4 = pd_weight + pd_offset[4];381 pglobal const double *v4 = pd_value + pd_offset[4]; 382 pglobal const double *w4 = pd_weight + pd_offset[4]; 379 383 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 380 384 … … 562 566 const int n##_LOOP = details->pd_length[_LOOP]; \ 563 567 const int p##_LOOP = details->pd_par[_LOOP]; \ 564 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \565 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \568 pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 569 pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 566 570 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 567 571 … … 587 591 // Pointers to the start of the dispersity and weight vectors, if needed. 588 592 #if MAX_PD>0 589 global const double *pd_value = values + NUM_VALUES;590 global const double *pd_weight = pd_value + details->num_weights;593 pglobal const double *pd_value = values + NUM_VALUES; 594 pglobal const double *pd_weight = pd_value + details->num_weights; 591 595 #endif 592 596 … … 648 652 BUILD_ROTATION(); 649 653 650 #if ndef USE_OPENCL654 #if !defined(USE_GPU) 651 655 // DLL needs to explicitly loop over the q values. 652 656 #ifdef USE_OPENMP … … 654 658 #endif 655 659 for (q_index=0; q_index<nq; q_index++) 656 #endif // !USE_ OPENCL660 #endif // !USE_GPU 657 661 { 658 662 … … 697 701 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 698 702 699 #if def USE_OPENCL703 #if defined(USE_GPU) 700 704 this_result += weight * scattering; 701 #else // !USE_ OPENCL705 #else // !USE_GPU 702 706 result[q_index] += weight * scattering; 703 #endif // !USE_ OPENCL707 #endif // !USE_GPU 704 708 } 705 709 } … … 725 729 726 730 // Remember the current result and the updated norm. 727 #if def USE_OPENCL731 #if defined(USE_GPU) 728 732 result[q_index] = this_result; 729 733 if (q_index == 0) result[nq] = pd_norm; 730 734 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 731 #else // !USE_ OPENCL735 #else // !USE_GPU 732 736 result[nq] = pd_norm; 733 737 //printf("res: %g/%g\n", result[0], pd_norm); 734 #endif // !USE_ OPENCL738 #endif // !USE_GPU 735 739 736 740 // ** clear the macros in preparation for the next kernel **
Note: See TracChangeset
for help on using the changeset viewer.