Changeset 07646b6 in sasmodels for sasmodels/kernel_iq.c
- Timestamp:
- Oct 25, 2018 1:43:01 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 149eb53
- Parents:
- 31fc4ad (diff), d5ce7fa (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - git-author:
- Paul Kienzle <pkienzle@…> (10/25/18 12:41:48)
- git-committer:
- Paul Kienzle <pkienzle@…> (10/25/18 13:43:01)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_iq.c
re44432d r07646b6 277 277 kernel 278 278 void KERNEL_NAME( 279 int32_t nq, // number of q values280 const int32_t pd_start, // where we are in the dispersity loop281 const int32_t pd_stop, // where we are stopping in the dispersity loop282 global const ProblemDetails *details,283 global const double *values,284 global const double *q,// nq q values, with padding to boundary285 global double *result,// nq+1 return values, again with padding286 const double cutoff, // cutoff in the dispersity weight product279 int32_t nq, // number of q values 280 const int32_t pd_start, // where we are in the dispersity loop 281 const int32_t pd_stop, // where we are stopping in the dispersity loop 282 pglobal const ProblemDetails *details, 283 pglobal const double *values, // parameter values and distributions 284 pglobal const double *q, // nq q values, with padding to boundary 285 pglobal double *result, // nq+1 return values, again with padding 286 const double cutoff, // cutoff in the dispersity weight product 287 287 int32_t effective_radius_type // which effective radius to compute 288 288 ) 289 289 { 290 #if def USE_OPENCL290 #if defined(USE_GPU) 291 291 // who we are and what element we are working with 292 #if defined(USE_OPENCL) 292 293 const int q_index = get_global_id(0); 294 #else // USE_CUDA 295 const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 296 #endif 293 297 if (q_index >= nq) return; 294 298 #else … … 341 345 // 342 346 // The code differs slightly between opencl and dll since opencl is only 343 // seeing one q value (stored in the variable "this_ result") while the dll347 // seeing one q value (stored in the variable "this_F2") while the dll 344 348 // version must loop over all q. 345 #ifdef USE_OPENCL 349 #if defined(CALL_FQ) 350 double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 351 double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 352 double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 353 double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 354 #else 355 double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 356 double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 357 double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 358 double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 359 #endif 360 #if defined(USE_GPU) 346 361 #if defined(CALL_FQ) 347 double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]);348 double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]);349 double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]);350 double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]);351 362 double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]); 352 363 double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]); 353 364 #else 354 double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 355 double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 356 double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 357 double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 358 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 365 double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]); 359 366 #endif 360 #else // !USE_OPENCL 361 #if defined(CALL_FQ) 362 double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 363 double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 364 double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 365 double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 366 #else 367 double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 368 double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 369 double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 370 double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 371 #endif 367 #else // !USE_GPU 372 368 if (pd_start == 0) { 373 369 #ifdef USE_OPENMP … … 381 377 #endif 382 378 } 383 //if (q_index==0) printf("start %d %g %g\n", pd_start, weighted_shell, result[0]);384 #endif // !USE_ OPENCL379 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 380 #endif // !USE_GPU 385 381 386 382 … … 405 401 const int n4 = pd_length[4]; 406 402 const int p4 = pd_par[4]; 407 global const double *v4 = pd_value + pd_offset[4];408 global const double *w4 = pd_weight + pd_offset[4];403 pglobal const double *v4 = pd_value + pd_offset[4]; 404 pglobal const double *w4 = pd_weight + pd_offset[4]; 409 405 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 410 406 … … 441 437 FETCH_Q // set qx,qy from the q input vector 442 438 APPLY_ROTATION // convert qx,qy to qa,qb,qc 443 CALL_KERNEL // scattering= Iqxy(qa, qb, qc, p1, p2, ...)439 CALL_KERNEL // F2 = Iqxy(qa, qb, qc, p1, p2, ...) 444 440 445 441 ++step; // increment counter representing position in dispersity mesh … … 613 609 const int n##_LOOP = details->pd_length[_LOOP]; \ 614 610 const int p##_LOOP = details->pd_par[_LOOP]; \ 615 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \616 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \611 pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 612 pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 617 613 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 618 614 … … 638 634 // Pointers to the start of the dispersity and weight vectors, if needed. 639 635 #if MAX_PD>0 640 global const double *pd_value = values + NUM_VALUES;641 global const double *pd_weight = pd_value + details->num_weights;636 pglobal const double *pd_value = values + NUM_VALUES; 637 pglobal const double *pd_weight = pd_value + details->num_weights; 642 638 #endif 643 639 … … 706 702 BUILD_ROTATION(); 707 703 708 #if ndef USE_OPENCL704 #if !defined(USE_GPU) 709 705 // DLL needs to explicitly loop over the q values. 710 706 #ifdef USE_OPENMP … … 712 708 #endif 713 709 for (q_index=0; q_index<nq; q_index++) 714 #endif // !USE_ OPENCL710 #endif // !USE_GPU 715 711 { 716 712 … … 721 717 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 722 718 // Compute the scattering from the magnetic cross sections. 723 double scattering= 0.0;719 double F2 = 0.0; 724 720 const double qsq = qx*qx + qy*qy; 725 721 if (qsq > 1.e-16) { … … 746 742 // q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz); 747 743 } 748 scattering+= xs_weight * CALL_KERNEL();744 F2 += xs_weight * CALL_KERNEL(); 749 745 } 750 746 } … … 754 750 CALL_KERNEL(); // sets F1 and F2 by reference 755 751 #else 756 const double scattering= CALL_KERNEL();752 const double F2 = CALL_KERNEL(); 757 753 #endif 758 754 #endif // !MAGNETIC 759 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);760 761 #if def USE_OPENCL755 //printf("q_index:%d %g %g %g %g\n", q_index, F2, weight0); 756 757 #if defined(USE_GPU) 762 758 #if defined(CALL_FQ) 763 759 this_F2 += weight * F2; 764 760 this_F1 += weight * F1; 765 761 #else 766 this_ result += weight * scattering;762 this_F2 += weight * F2; 767 763 #endif 768 764 #else // !USE_OPENCL … … 771 767 result[2*q_index+1] += weight * F1; 772 768 #else 773 result[q_index] += weight * scattering;769 result[q_index] += weight * F2; 774 770 #endif 775 771 #endif // !USE_OPENCL … … 795 791 #endif 796 792 797 // Remember the current resultand the updated norm.798 #if def USE_OPENCL793 // Remember the results and the updated norm. 794 #if defined(USE_GPU) 799 795 #if defined(CALL_FQ) 800 result[2*q_index+0] = this_F2; 801 result[2*q_index+1] = this_F1; 802 if (q_index == 0) { 803 result[2*nq+0] = weight_norm; 804 result[2*nq+1] = weighted_form; 805 result[2*nq+3] = weighted_shell; 806 result[2*nq+3] = weighted_radius; 807 } 796 result[2*q_index+0] = this_F2; 797 result[2*q_index+1] = this_F1; 808 798 #else 809 result[q_index] = this_result; 810 if (q_index == 0) { 811 result[nq+0] = weight_norm; 812 result[nq+1] = weighted_form; 813 result[nq+2] = weighted_shell; 814 result[nq+3] = weighted_radius; 815 } 799 result[q_index] = this_F2; 816 800 #endif 817 818 //if (q_index == 0) printf("res: %g/%g\n", result[0], weighted_shell); 819 #else // !USE_OPENCL 820 801 if (q_index == 0) 802 #endif 803 { 804 #if defined(CALL_FQ) 821 805 result[2*nq] = weight_norm; 822 806 result[2*nq+1] = weighted_form; 823 807 result[2*nq+2] = weighted_shell; 824 808 result[2*nq+3] = weighted_radius; 825 809 #else 826 810 result[nq] = weight_norm; 827 811 result[nq+1] = weighted_form; 828 812 result[nq+2] = weighted_shell; 829 813 result[nq+3] = weighted_radius; 830 #endif 831 //printf("res: %g/%g\n", result[0], weighted_shell); 832 #endif // !USE_OPENCL 814 #endif 815 } 833 816 834 817 // ** clear the macros in preparation for the next kernel **
Note: See TracChangeset
for help on using the changeset viewer.