Changeset 149eb53 in sasmodels for sasmodels/kernel_iq.c
- Timestamp:
- Oct 25, 2018 1:43:27 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 6d5601c
- Parents:
- 07646b6 (diff), 508475a (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_iq.c
r74e9b5f r07646b6 26 26 // MAGNETIC_PARS : a comma-separated list of indices to the sld 27 27 // parameters in the parameter table. 28 // CALL_VOLUME(table) : call the form volume function 28 // CALL_VOLUME(form, shell, table) : assign form and shell values 29 // CALL_EFFECTIVE_RADIUS(type, table) : call the R_eff function 29 30 // CALL_IQ(q, table) : call the Iq function for 1D calcs. 30 31 // CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data. 32 // CALL_FQ(q, F1, F2, table) : call the Fq function for 1D calcs. 33 // CALL_FQ_A(q, F1, F2, table) : call the Iq function with |q| for 2D data. 31 34 // CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes 32 35 // CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes … … 36 39 // PROJECTION : equirectangular=1, sinusoidal=2 37 40 // see explore/jitter.py for definitions. 41 38 42 39 43 #ifndef _PAR_BLOCK_ // protected block so we can include this code twice. … … 270 274 #endif // _QABC_SECTION 271 275 272 273 276 // ==================== KERNEL CODE ======================== 274 275 277 kernel 276 278 void KERNEL_NAME( 277 int32_t nq, // number of q values278 const int32_t pd_start, // where we are in the dispersity loop279 const int32_t pd_stop, // where we are stopping in the dispersity loop279 int32_t nq, // number of q values 280 const int32_t pd_start, // where we are in the dispersity loop 281 const int32_t pd_stop, // where we are stopping in the dispersity loop 280 282 pglobal const ProblemDetails *details, 281 pglobal const double *values, 282 pglobal const double *q, // nq q values, with padding to boundary 283 pglobal double *result, // nq+1 return values, again with padding 284 const double cutoff // cutoff in the dispersity weight product 283 pglobal const double *values, // parameter values and distributions 284 pglobal const double *q, // nq q values, with padding to boundary 285 pglobal double *result, // nq+1 return values, again with padding 286 const double cutoff, // cutoff in the dispersity weight product 287 int32_t effective_radius_type // which effective radius to compute 285 288 ) 286 289 { … … 342 345 // 343 346 // The code differs slightly between opencl and dll since opencl is only 344 // seeing one q value (stored in the variable "this_ result") while the dll347 // seeing one q value (stored in the variable "this_F2") while the dll 345 348 // version must loop over all q. 349 #if defined(CALL_FQ) 350 double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]); 351 double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]); 352 double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]); 353 double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]); 354 #else 355 double weight_norm = (pd_start == 0 ? 0.0 : result[nq]); 356 double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]); 357 double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]); 358 double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]); 359 #endif 346 360 #if defined(USE_GPU) 347 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 348 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 361 #if defined(CALL_FQ) 362 double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]); 363 double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]); 364 #else 365 double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]); 366 #endif 349 367 #else // !USE_GPU 350 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);351 368 if (pd_start == 0) { 352 369 #ifdef USE_OPENMP 353 370 #pragma omp parallel for 354 371 #endif 355 for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 372 #if defined(CALL_FQ) 373 // 2*nq for F^2,F pairs 374 for (int q_index=0; q_index < 2*nq; q_index++) result[q_index] = 0.0; 375 #else 376 for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 377 #endif 356 378 } 357 379 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); … … 415 437 FETCH_Q // set qx,qy from the q input vector 416 438 APPLY_ROTATION // convert qx,qy to qa,qb,qc 417 CALL_KERNEL // scattering= Iqxy(qa, qb, qc, p1, p2, ...)439 CALL_KERNEL // F2 = Iqxy(qa, qb, qc, p1, p2, ...) 418 440 419 441 ++step; // increment counter representing position in dispersity mesh … … 446 468 // inner loop and defines the macros that use them. 447 469 448 #if defined(CALL_IQ) 449 // unoriented 1D 470 471 #if defined(CALL_FQ) // COMPUTE_F1_F2 is true 472 // unoriented 1D returning <F> and <F^2> 473 double qk; 474 double F1, F2; 475 #define FETCH_Q() do { qk = q[q_index]; } while (0) 476 #define BUILD_ROTATION() do {} while(0) 477 #define APPLY_ROTATION() do {} while(0) 478 #define CALL_KERNEL() CALL_FQ(qk,F1,F2,local_values.table) 479 480 #elif defined(CALL_FQ_A) 481 // unoriented 2D return <F> and <F^2> 482 double qx, qy; 483 double F1, F2; 484 #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 485 #define BUILD_ROTATION() do {} while(0) 486 #define APPLY_ROTATION() do {} while(0) 487 #define CALL_KERNEL() CALL_FQ_A(sqrt(qx*qx+qy*qy),F1,F2,local_values.table) 488 489 #elif defined(CALL_IQ) 490 // unoriented 1D return <F^2> 450 491 double qk; 451 492 #define FETCH_Q() do { qk = q[q_index]; } while (0) 452 493 #define BUILD_ROTATION() do {} while(0) 453 494 #define APPLY_ROTATION() do {} while(0) 454 #define CALL_KERNEL() CALL_IQ(qk, 495 #define CALL_KERNEL() CALL_IQ(qk,local_values.table) 455 496 456 497 #elif defined(CALL_IQ_A) … … 486 527 #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc) 487 528 #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table) 529 488 530 #elif defined(CALL_IQ_XY) 489 531 // direct call to qx,qy calculator … … 499 541 // logic in the IQ_AC and IQ_ABC branches. This will become more important 500 542 // if we implement more projections, or more complicated projections. 501 #if defined(CALL_IQ) || defined(CALL_IQ_A) // no orientation 543 #if defined(CALL_IQ) || defined(CALL_IQ_A) || defined(CALL_FQ) || defined(CALL_FQ_A) 544 // no orientation 502 545 #define APPLY_PROJECTION() const double weight=weight0 503 546 #elif defined(CALL_IQ_XY) // pass orientation to the model … … 649 692 // Note: weight==0 must always be excluded 650 693 if (weight > cutoff) { 651 pd_norm += weight * CALL_VOLUME(local_values.table); 694 double form, shell; 695 CALL_VOLUME(form, shell, local_values.table); 696 weight_norm += weight; 697 weighted_form += weight * form; 698 weighted_shell += weight * shell; 699 if (effective_radius_type != 0) { 700 weighted_radius += weight * CALL_EFFECTIVE_RADIUS(effective_radius_type, local_values.table); 701 } 652 702 BUILD_ROTATION(); 653 703 … … 667 717 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 668 718 // Compute the scattering from the magnetic cross sections. 669 double scattering= 0.0;719 double F2 = 0.0; 670 720 const double qsq = qx*qx + qy*qy; 671 721 if (qsq > 1.e-16) { … … 692 742 // q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz); 693 743 } 694 scattering+= xs_weight * CALL_KERNEL();744 F2 += xs_weight * CALL_KERNEL(); 695 745 } 696 746 } 697 747 } 698 748 #else // !MAGNETIC 699 const double scattering = CALL_KERNEL(); 749 #if defined(CALL_FQ) 750 CALL_KERNEL(); // sets F1 and F2 by reference 751 #else 752 const double F2 = CALL_KERNEL(); 753 #endif 700 754 #endif // !MAGNETIC 701 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);755 //printf("q_index:%d %g %g %g %g\n", q_index, F2, weight0); 702 756 703 757 #if defined(USE_GPU) 704 this_result += weight * scattering; 705 #else // !USE_GPU 706 result[q_index] += weight * scattering; 707 #endif // !USE_GPU 758 #if defined(CALL_FQ) 759 this_F2 += weight * F2; 760 this_F1 += weight * F1; 761 #else 762 this_F2 += weight * F2; 763 #endif 764 #else // !USE_OPENCL 765 #if defined(CALL_FQ) 766 result[2*q_index+0] += weight * F2; 767 result[2*q_index+1] += weight * F1; 768 #else 769 result[q_index] += weight * F2; 770 #endif 771 #endif // !USE_OPENCL 708 772 } 709 773 } 710 774 } 711 712 775 // close nested loops 713 776 ++step; … … 728 791 #endif 729 792 730 // Remember the current resultand the updated norm.793 // Remember the results and the updated norm. 731 794 #if defined(USE_GPU) 732 result[q_index] = this_result; 733 if (q_index == 0) result[nq] = pd_norm; 734 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 735 #else // !USE_GPU 736 result[nq] = pd_norm; 737 //printf("res: %g/%g\n", result[0], pd_norm); 738 #endif // !USE_GPU 795 #if defined(CALL_FQ) 796 result[2*q_index+0] = this_F2; 797 result[2*q_index+1] = this_F1; 798 #else 799 result[q_index] = this_F2; 800 #endif 801 if (q_index == 0) 802 #endif 803 { 804 #if defined(CALL_FQ) 805 result[2*nq] = weight_norm; 806 result[2*nq+1] = weighted_form; 807 result[2*nq+2] = weighted_shell; 808 result[2*nq+3] = weighted_radius; 809 #else 810 result[nq] = weight_norm; 811 result[nq+1] = weighted_form; 812 result[nq+2] = weighted_shell; 813 result[nq+3] = weighted_radius; 814 #endif 815 } 739 816 740 817 // ** clear the macros in preparation for the next kernel **
Note: See TracChangeset
for help on using the changeset viewer.