Changeset b9c12fe5 in sasmodels
 Timestamp:
 Jul 15, 2016 11:25:10 PM (8 years ago)
 Branches:
 master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket1257vesicleproduct, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
 Children:
 f3bd37f
 Parents:
 def2c1b
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

sasmodels/kernel_iq.cl
ra738209 rb9c12fe5 48 48 // Storage for the current parameter values. These will be updated as we 49 49 // walk the polydispersity cube. local_values will be aliased to pvec. 50 local ParameterBlock local_values; 50 ParameterBlock local_values; 51 double *pvec = (double *)&local_values; 51 52 52 53 // who we are and what element we are working with 53 54 const int q_index = get_global_id(0); 54 const int thread = get_local_id(0);55 55 56 56 // Fill in the initial variables 57 event_t e = async_work_group_copy((local double *)&local_values, values+2, NPARS, 0); 58 wait_group_events(1, &e); 57 for (int i=0; i < NPARS; i++) { 58 pvec[i] = values[2+i]; 59 } 59 60 60 61 // Monodisperse computation … … 82 83 83 84 //printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop); 84 // norm will be shared across all threads.85 85 86 // "values" is global and can't be assigned to a local, so even though only87 // the alias is only needed for thread 0 it is allocated in all threads.88 86 global const double *pd_value = values+2+NPARS; 89 87 global const double *pd_weight = pd_value+details>pd_sum; … … 91 89 // need product of weights at every Iq calc, so keep product of 92 90 // weights from the outer loops so that weight = partial_weight * fast_weight 93 local double pd_norm; 94 local double partial_weight; // product of weight w4*w3*w2 but not w1 95 local double spherical_correction; // cosine correction for latitude variation 96 local double weight; // product of partial_weight*w1*spherical_correction 97 local double *pvec; 98 local int p0_par; 99 local int p0_length; 100 local int p0_offset; 101 local int p0_is_theta; 102 local int p0_index; 91 double pd_norm; 92 double partial_weight; // product of weight w4*w3*w2 but not w1 93 double spherical_correction; // cosine correction for latitude variation 94 double weight; // product of partial_weight*w1*spherical_correction 95 int p0_par; 96 int p0_length; 97 int p0_offset; 98 int p0_is_theta; 99 int p0_index; 103 100 104 101 // Number of elements in the longest polydispersity loop 105 barrier(CLK_LOCAL_MEM_FENCE); 106 if (thread == 0) { 107 pvec = (local double *)(&local_values); 102 p0_par = details>pd_par[0]; 103 p0_length = details>pd_length[0]; 104 p0_offset = details>pd_offset[0]; 105 p0_is_theta = (p0_par == details>theta_par); 108 106 109 // Number of elements in the longest polydispersity loop 110 p0_par = details>pd_par[0]; 111 p0_length = details>pd_length[0]; 112 p0_offset = details>pd_offset[0]; 113 p0_is_theta = (p0_par == details>theta_par); 107 // Trigger the reset behaviour that happens at the end the fast loop 108 // by setting the initial index >= weight vector length. 109 p0_index = p0_length; 114 110 115 // Trigger the reset behaviour that happens at the end the fast loop116 // by setting the initial index >= weight vector length.117 p0_index = p0_length;111 // Default the spherical correction to 1.0 in case it is not otherwise set 112 spherical_correction = 1.0; 113 weight=1.0; 118 114 119 // Default the spherical correction to 1.0 in case it is not otherwise set 120 spherical_correction = 1.0; 121 122 // Since we are no longer looping over the entire polydispersity hypercube 123 // for each q, we need to track the result and normalization values between 124 // calls. This means initializing them to 0 at the start and accumulating 125 // them between calls. 126 pd_norm = pd_start == 0 ? 0.0 : result[nq]; 127 } 128 barrier(CLK_LOCAL_MEM_FENCE); 115 // Since we are no longer looping over the entire polydispersity hypercube 116 // for each q, we need to track the result and normalization values between 117 // calls. This means initializing them to 0 at the start and accumulating 118 // them between calls. 119 pd_norm = pd_start == 0 ? 0.0 : result[nq]; 129 120 130 121 if (q_index < nq) { … … 134 125 // Loop over the weights then loop over q, accumulating values 135 126 for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { 136 barrier(CLK_LOCAL_MEM_FENCE); 137 if (thread == 0) { 138 // check if fast loop needs to be reset 139 if (p0_index == p0_length) { 140 //printf("should be here with %d active\n", num_active); 127 // check if fast loop needs to be reset 128 if (p0_index == p0_length) { 129 //printf("should be here with %d active\n", num_active); 141 130 142 // Compute position in polydispersity hypercube and partial weight 143 partial_weight = 1.0; 144 for (int k=1; k < details>num_active; k++) { 145 int pk = details>pd_par[k]; 146 int index = details>pd_offset[k] + (loop_index/details>pd_stride[k])%details>pd_length[k]; 147 pvec[pk] = pd_value[index]; 148 partial_weight *= pd_weight[index]; 149 //printf("index[%d] = %d\n",k,index); 150 if (pk == details>theta_par) { 151 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e6); 152 } 131 // Compute position in polydispersity hypercube and partial weight 132 partial_weight = 1.0; 133 for (int k=1; k < details>num_active; k++) { 134 int pk = details>pd_par[k]; 135 int index = details>pd_offset[k] + (loop_index/details>pd_stride[k])%details>pd_length[k]; 136 pvec[pk] = pd_value[index]; 137 partial_weight *= pd_weight[index]; 138 //printf("index[%d] = %d\n",k,index); 139 if (pk == details>theta_par) { 140 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e6); 153 141 } 154 p0_index = loop_index%p0_length;155 //printf("\n");156 142 } 143 p0_index = loop_index%p0_length; 144 //printf("\n"); 145 } 157 146 158 // Update parameter p0 159 weight = partial_weight*pd_weight[p0_offset + p0_index]; 160 pvec[p0_par] = pd_value[p0_offset + p0_index]; 161 if (p0_is_theta) { 162 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e6); 163 } 164 p0_index++; 147 // Update parameter p0 148 weight = partial_weight*pd_weight[p0_offset + p0_index]; 149 pvec[p0_par] = pd_value[p0_offset + p0_index]; 150 if (p0_is_theta) { 151 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e6); 165 152 } 166 barrier(CLK_LOCAL_MEM_FENCE);153 p0_index++; 167 154 //printf("\n"); 168 155
Note: See TracChangeset
for help on using the changeset viewer.