Changeset a738209 in sasmodels
- Timestamp: Jul 15, 2016 9:33:33 AM (8 years ago)
- Branches: master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children: def2c1b
- Parents: 98ba1fc
- Location: sasmodels
- Files: 11 edited
Legend:
- Unmodified lines are shown without a prefix
- Added lines are prefixed with +
- Removed lines are prefixed with -
sasmodels/compare.py
rf2f67a6 → ra738209

     Return a model calculator using the OpenCL calculation engine.
     """
-    try:
-        model = core.build_model(model_info, dtype=dtype, platform="ocl")
-    except Exception as exc:
-        print(exc)
-        print("... trying again with single precision")
-        model = core.build_model(model_info, dtype='single', platform="ocl")
+    if not core.HAVE_OPENCL:
+        raise RuntimeError("OpenCL not available")
+    model = core.build_model(model_info, dtype=dtype, platform="ocl")
     calculator = DirectModel(data, model, cutoff=cutoff)
     calculator.engine = "OCL%s"%DTYPE_MAP[dtype]
sasmodels/details.py
r0ff62d4 → ra738209

+from __future__ import print_function
+
 import numpy as np # type: ignore
 
…
         parameters = model_info.parameters
         max_pd = parameters.max_pd
-        npars = parameters.npars
-        par_offset = 4*max_pd
-        self.buffer = np.zeros(par_offset + 3 * npars + 4, 'i4')
+        # Structure of the call details buffer:
+        #   pd_par[max_pd]     pd params in order of length
+        #   pd_length[max_pd]  length of each pd param
+        #   pd_offset[max_pd]  offset of pd values in parameter array
+        #   pd_stride[max_pd]  index of pd value in loop = n//stride[k]
+        #   pd_prod            total length of pd loop
+        #   pd_sum             total length of the weight vector
+        #   num_active         number of pd params
+        #   theta_par          parameter number for theta parameter
+        self.buffer = np.zeros(4*max_pd + 4, 'i4')
 
         # generate views on different parts of the array
…
         self._pd_offset = self.buffer[2 * max_pd:3 * max_pd]
         self._pd_stride = self.buffer[3 * max_pd:4 * max_pd]
-        self._par_offset = self.buffer[par_offset + 0 * npars:par_offset + 1 * npars]
-        self._par_coord = self.buffer[par_offset + 1 * npars:par_offset + 2 * npars]
-        self._pd_coord = self.buffer[par_offset + 2 * npars:par_offset + 3 * npars]
 
         # theta_par is fixed
-        self.buffer[-1] = parameters.theta_offset
+        self.theta_par = parameters.theta_offset
 
     @property
…
 
     @property
-    def pd_coord(self): return self._pd_coord
+    def pd_prod(self): return self.buffer[-4]
+    @pd_prod.setter
+    def pd_prod(self, v): self.buffer[-4] = v
 
     @property
-    def par_coord(self): return self._par_coord
+    def pd_sum(self): return self.buffer[-3]
+    @pd_sum.setter
+    def pd_sum(self, v): self.buffer[-3] = v
 
     @property
-    def par_offset(self): return self._par_offset
-
-    @property
-    def num_active(self): return self.buffer[-4]
+    def num_active(self): return self.buffer[-2]
     @num_active.setter
-    def num_active(self, v): self.buffer[-4] = v
-
-    @property
-    def total_pd(self): return self.buffer[-3]
-    @total_pd.setter
-    def total_pd(self, v): self.buffer[-3] = v
-
-    @property
-    def num_coord(self): return self.buffer[-2]
-    @num_coord.setter
-    def num_coord(self, v): self.buffer[-2] = v
+    def num_active(self, v): self.buffer[-2] = v
 
     @property
     def theta_par(self): return self.buffer[-1]
+    @theta_par.setter
+    def theta_par(self, v): self.buffer[-1] = v
 
     def show(self):
-        print("total_pd", self.total_pd)
         print("num_active", self.num_active)
+        print("pd_prod", self.pd_prod)
+        print("pd_sum", self.pd_sum)
+        print("theta par", self.theta_par)
         print("pd_par", self.pd_par)
         print("pd_length", self.pd_length)
         print("pd_offset", self.pd_offset)
         print("pd_stride", self.pd_stride)
-        print("par_offsets", self.par_offset)
-        print("num_coord", self.num_coord)
-        print("par_coord", self.par_coord)
-        print("pd_coord", self.pd_coord)
-        print("theta par", self.buffer[-1])
 
 def mono_details(model_info):
     call_details = CallDetails(model_info)
-    # The zero defaults for monodisperse systems are mostly fine
-    call_details.par_offset[:] = np.arange(2, len(call_details.par_offset)+2)
+    call_details.pd_prod = 1
     return call_details
 
 def poly_details(model_info, weights):
     #print("weights",weights)
-    weights = weights[2:] # Skip scale and background
+    #weights = weights[2:] # Skip scale and background
 
     # Decreasing list of polydispersity lengths
-    # Note: the reversing view, x[::-1], does not require a copy
     pd_length = np.array([len(w) for w in weights])
     num_active = np.sum(pd_length>1)
…
 
     pd_offset = np.cumsum(np.hstack((0, pd_length)))
+    # Note: the reversing view, x[::-1], does not require a copy
     idx = np.argsort(pd_length)[::-1][:num_active]
-    par_length = np.array([max(len(w),1) for w in weights])
+    par_length = np.array([len(w) for w in weights])
     pd_stride = np.cumprod(np.hstack((1, par_length[idx])))
-    par_offsets = np.cumsum(np.hstack((2, par_length)))
 
     call_details = CallDetails(model_info)
-    call_details.pd_par[:num_active] = idx
+    call_details.pd_par[:num_active] = idx - 2  # skip background & scale
     call_details.pd_length[:num_active] = pd_length[idx]
     call_details.pd_offset[:num_active] = pd_offset[idx]
     call_details.pd_stride[:num_active] = pd_stride[:-1]
-    call_details.par_offset[:] = par_offsets[:-1]
-    call_details.total_pd = pd_stride[-1]
+    call_details.pd_prod = pd_stride[-1]
+    call_details.pd_sum = np.sum(par_length)
     call_details.num_active = num_active
-    # Without constraints coordinated parameters are just the pd parameters
-    call_details.par_coord[:num_active] = idx
-    call_details.pd_coord[:num_active] = 2**np.arange(num_active)
-    call_details.num_coord = num_active
     #call_details.show()
     return call_details
-
-def constrained_poly_details(model_info, weights, constraints):
-    # Need to find the independently varying pars and sort them
-    # Need to build a coordination list for the dependent variables
-    # Need to generate a constraints function which takes values
-    # and weights, returning par blocks
-    raise NotImplementedError("Can't handle constraints yet")
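The comment block added to details.py above documents the new buffer layout. Purely as an illustration (not part of the changeset), the same layout can be mirrored in a few lines of numpy; MAX_PD is assumed to be 4 for the sketch:

    import numpy as np

    max_pd = 4                             # assumed MAX_PD for the sketch
    buffer = np.zeros(4*max_pd + 4, 'i4')  # same shape as the new CallDetails buffer

    pd_par    = buffer[0*max_pd:1*max_pd]  # pd params in order of length
    pd_length = buffer[1*max_pd:2*max_pd]  # length of each pd param
    pd_offset = buffer[2*max_pd:3*max_pd]  # offset of pd values in the value vector
    pd_stride = buffer[3*max_pd:4*max_pd]  # stride used to unravel the pd loop index
    # trailing scalars: buffer[-4] = pd_prod, buffer[-3] = pd_sum,
    #                   buffer[-2] = num_active, buffer[-1] = theta_par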
sasmodels/direct_model.py
r56b2687 → ra738209

     vw_pairs = [(get_weights(p, pars) if active(p.name)
-                 else ([pars.get(p.name, p.default)], []))
+                 else ([pars.get(p.name, p.default)], [1.0]))
                 for p in parameters.call_parameters]
 
-    call_details, weights, values = kernel.build_details(calculator, vw_pairs)
-    return calculator(call_details, weights, values, cutoff)
+    call_details, values = kernel.build_details(calculator, vw_pairs)
+    return calculator(call_details, values, cutoff)
 
 def get_weights(parameter, values):
…
     nsigma = values.get(parameter.name+'_pd_nsigma', 3.0)
     if npts == 0 or width == 0:
-        return [value], []
+        return [value], [1.0]
     value, weight = weights.get_weights(
         disperser, npts, width, nsigma, value, limits, relative)
sasmodels/generate.py
r56b2687 → ra738209

-_IQXY_PATTERN = re.compile("^((inline|static) )? *(double )? *Iqxy *([(]|$)",
+# type in IQXY pattern could be single, float, double, long double, ...
+_IQXY_PATTERN = re.compile("^((inline|static) )? *([a-z ]+ )? *Iqxy *([(]|$)",
                            flags=re.MULTILINE)
 def _have_Iqxy(sources):
…
     line instead.
     """
-    for code, path in sources:
+    for path, code in sources:
         if _IQXY_PATTERN.search(code):
             return True
sasmodels/kernel.py
r0ff62d4 → ra738209

 the kernel should be released, which also releases the inputs.
 """
+
+from __future__ import division, print_function
 
 import numpy as np
…
     """
     values, weights = zip(*pairs)
-    if max([len(w) for w in weights]) > 1:
+    scalars = [v[0] for v in values]
+    if all(len(w)==1 for w in weights):
+        call_details = mono_details(kernel.info)
+        data = np.array(scalars, dtype=kernel.dtype)
+    else:
         call_details = poly_details(kernel.info, weights)
-    else:
-        call_details = mono_details(kernel.info)
-    weights, values = [np.hstack(v) for v in (weights, values)]
-    weights = weights.astype(dtype=kernel.dtype)
-    values = values.astype(dtype=kernel.dtype)
-    return call_details, weights, values
+        data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype)
+    return call_details, data
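Purely as an illustration of the new build_details() packing above (not part of the changeset), a polydisperse call might be assembled like this; the parameter list and dtype are invented for the example:

    import numpy as np

    # (value, weight) pairs as produced upstream: scale, background,
    # then a polydisperse radius sampled at three points
    pairs = [
        (np.array([1.0]), np.array([1.0])),
        (np.array([0.001]), np.array([1.0])),
        (np.array([40.0, 50.0, 60.0]), np.array([0.2, 0.6, 0.2])),
    ]
    values, weights = zip(*pairs)
    scalars = [v[0] for v in values]
    # one flat vector: the initial value of each parameter, then every value
    # array, then every weight array, matching what the kernels index into
    data = np.hstack(scalars + list(values) + list(weights)).astype('d')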
sasmodels/kernel_iq.c
rae2b6b5 → ra738209

     int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
 #endif // MAX_PD > 0
-    int32_t par_offset[NPARS];  // offset of par value blocks in the value & weight vector
-    int32_t par_coord[NPARS];   // ids of the coordination parameters
-    int32_t pd_coord[NPARS];    // polydispersity coordination bitvector
+    int32_t pd_prod;            // total number of voxels in hypercube
+    int32_t pd_sum;             // total length of the weights vector
     int32_t num_active;         // number of non-trivial pd loops
-    int32_t total_pd;           // total number of voxels in hypercube
-    int32_t num_coord;          // number of coordinated parameters
     int32_t theta_par;          // id of spherical correction variable
 } ProblemDetails;
…
     const int32_t pd_stop,      // where we are stopping in the polydispersity loop
     global const ProblemDetails *details,
-    global const double *weights,
     global const double *values,
     global const double *q,     // nq q values, with padding to boundary
…
   ParameterBlock local_values;  // current parameter values
   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
-  double norm;
-
-  // number of active loops
-  const int num_active = details->num_active;
 
   // Fill in the initial variables
…
   #endif
   for (int k=0; k < NPARS; k++) {
-    pvec[k] = values[details->par_offset[k]];
+    pvec[k] = values[k+2];
   }
 
   // Monodisperse computation
-  if (num_active == 0) {
+  if (details->num_active == 0) {
+    double norm, scale, background;
     #ifdef INVALID
     if (INVALID(local_values)) { return; }
     #endif
+
     norm = CALL_VOLUME(local_values);
-
-    double scale, background;
     scale = values[0];
     background = values[1];
…
 #if MAX_PD > 0
 
+  const double *pd_value = values+2+NPARS;
+  const double *pd_weight = pd_value+details->pd_sum;
+
   // need product of weights at every Iq calc, so keep product of
   // weights from the outer loops so that weight = partial_weight * fast_weight
+  double pd_norm;
   double partial_weight; // product of weight w4*w3*w2 but not w1
   double spherical_correction; // cosine correction for latitude variation
   double weight; // product of partial_weight*w1*spherical_correction
 
-  // Location in the polydispersity hypercube, one index per dimension.
-  int pd_index[MAX_PD];
-
-  // Location of the coordinated parameters in their own sub-cubes.
-  int offset[NPARS];
-
-  // Number of coordinated indices
-  const int num_coord = details->num_coord;
-
   // Number of elements in the longest polydispersity loop
-  const int fast_length = details->pd_length[0];
+  const int p0_par = details->pd_par[0];
+  const int p0_length = details->pd_length[0];
+  const int p0_offset = details->pd_offset[0];
+  const int p0_is_theta = (p0_par == details->theta_par);
+  int p0_index;
 
   // Trigger the reset behaviour that happens at the end the fast loop
   // by setting the initial index >= weight vector length.
-  pd_index[0] = fast_length;
+  p0_index = p0_length;
 
   // Default the spherical correction to 1.0 in case it is not otherwise set
…
   // calls. This means initializing them to 0 at the start and accumulating
   // them between calls.
-  norm = pd_start == 0 ? 0.0 : result[nq];
+  pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
+
   if (pd_start == 0) {
     #ifdef USE_OPENMP
…
   for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
     // check if fast loop needs to be reset
-    if (pd_index[0] == fast_length) {
-      //printf("should be here with %d active\n", num_active);
+    if (p0_index == p0_length) {
 
-      // Compute position in polydispersity hypercube
-      for (int k=0; k < num_active; k++) {
-        pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k];
-        //printf("pd_index[%d] = %d\n",k,pd_index[k]);
-      }
-
-      // Compute partial weights
+      // Compute position in polydispersity hypercube and partial weight
       partial_weight = 1.0;
-      //printf("partial weight %d: ", loop_index);
-      for (int k=1; k < num_active; k++) {
-        double wi = weights[details->pd_offset[k] + pd_index[k]];
-        //printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi);
-        partial_weight *= wi;
-      }
-      //printf("\n");
-
-      // Update parameter offsets in weight vector
-      //printf("slow %d: ", loop_index);
-      for (int k=0; k < num_coord; k++) {
-        int par = details->par_coord[k];
-        int coord = details->pd_coord[k];
-        int this_offset = details->par_offset[par];
-        int block_size = 1;
-        for (int bit=0; coord != 0; bit++) {
-          if (coord&1) {
-            this_offset += block_size * pd_index[bit];
-            block_size *= details->pd_length[bit];
-          }
-          coord >>= 1;
-        }
-        offset[par] = this_offset;
-        pvec[par] = values[this_offset];
-        //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]);
-        // if theta is not coordinated with fast index, precompute spherical correction
-        if (par == details->theta_par && !(details->par_coord[k]&1)) {
-          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
+      for (int k=1; k < details->num_active; k++) {
+        int pk = details->pd_par[k];
+        int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
+        pvec[pk] = pd_value[index];
+        partial_weight *= pd_weight[index];
+        if (pk == details->theta_par) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
         }
       }
-      //printf("\n");
+      p0_index = loop_index%p0_length;
     }
 
-    // Update fast parameters
-    //printf("fast %d: ", loop_index);
-    for (int k=0; k < num_coord; k++) {
-      if (details->pd_coord[k]&1) {
-        const int par = details->par_coord[k];
-        pvec[par] = values[offset[par]++];
-        //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);
-        // if theta is coordinated with fast index, compute spherical correction each time
-        if (par == details->theta_par) {
-          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
-        }
-      }
+    // Update parameter p0
+    weight = partial_weight*pd_weight[p0_offset + p0_index];
+    pvec[p0_par] = pd_value[p0_offset + p0_index];
+    if (p0_is_theta) {
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
     }
-    //printf("\n");
-
-    // Increment fast index
-    const double wi = weights[details->pd_offset[0] + pd_index[0]];
-    weight = partial_weight*wi;
-    pd_index[0]++;
+    p0_index++;
 
     #ifdef INVALID
…
     // where it becomes zero.  If the entirety of the correction
     weight *= spherical_correction;
-    norm += weight * CALL_VOLUME(local_values);
+    pd_norm += weight * CALL_VOLUME(local_values);
 
     #ifdef USE_OPENMP
…
   }
 
-  if (pd_stop >= details->total_pd) {
+  if (pd_stop >= details->pd_prod) {
     // End of the PD loop we can normalize
     double scale, background;
…
     #endif
     for (int q_index=0; q_index < nq; q_index++) {
-      result[q_index] = (norm>0. ? scale*result[q_index]/norm + background : background);
+      result[q_index] = (pd_norm>0. ? scale*result[q_index]/pd_norm + background : background);
     }
   }
 
   // Remember the updated norm.
-  result[nq] = norm;
+  result[nq] = pd_norm;
 #endif // MAX_PD > 0
 }
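As a side note (not part of the changeset): the kernel above now reads everything from a single values vector, taking pd values at values+2+NPARS and pd weights pd_sum entries later. With sizes assumed purely for illustration, the offsets work out as follows:

    NPARS, pd_sum = 3, 5                        # assumed sizes for the example
    pd_value_offset = 2 + NPARS                 # kernel reads pd values at values+2+NPARS
    pd_weight_offset = pd_value_offset + pd_sum # and pd weights at pd_value+pd_sum
    total = pd_weight_offset + pd_sum           # full length of the packed vector
    print(pd_value_offset, pd_weight_offset, total)  # 5 10 15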
sasmodels/kernel_iq.cl
rae2b6b5 → ra738209

     int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
 #endif // MAX_PD > 0
-    int32_t par_offset[NPARS];  // offset of par value blocks in the value & weight vector
-    int32_t par_coord[NPARS];   // ids of the coordination parameters
-    int32_t pd_coord[NPARS];    // polydispersity coordination bitvector
+    int32_t pd_prod;            // total number of voxels in hypercube
+    int32_t pd_sum;             // total length of the weights vector
     int32_t num_active;         // number of non-trivial pd loops
-    int32_t total_pd;           // total number of voxels in hypercube
-    int32_t num_coord;          // number of coordinated parameters
     int32_t theta_par;          // id of spherical correction variable
 } ProblemDetails;
…
     const int32_t pd_stop,      // where we are stopping in the polydispersity loop
     global const ProblemDetails *details,
-    global const double *weights,
     global const double *values,
     global const double *q,     // nq q values, with padding to boundary
-    global double *result,      // nq+3 return values, again with padding
+    global double *result,      // nq+1 return values, again with padding
     const double cutoff         // cutoff in the polydispersity weight product
     )
 {
   // Storage for the current parameter values. These will be updated as we
-  // walk the polydispersity cube.
-  ParameterBlock local_values;  // current parameter values
-  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
-  double norm;
+  // walk the polydispersity cube.  local_values will be aliased to pvec.
+  local ParameterBlock local_values;
 
   // who we are and what element we are working with
   const int q_index = get_global_id(0);
-
-  // number of active loops
-  const int num_active = details->num_active;
+  const int thread = get_local_id(0);
 
   // Fill in the initial variables
-  for (int k=0; k < NPARS; k++) {
-    pvec[k] = values[details->par_offset[k]];
-  }
+  event_t e = async_work_group_copy((local double *)&local_values, values+2, NPARS, 0);
+  wait_group_events(1, &e);
 
   // Monodisperse computation
-  if (num_active == 0) {
+  if (details->num_active == 0) {
+    double norm, scale, background;
+    // TODO: only needs to be done by one process...
     #ifdef INVALID
     if (INVALID(local_values)) { return; }
     #endif
+
     norm = CALL_VOLUME(local_values);
-
-    double scale, background;
     scale = values[0];
     background = values[1];
…
   // norm will be shared across all threads.
 
+  // "values" is global and can't be assigned to a local, so even though only
+  // the alias is only needed for thread 0 it is allocated in all threads.
+  global const double *pd_value = values+2+NPARS;
+  global const double *pd_weight = pd_value+details->pd_sum;
+
   // need product of weights at every Iq calc, so keep product of
   // weights from the outer loops so that weight = partial_weight * fast_weight
-  double partial_weight; // product of weight w4*w3*w2 but not w1
-  double spherical_correction; // cosine correction for latitude variation
-  double weight; // product of partial_weight*w1*spherical_correction
-
-  // Location in the polydispersity hypercube, one index per dimension.
-  int pd_index[MAX_PD];
-
-  // Location of the coordinated parameters in their own sub-cubes.
-  int offset[NPARS];
-
-  // Number of coordinated indices
-  const int num_coord = details->num_coord;
+  local double pd_norm;
+  local double partial_weight; // product of weight w4*w3*w2 but not w1
+  local double spherical_correction; // cosine correction for latitude variation
+  local double weight; // product of partial_weight*w1*spherical_correction
+  local double *pvec;
+  local int p0_par;
+  local int p0_length;
+  local int p0_offset;
+  local int p0_is_theta;
+  local int p0_index;
 
   // Number of elements in the longest polydispersity loop
-  const int fast_length = details->pd_length[0];
-
-  // Trigger the reset behaviour that happens at the end the fast loop
-  // by setting the initial index >= weight vector length.
-  pd_index[0] = fast_length;
-
-  // Default the spherical correction to 1.0 in case it is not otherwise set
-  spherical_correction = 1.0;
-
-  // Since we are no longer looping over the entire polydispersity hypercube
-  // for each q, we need to track the result and normalization values between
-  // calls. This means initializing them to 0 at the start and accumulating
-  // them between calls.
-  norm = pd_start == 0 ? 0.0 : result[nq];
+  barrier(CLK_LOCAL_MEM_FENCE);
+  if (thread == 0) {
+    pvec = (local double *)(&local_values);
+
+    // Number of elements in the longest polydispersity loop
+    p0_par = details->pd_par[0];
+    p0_length = details->pd_length[0];
+    p0_offset = details->pd_offset[0];
+    p0_is_theta = (p0_par == details->theta_par);
+
+    // Trigger the reset behaviour that happens at the end the fast loop
+    // by setting the initial index >= weight vector length.
+    p0_index = p0_length;
+
+    // Default the spherical correction to 1.0 in case it is not otherwise set
+    spherical_correction = 1.0;
+
+    // Since we are no longer looping over the entire polydispersity hypercube
+    // for each q, we need to track the result and normalization values between
+    // calls. This means initializing them to 0 at the start and accumulating
+    // them between calls.
+    pd_norm = pd_start == 0 ? 0.0 : result[nq];
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
   if (q_index < nq) {
     this_result = pd_start == 0 ? 0.0 : result[q_index];
…
   // Loop over the weights then loop over q, accumulating values
   for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
-    // check if fast loop needs to be reset
-    if (pd_index[0] == fast_length) {
-      //printf("should be here with %d active\n", num_active);
-
-      // Compute position in polydispersity hypercube
-      for (int k=0; k < num_active; k++) {
-        pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k];
-        //printf("pd_index[%d] = %d\n",k,pd_index[k]);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (thread == 0) {
+      // check if fast loop needs to be reset
+      if (p0_index == p0_length) {
+        //printf("should be here with %d active\n", num_active);
+
+        // Compute position in polydispersity hypercube and partial weight
+        partial_weight = 1.0;
+        for (int k=1; k < details->num_active; k++) {
+          int pk = details->pd_par[k];
+          int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
+          pvec[pk] = pd_value[index];
+          partial_weight *= pd_weight[index];
+          //printf("index[%d] = %d\n",k,index);
+          if (pk == details->theta_par) {
+            spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
+          }
+        }
+        p0_index = loop_index%p0_length;
+        //printf("\n");
       }
 
-      // need to compute the product of the weights. If the vector were really
-      // long, we could split the work into groups, with each thread taking
-      // every nth weight, but there really is no call for it here. We could
-      // also do some clever pair-wise multiplication similar to parallel
-      // prefix, but again simpler is probably faster since n is likely small.
-      // Compute partial weights
-      partial_weight = 1.0;
-      //printf("partial weight %d: ", loop_index);
-      for (int k=1; k < num_active; k++) {
-        double wi = weights[details->pd_offset[k] + pd_index[k]];
-        //printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi);
-        partial_weight *= wi;
+      // Update parameter p0
+      weight = partial_weight*pd_weight[p0_offset + p0_index];
+      pvec[p0_par] = pd_value[p0_offset + p0_index];
+      if (p0_is_theta) {
+        spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
       }
-      //printf("\n");
-
-      // Update parameter offsets in weight vector
-      //printf("slow %d: ", loop_index);
-      for (int k=0; k < num_coord; k++) {
-        int par = details->par_coord[k];
-        int coord = details->pd_coord[k];
-        int this_offset = details->par_offset[par];
-        int block_size = 1;
-        for (int bit=0; coord != 0; bit++) {
-          if (coord&1) {
-            this_offset += block_size * pd_index[bit];
-            block_size *= details->pd_length[bit];
-          }
-          coord >>= 1;
-        }
-        offset[par] = this_offset;
-        pvec[par] = values[this_offset];
-        //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]);
-        // if theta is not coordinated with fast index, precompute spherical correction
-        if (par == details->theta_par && !(details->par_coord[k]&1)) {
-          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
-        }
-      }
-      //printf("\n");
-    }
-
-    // Update fast parameters
-    //printf("fast %d: ", loop_index);
-    for (int k=0; k < num_coord; k++) {
-      if (details->pd_coord[k]&1) {
-        const int par = details->par_coord[k];
-        pvec[par] = values[offset[par]++];
-        //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);
-        // if theta is coordinated with fast index, compute spherical correction each time
-        if (par == details->theta_par) {
-          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
-        }
-      }
+      p0_index++;
     }
+    barrier(CLK_LOCAL_MEM_FENCE);
     //printf("\n");
 
     // Increment fast index
-    const double wi = weights[details->pd_offset[0] + pd_index[0]];
-    weight = partial_weight*wi;
-    pd_index[0]++;
 
     #ifdef INVALID
…
     // where it becomes zero.  If the entirety of the correction
     weight *= spherical_correction;
-    norm += weight * CALL_VOLUME(local_values);
+    pd_norm += weight * CALL_VOLUME(local_values);
 
     const double scattering = CALL_IQ(q, q_index, local_values);
…
 
   if (q_index < nq) {
-    if (pd_stop >= details->total_pd) {
+    if (pd_stop >= details->pd_prod) {
       // End of the PD loop we can normalize
       double scale, background;
       scale = values[0];
       background = values[1];
-      result[q_index] = (norm>0. ? scale*this_result/norm + background : background);
+      result[q_index] = (pd_norm>0. ? scale*this_result/pd_norm + background : background);
     } else {
       // Partial result, so remember it but don't normalize it.
…
 
     // Remember the updated norm.
-    if (q_index == 0) result[nq] = norm;
+    if (q_index == 0) result[nq] = pd_norm;
   }
sasmodels/kernelcl.py
r56b2687 → ra738209

                      else np.float32)  # will never get here, so use np.float32
 
-    def __call__(self, call_details, weights, values, cutoff):
+    def __call__(self, call_details, values, cutoff):
         # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray
         context = self.queue.context
…
         details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=call_details.buffer)
-        weights_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
-                              hostbuf=weights) if len(weights) else None
         values_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                              hostbuf=values)
…
         # Call kernel and retrieve results
         step = 100
-        for start in range(0, call_details.total_pd, step):
-            stop = min(start+step, call_details.total_pd)
+        for start in range(0, call_details.pd_prod, step):
+            stop = min(start+step, call_details.pd_prod)
             args = [
                 np.uint32(self.q_input.nq), np.int32(start), np.int32(stop),
-                details_b, weights_b, values_b, self.q_input.q_b, self.result_b,
+                details_b, values_b, self.q_input.q_b, self.result_b,
                 self.real(cutoff),
             ]
…
 
         # Free buffers
-        for v in (details_b, weights_b, values_b):
+        for v in (details_b, values_b):
             if v is not None: v.release()
sasmodels/kerneldll.py
r56b2687 → ra738209

     exist yet if it hasn't been compiled.
     """
-    return os.path.join(DLL_PATH, dll_name(model_info, dtype)+".so")
+    return os.path.join(DLL_PATH, dll_name(model_info, dtype))
 
…
     need_recompile = dll_time < newest_source
     if need_recompile:
-        basename = dll_name(model_info, dtype) + "_"
-        fid, filename = tempfile.mkstemp(suffix=".c", prefix=basename)
+        basename = os.path.splitext(os.path.basename(dll))[0] + "_"
+        fd, filename = tempfile.mkstemp(suffix=".c", prefix=basename)
         source = generate.convert_type(source, dtype)
-        fd, filename = tempfile.mkstemp(suffix=".c", prefix=tempfile_prefix)
         with os.fdopen(fd, "w") as file:
             file.write(source)
…
                     else c_longdouble)
 
-        # int, int, int, int*, double*, double*, double*, double*, double*, double
-        argtypes = [c_int32]*3 + [c_void_p]*5 + [fp]
+        # int, int, int, int*, double*, double*, double*, double*, double
+        argtypes = [c_int32]*3 + [c_void_p]*4 + [fp]
         self._Iq = self._dll[generate.kernel_name(self.info, is_2d=False)]
         self._Iqxy = self._dll[generate.kernel_name(self.info, is_2d=True)]
…
                     else np.float128)
 
-    def __call__(self, call_details, weights, values, cutoff):
+    def __call__(self, call_details, values, cutoff):
         # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray
 
         #print("in kerneldll")
-        #print("weights", weights)
         #print("values", values)
-        start, stop = 0, call_details.total_pd
+        start, stop = 0, call_details.pd_prod
         args = [
             self.q_input.nq, # nq
…
             stop, # pd_stop pd_stride[MAX_PD]
             call_details.buffer.ctypes.data, # problem
-            weights.ctypes.data, # weights
             values.ctypes.data, #pars
             self.q_input.q.ctypes.data, #q
sasmodels/kernelpy.py
r7ae2b7f → ra738209

 :class:`kernelcl.GpuModel` and :class:`kerneldll.DllModel`.
 """
+from __future__ import division, print_function
+
 import numpy as np  # type: ignore
 from numpy import pi, cos  #type: ignore
…
                        else (lambda: 1.0))
 
-    def __call__(self, call_details, weights, values, cutoff):
+    def __call__(self, call_details, values, cutoff):
         assert isinstance(call_details, details.CallDetails)
         res = _loops(self._parameter_vector, self._form, self._volume,
-                     self.q_input.nq, call_details, weights, values, cutoff)
+                     self.q_input.nq, call_details, values, cutoff)
         return res
 
…
         self.q_input = None
 
-def _loops(parameters, form, form_volume, nq, call_details,
-           weights, values, cutoff):
+def _loops(parameters, form, form_volume, nq, details,
+           values, cutoff):
     # type: (np.ndarray, Callable[[], np.ndarray], Callable[[], float], int, details.CallDetails, np.ndarray, np.ndarray, float) -> None
     ################################################################
…
     #                                                              #
     ################################################################
-    parameters[:] = values[call_details.par_offset]
+    NPARS = len(parameters)
+    parameters[:] = values[2:NPARS+2]
     scale, background = values[0], values[1]
-    if call_details.num_active == 0:
+    if details.num_active == 0:
         norm = float(form_volume())
         if norm > 0.0:
…
         return np.ones(nq, 'd')*background
 
+    pd_value = values[2+NPARS:2+NPARS+details.pd_sum]
+    pd_weight = values[2+NPARS+details.pd_sum:]
+
+    pd_norm = 0.0
+    spherical_correction = 1.0
     partial_weight = np.NaN
-    spherical_correction = 1.0
-    pd_stride = call_details.pd_stride[:call_details.num_active]
-    pd_length = call_details.pd_length[:call_details.num_active]
-    pd_offset = call_details.pd_offset[:call_details.num_active]
-    pd_index = np.empty_like(pd_offset)
-    offset = np.empty_like(call_details.par_offset)
-    theta = call_details.theta_par
-    fast_length = pd_length[0]
-    pd_index[0] = fast_length
+    weight = np.NaN
+
+    p0_par = details.pd_par[0]
+    p0_is_theta = (p0_par == details.theta_par)
+    p0_length = details.pd_length[0]
+    p0_index = p0_length
+    p0_offset = details.pd_offset[0]
+
+    pd_par = details.pd_par[:details.num_active]
+    pd_offset = details.pd_offset[:details.num_active]
+    pd_stride = details.pd_stride[:details.num_active]
+    pd_length = details.pd_length[:details.num_active]
+
     total = np.zeros(nq, 'd')
-    norm = 0.0
-    for loop_index in range(call_details.total_pd):
+    for loop_index in range(details.pd_prod):
         # update polydispersity parameter values
-        if pd_index[0] == fast_length:
-            pd_index[:] = (loop_index/pd_stride)%pd_length
-            partial_weight = np.prod(weights[pd_offset+pd_index][1:])
-            for k in range(call_details.num_coord):
-                par = call_details.par_coord[k]
-                coord = call_details.pd_coord[k]
-                this_offset = call_details.par_offset[par]
-                block_size = 1
-                for bit in range(len(pd_offset)):
-                    if coord&1:
-                        this_offset += block_size * pd_index[bit]
-                        block_size *= pd_length[bit]
-                    coord >>= 1
-                    if coord == 0: break
-                offset[par] = this_offset
-                parameters[par] = values[this_offset]
-                if par == theta and not (call_details.par_coord[k]&1):
-                    spherical_correction = max(abs(cos(pi/180 * parameters[theta])), 1e-6)
-            for k in range(call_details.num_coord):
-                if call_details.pd_coord[k]&1:
-                    #par = call_details.par_coord[k]
-                    parameters[par] = values[offset[par]]
-                    #print "par",par,offset[par],parameters[par+2]
-                    offset[par] += 1
-                    if par == theta:
-                        spherical_correction = max(abs(cos(pi/180 * parameters[theta])), 1e-6)
-
-        weight = partial_weight * weights[pd_offset[0] + pd_index[0]]
-        pd_index[0] += 1
+        if p0_index == p0_length:
+            pd_index = (loop_index//pd_stride)%pd_length
+            parameters[pd_par] = pd_value[pd_offset+pd_index]
+            partial_weight = np.prod(pd_weight[pd_offset+pd_index][1:])
+            if details.theta_par >= 0:
+                spherical_correction = max(abs(cos(pi/180 * parameters[details.theta_par])), 1e-6)
+            p0_index = loop_index%p0_length
+
+        weight = partial_weight * pd_weight[p0_offset + p0_index]
+        parameters[p0_par] = pd_value[p0_offset + p0_index]
+        if p0_is_theta:
+            spherical_correction = max(abs(cos(pi/180 * parameters[p0_par])), 1e-6)
+        p0_index += 1
         if weight > cutoff:
             # Call the scattering function
…
             weight *= spherical_correction
             total += weight * I
-            norm += weight * form_volume()
-
-    if norm > 0.0:
-        return (scale/norm)*total + background
+            pd_norm += weight * form_volume()
+
+    if pd_norm > 0.0:
+        return (scale/pd_norm)*total + background
     else:
         return np.ones(nq, 'd')*background
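For illustration only (not part of the changeset), the (loop_index//pd_stride)%pd_length unravelling used in the rewritten _loops() above can be exercised on its own; the lengths below are assumptions chosen for the sketch:

    import numpy as np

    pd_length = np.array([5, 3, 2])                          # longest (fastest) loop first
    pd_stride = np.cumprod(np.hstack((1, pd_length)))[:-1]   # [1, 5, 15]
    for loop_index in range(pd_length.prod()):
        pd_index = (loop_index // pd_stride) % pd_length
        # pd_index[0] cycles fastest, pd_index[-1] slowest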
sasmodels/sasview_model.py
r56b2687 → ra738209

         else:
             q_vectors = [np.asarray(qx)]
-        kernel = self._model.make_kernel(q_vectors)
+        calculator = self._model.make_kernel(q_vectors)
         pairs = [self._get_weights(p)
                  for p in self._model_info.parameters.call_parameters]
-        call_details, weight, value = kernel.build_details(kernel, pairs)
-        result = kernel(call_details, weight, value, cutoff=self.cutoff)
-        kernel.release()
+        call_details, value = kernel.build_details(calculator, pairs)
+        result = calculator(call_details, value, cutoff=self.cutoff)
+        calculator.release()
         return result