Changeset ee72c70 in sasmodels
- Timestamp:
- Apr 18, 2016 7:43:26 PM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 51973e1
- Parents:
- ae2b6b5 (diff), 1bf66d9 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - Location:
- sasmodels
- Files:
-
- 1 added
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/model_test.py
r0ff62d4 r38a9b07 259 259 self.assertTrue(np.isfinite(actual_yi), 260 260 'invalid f(%s): %s' % (xi, actual_yi)) 261 elif np.isnan(yi): 262 self.assertTrue(np.isnan(actual_yi), 263 'f(%s): expected:%s; actual:%s' 264 % (xi, yi, actual_yi)) 261 265 else: 262 self.assertTrue(is_near(yi, actual_yi, 5), 266 # is_near does not work for infinite values, so also test 267 # for exact values. Note that this will not 268 self.assertTrue(yi==actual_yi or is_near(yi, actual_yi, 5), 263 269 'f(%s); expected:%s; actual:%s' 264 270 % (xi, yi, actual_yi)) -
sasmodels/models/__init__.py
r32c160a r1ca1fd9 1 """ 2 1D Modeling for SAS 3 """ 4 #from sas.models import * 5 6 import os 7 from distutils.filelist import findall 8 9 __version__ = "2.1.0" 10 11 def get_data_path(media): 12 """ 13 """ 14 # Check for data path in the package 15 path = os.path.join(os.path.dirname(__file__), media) 16 if os.path.isdir(path): 17 return path 18 19 # Check for data path next to exe/zip file. 20 # If we are inside a py2exe zip file, we need to go up 21 # to get to the directory containing 22 # the media for this module 23 path = os.path.dirname(__file__) 24 #Look for maximum n_dir up of the current dir to find media 25 n_dir = 12 26 for i in range(n_dir): 27 path, _ = os.path.split(path) 28 media_path = os.path.join(path, media) 29 if os.path.isdir(media_path): 30 module_media_path = os.path.join(media_path, 'models_media') 31 if os.path.isdir(module_media_path): 32 return module_media_path 33 return media_path 34 35 raise RuntimeError('Could not find models media files') 36 37 def data_files(): 38 """ 39 Return the data files associated with media. 40 41 The format is a list of (directory, [files...]) pairs which can be 42 used directly in setup(...,data_files=...) for setup.py. 43 44 """ 45 data_files = [] 46 path_img = get_data_path(media=os.path.join("sasmodels","sasmodels","models","img")) 47 #path_img = get_data_path(media="img") 48 im_list = findall(path_img) 49 #for f in findall(path): 50 # if os.path.isfile(f) and f not in im_list: 51 # data_files.append(('media/models_media', [f])) 52 53 for f in im_list: 54 data_files.append(('media/models_media/img', [f])) 55 return data_files 56 -
sasmodels/models/porod.py
rec45c4f r82923a6 26 26 """ 27 27 28 from numpy import sqrt, power 28 from numpy import sqrt, power, inf, errstate 29 29 30 30 name = "porod" … … 42 42 @param q: Input q-value 43 43 """ 44 return 1.0/power(q, 4) 44 with errstate(divide='ignore'): 45 return power(q, -4) 45 46 46 47 Iq.vectorized = True # Iq accepts an array of q values … … 58 59 demo = dict(scale=1.5, background=0.5) 59 60 60 tests = [[{'scale': 0.00001, 'background':0.01}, 0.04, 3.916250]] 61 tests = [ 62 [{'scale': 0.00001, 'background':0.01}, 0.04, 3.916250], 63 [{}, 0.0, inf], 64 ] -
sasmodels/models/spherical_sld.c
r299dcce r1bf66d9 1 //Headers 2 static double form_volume(double thick_inter[], 3 double thick_flat_[], 4 double core_radius, 5 int n_shells); 6 7 double Iq(double q, 1 static double form_volume( 8 2 int n_shells, 9 double thick_inter[], 10 double func_inter[], 11 double sld_core, 12 double sld_solvent, 13 double sld_flat[], 3 double radius_core, 14 4 double thick_flat[], 15 double nu_inter[], 16 int npts_inter, 17 double core_radius); 18 19 double Iqxy(double qx, double qy, 20 int n_shells, 21 double thick_inter[], 22 double func_inter[], 23 double sld_core, 24 double sld_solvent, 25 double sld_flat[], 26 double thick_flat[], 27 double nu_inter[], 28 int npts_inter, 29 double core_radius); 30 31 //Main code 32 static double form_volume(double thick_inter[], 33 double thick_flat_[], 34 double core_radius, 35 int n) 5 double thick_inter[]) 36 6 { 37 double radius = 0.0;38 7 int i; 39 double r = core_radius;40 for (i=0; i < n ; i++) {8 double r = radius_core; 9 for (i=0; i < n_shells; i++) { 41 10 r += thick_inter[i]; 42 11 r += thick_flat[i]; … … 56 25 double background = dp[6]; 57 26 double npts = dp[57]; //number of sub_layers in each interface 58 double nsl=npts;//21.0; //nsl = Num_sub_layer: MUST ODD number in double //no other number works now27 double nsl=npts;//21.0; //nsl = Num_sub_layer: must be ODD double number 59 28 int n_s; 60 29 … … 63 32 double pi; 64 33 65 //int* fun_type;66 //double* sld;67 //double* thick_inter;68 //double* thick;69 //double* fun_coef;70 71 34 double total_thick=0.0; 72 35 73 //fun_type = (int*)malloc((n+2)*sizeof(int));74 //sld = (double*)malloc((n+2)*sizeof(double));75 //thick_inter = (double*)malloc((n+2)*sizeof(double));76 //thick = (double*)malloc((n+2)*sizeof(double));77 //fun_coef = (double*)malloc((n+2)*sizeof(double));78 79 //TODO: Solution to avoid mallocs but probablyu can be done better80 36 int fun_type[12]; 81 37 double sld[12]; … … 175 131 if (fabs(slope) > 0.0 ){ 176 132 //fun = sign * 3.0 * r * (2.0*qr*sin(qr)-((qr*qr)-2.0)*cos(qr))/(qr * qr * qr * qr); 177 fun = sign * r * sph_j1c(qr) + sign * 3.0 * sin(qr)/(qr * qr * q ) + sign * 6.0 * cos(qr)/(qr * qr * qr * q); 133 fun = sign * r * sph_j1c(qr) + sign * 3.0 * sin(qr)/(qr * qr * q ) 134 + sign * 6.0 * cos(qr)/(qr * qr * qr * q); 178 135 } 179 136 } … … 203 160 } 204 161 } 205 //vol += vol_sub;206 162 f2 = f * f / vol; 207 //f2 *= scale;208 //f2 += background;209 //free(fun_type);210 //free(sld);211 //free(thick_inter);212 //free(thick);213 //free(fun_coef);214 163 215 164 return (f2); … … 222 171 * @return: function value 223 172 */ 224 double Iq(double q,173 static double Iq(double q, 225 174 int n_shells, 226 double thick_inter[],227 double func_inter[],175 int npts_inter, 176 double radius_core, 228 177 double sld_core, 229 178 double sld_solvent, 230 179 double sld_flat[], 231 180 double thick_flat[], 232 double nu_inter[], 233 int npts_inter, 234 double core_radius 235 ) { 181 double func_inter[], 182 double thick_inter[], 183 double nu_inter[] ) { 236 184 237 185 //printf("Number of points %d\n",npts_inter); 238 186 double intensity; 239 //TODO: Remove this container at later stage. It is only kept to minimize stupid errors now187 //TODO: Remove this container at later stage. 240 188 double dp[60]; 241 189 dp[0] = n_shells; 242 190 //This is scale will also have to be removed at some stage 243 191 dp[1] = 1.0; 244 dp[2] = thick_inter _0;245 dp[3] = func_inter _0;192 dp[2] = thick_inter[0]; 193 dp[3] = func_inter[0]; 246 194 dp[4] = sld_core; 247 195 dp[5] = sld_solvent; 248 196 dp[6] = 0.0; 249 250 for (i=0; i<n; i++){ 197 dp[7] = sld_flat[0]; 198 //TODO: Something is messed up with this data strcucture! 199 dp[17] = thick_inter[0]; 200 dp[27] = thick_flat[0]; 201 dp[37] = func_inter[0]; 202 dp[47] = nu_inter[0]; 203 204 for (int i=1; i<=n_shells; i++){ 251 205 dp[i+7] = sld_flat[i]; 252 206 dp[i+17] = thick_inter[i]; … … 257 211 258 212 dp[57] = npts_inter; 259 dp[58] = nu_inter _0;260 dp[59] = rad _core_0;213 dp[58] = nu_inter[0]; 214 dp[59] = radius_core; 261 215 262 216 intensity = 1.0e-4*sphere_sld_kernel(dp,q); … … 271 225 * @return: function value 272 226 */ 273 double Iqxy(double qx, double qy, 227 228 /*static double Iqxy(double qx, double qy, 274 229 int n_shells, 275 double thick_inter[],276 double func_inter[],230 int npts_inter, 231 double radius_core 277 232 double sld_core, 278 233 double sld_solvent, 279 234 double sld_flat[], 280 235 double thick_flat[], 236 double func_inter[], 237 double thick_inter[], 281 238 double nu_inter[], 282 int npts_inter,283 double core_radius284 239 ) { 285 240 286 241 double q = sqrt(qx*qx + qy*qy); 287 return Iq(q, n_shells, thick_inter_0, func_inter_0, core0_sld, solvent_sld, 288 flat1_sld, flat2_sld, flat3_sld, flat4_sld, flat5_sld, 289 flat6_sld, flat7_sld, flat8_sld, flat9_sld, flat10_sld, 290 thick_inter_1, thick_inter_2, thick_inter_3, thick_inter_4, thick_inter_5, 291 thick_inter_6, thick_inter_7, thick_inter_8, thick_inter_9, thick_inter_10, 292 thick_flat_1, thick_flat_2, thick_flat_3, thick_flat_4, thick_flat_5, 293 thick_flat_6, thick_flat_7, thick_flat_8, thick_flat_9, thick_flat_10, 294 func_inter_1, func_inter_2, func_inter_3, func_inter_4, func_inter_5, 295 func_inter_6, func_inter_7, func_inter_8, func_inter_9, func_inter_10, 296 nu_inter_1, nu_inter_2, nu_inter_3, nu_inter_4, nu_inter_5, 297 nu_inter_6, nu_inter_7, nu_inter_8, nu_inter_9, nu_inter_10, 298 npts_inter, nu_inter_0, rad_core_0); 299 300 //TODO: Check if evalute rphi is not needed? 301 302 } 303 242 return Iq(q, n_shells, npts_inter, radius_core, sld_core, sld_solvent, 243 sld_flat[], thick_flat[], func_inter[], thick_inter[], nu_inter[]) 244 }*/ 245 -
sasmodels/models/spherical_sld.py
rd2bb604 r1bf66d9 170 170 # pylint: disable=bad-whitespace, line-too-long 171 171 # ["name", "units", default, [lower, upper], "type", "description"], 172 parameters = [["n", "", 1, [0, 9], "", "number of shells"], 172 parameters = [["n_shells", "", 1, [0, 9], "", "number of shells"], 173 ["npts_inter", "", 35, [0, 35], "", "number of points in each sublayer Must be odd number"], 173 174 ["radius_core", "Ang", 50.0, [0, inf], "", "intern layer thickness"], 174 175 ["sld_core", "1e-6/Ang^2", 2.07, [-inf, inf], "", "sld function flat"], 175 ["sld_flat[n]", "1e-6/Ang^2", 4.06, [-inf, inf], "", "sld function flat"],176 ["thick_flat[n]", "Ang", 100.0, [0, inf], "", "flat layer_thickness"],177 ["func_inter[n]", "", 0, [0, 4], "", "Erf:0, RPower:1, LPower:2, RExp:3, LExp:4"],178 ["thick_inter[n]", "Ang", 50.0, [0, inf], "", "intern layer thickness"],179 ["inter_nu[n]", "", 2.5, [-inf, inf], "", "steepness parameter"],180 ["npts_inter", "", 35, [0, 35], "", "number of points in each sublayer Must be odd number"],181 176 ["sld_solvent", "1e-6/Ang^2", 1.0, [-inf, inf], "", "sld function solvent"], 177 ["sld_flat[n_shells]", "1e-6/Ang^2", 4.06, [-inf, inf], "", "sld function flat"], 178 ["thick_flat[n_shells]", "Ang", 100.0, [0, inf], "", "flat layer_thickness"], 179 ["func_inter[n_shells]", "", 0, [0, 4], "", "Erf:0, RPower:1, LPower:2, RExp:3, LExp:4"], 180 ["thick_inter[n_shells]", "Ang", 50.0, [0, inf], "", "intern layer thickness"], 181 ["nu_inter[n_shells]", "", 2.5, [-inf, inf], "", "steepness parameter"], 182 182 ] 183 183 # pylint: enable=bad-whitespace, line-too-long 184 #source = ["lib/librefl.c", "lib/sph_j1c.c", "spherical_sld.c"] 185 186 def Iq(q, *args, **kw): 187 return q 188 189 def Iqxy(qx, *args, **kw): 190 return qx 191 192 193 demo = dict( 194 n=4, 195 scale=1.0, 196 solvent_sld=1.0, 197 background=0.0, 198 npts_inter=35.0, 199 ) 184 source = ["lib/librefl.c", "lib/sph_j1c.c", "spherical_sld.c"] 185 186 #def Iq(q, *args, **kw): 187 # return q 188 189 #def Iqxy(qx, *args, **kw): 190 # return qx 191 192 def profile(n_shells, radius_core, sld_core, sld_solvent, sld_flat, 193 thick_flat, func_inter, thick_inter, nu_inter, npts_inter): 194 """ 195 Returns shape profile with x=radius, y=SLD. 196 """ 197 198 z = [] 199 beta = [] 200 z0 = 0 201 # two sld points for core 202 z.append(0) 203 beta.append(sld_core) 204 z.append(radius_core) 205 beta.append(sld_core) 206 z0 += radius_core 207 208 for i in range(1, n_shells+2): 209 dz = thick_inter[i-1]/npts_inter 210 # j=0 for interface, j=1 for flat layer 211 for j in range(0, 2): 212 # interation for sub-layers 213 for n_s in range(0, npts_inter+1): 214 if j == 1: 215 if i == n_shells+1: 216 break 217 # shift half sub thickness for the first point 218 z0 -= dz#/2.0 219 z.append(z0) 220 #z0 -= dz/2.0 221 z0 += thick_flat[i] 222 sld_i = sld_flat[i] 223 beta.append(sld_flat[i]) 224 dz = 0 225 else: 226 nu = nu_inter[i-1] 227 # decide which sld is which, sld_r or sld_l 228 if i == 1: 229 sld_l = sld_core 230 else: 231 sld_l = sld_flat[i-1] 232 if i == n_shells+1: 233 sld_r = sld_solvent 234 else: 235 sld_r = sld_flat[i] 236 # get function type 237 func_idx = func_inter[i-1] 238 # calculate the sld 239 sld_i = intersldfunc(func_idx, npts_inter, n_s, nu, 240 sld_l, sld_r) 241 # append to the list 242 z.append(z0) 243 beta.append(sld_i) 244 z0 += dz 245 if j == 1: 246 break 247 z.append(z0) 248 beta.append(sld_solvent) 249 z_ext = z0/5.0 250 z.append(z0+z_ext) 251 beta.append(sld_solvent) 252 # return sld profile (r, beta) 253 return np.asarray(z), np.asarray(beta)*1e-6 254 255 def ER(core_radius, n, thickness): 256 return np.sum(thickness[:n[0]], axis=0) + core_radius 257 258 def VR(core_radius, n, thickness): 259 return 1.0, 1.0 260 261 262 demo = { 263 "n_shells":4, 264 "npts_inter":35.0, 265 "radius_core":50.0, 266 "sld_core":2.07, 267 "sld_solvent": 1.0, 268 "sld_flat":[4.0,3.5,4.0,3.5,4.0], 269 "thick_flat":[100.0,100.0,100.0,100.0,100.0], 270 "func_inter":[0,0,0,0,0], 271 "thick_inter":[50.0,50.0,50.0,50.0,50.0], 272 "nu_inter":[2.5,2.5,2.5,2.5,2.5] 273 } 200 274 201 275 #TODO: Not working yet -
sasmodels/generate.py
rf2f67a6 rae2b6b5 473 473 dll_code = load_template('kernel_iq.c') 474 474 ocl_code = load_template('kernel_iq.cl') 475 #ocl_code = load_template('kernel_iq_local.cl') 475 476 user_code = [open(f).read() for f in model_sources(model_info)] 476 477 -
sasmodels/kernel_iq.c
rf2f67a6 rae2b6b5 52 52 // Storage for the current parameter values. These will be updated as we 53 53 // walk the polydispersity cube. 54 localParameterBlock local_values; // current parameter values54 ParameterBlock local_values; // current parameter values 55 55 double *pvec = (double *)(&local_values); // Alias named parameters with a vector 56 56 double norm; … … 74 74 norm = CALL_VOLUME(local_values); 75 75 76 const double scale = values[0];77 const double background = values[1];78 // result[nq] = norm; // Total volume normalization76 double scale, background; 77 scale = values[0]; 78 background = values[1]; 79 79 80 80 #ifdef USE_OPENMP 81 81 #pragma omp parallel for 82 82 #endif 83 for (int i=0; i < nq; i++) {84 double scattering = CALL_IQ(q, i, local_values);85 result[ i] = (norm>0. ? scale*scattering/norm + background : background);83 for (int q_index=0; q_index < nq; q_index++) { 84 double scattering = CALL_IQ(q, q_index, local_values); 85 result[q_index] = (norm>0. ? scale*scattering/norm + background : background); 86 86 } 87 87 return; … … 89 89 90 90 #if MAX_PD > 0 91 // If it is the first round initialize the result to zero, otherwise 92 // assume that the previous result has been passed back. 93 // Note: doing this even in the monodisperse case in order to handle the 94 // rare case where the model parameters are invalid and zero is returned. 95 // So slightly increased cost for slightly smaller code size. 91 92 // need product of weights at every Iq calc, so keep product of 93 // weights from the outer loops so that weight = partial_weight * fast_weight 94 double partial_weight; // product of weight w4*w3*w2 but not w1 95 double spherical_correction; // cosine correction for latitude variation 96 double weight; // product of partial_weight*w1*spherical_correction 97 98 // Location in the polydispersity hypercube, one index per dimension. 99 int pd_index[MAX_PD]; 100 101 // Location of the coordinated parameters in their own sub-cubes. 102 int offset[NPARS]; 103 104 // Number of coordinated indices 105 const int num_coord = details->num_coord; 106 107 // Number of elements in the longest polydispersity loop 108 const int fast_length = details->pd_length[0]; 109 110 // Trigger the reset behaviour that happens at the end the fast loop 111 // by setting the initial index >= weight vector length. 112 pd_index[0] = fast_length; 113 114 // Default the spherical correction to 1.0 in case it is not otherwise set 115 spherical_correction = 1.0; 116 117 // Since we are no longer looping over the entire polydispersity hypercube 118 // for each q, we need to track the result and normalization values between 119 // calls. This means initializing them to 0 at the start and accumulating 120 // them between calls. 121 norm = pd_start == 0 ? 0.0 : result[nq]; 96 122 if (pd_start == 0) { 97 123 #ifdef USE_OPENMP 98 124 #pragma omp parallel for 99 125 #endif 100 for (int i=0; i < nq+1; i++) { 101 result[i] = 0.0; 102 } 103 norm = 0.0; 104 } else { 105 norm = result[nq]; 106 } 107 108 // need product of weights at every Iq calc, so keep product of 109 // weights from the outer loops so that weight = partial_weight * fast_weight 110 double partial_weight = NAN; // product of weight w4*w3*w2 but not w1 111 double spherical_correction = 1.0; // cosine correction for latitude variation 112 113 // Location in the polydispersity hypercube, one index per dimension. 114 local int pd_index[MAX_PD]; 115 116 // Location of the coordinated parameters in their own sub-cubes. 117 local int offset[NPARS]; 118 119 // Trigger the reset behaviour that happens at the end the fast loop 120 // by setting the initial index >= weight vector length. 121 const int fast_length = details->pd_length[0]; 122 pd_index[0] = fast_length; 123 124 // Number of coordinated indices 125 const int num_coord = details->num_coord; 126 for (int q_index=0; q_index < nq; q_index++) { 127 result[q_index] = 0.0; 128 } 129 } 126 130 127 131 // Loop over the weights then loop over q, accumulating values … … 172 176 } 173 177 174 // Increment fast index 175 const double wi = weights[details->pd_offset[0] + pd_index[0]++]; 176 double weight = partial_weight*wi; 178 // Update fast parameters 177 179 //printf("fast %d: ", loop_index); 178 180 for (int k=0; k < num_coord; k++) { … … 189 191 //printf("\n"); 190 192 193 // Increment fast index 194 const double wi = weights[details->pd_offset[0] + pd_index[0]]; 195 weight = partial_weight*wi; 196 pd_index[0]++; 197 191 198 #ifdef INVALID 192 199 if (INVALID(local_values)) continue; … … 204 211 #pragma omp parallel for 205 212 #endif 206 for (int i=0; i < nq; i++) { 207 const double scattering = CALL_IQ(q, i, local_values); 208 result[i] += weight*scattering; 209 } 210 } 211 } 212 213 // End of the PD loop we can normalize 213 for (int q_index=0; q_index < nq; q_index++) { 214 const double scattering = CALL_IQ(q, q_index, local_values); 215 result[q_index] += weight*scattering; 216 } 217 } 218 } 219 214 220 if (pd_stop >= details->total_pd) { 215 const double scale = values[0]; 216 const double background = values[1]; 221 // End of the PD loop we can normalize 222 double scale, background; 223 scale = values[0]; 224 background = values[1]; 217 225 #ifdef USE_OPENMP 218 226 #pragma omp parallel for 219 227 #endif 220 for (int i=0; i < nq; i++) {221 result[ i] = (norm>0. ? scale*result[i]/norm + background : background);228 for (int q_index=0; q_index < nq; q_index++) { 229 result[q_index] = (norm>0. ? scale*result[q_index]/norm + background : background); 222 230 } 223 231 } -
sasmodels/kernel_iq.cl
rf2f67a6 rae2b6b5 50 50 ) 51 51 { 52 double norm;53 54 // who we are and what element we are working with55 const int q_index = get_global_id(0);56 57 // number of active loops58 const int num_active = details->num_active;59 60 52 // Storage for the current parameter values. These will be updated as we 61 53 // walk the polydispersity cube. 62 54 ParameterBlock local_values; // current parameter values 63 55 double *pvec = (double *)(&local_values); // Alias named parameters with a vector 56 double norm; 57 58 // who we are and what element we are working with 59 const int q_index = get_global_id(0); 60 61 // number of active loops 62 const int num_active = details->num_active; 63 64 64 // Fill in the initial variables 65 for (int k =0; k < NPARS; k++) {65 for (int k=0; k < NPARS; k++) { 66 66 pvec[k] = values[details->par_offset[k]]; 67 67 } … … 72 72 if (INVALID(local_values)) { return; } 73 73 #endif 74 norm = CALL_VOLUME(local_values); 74 75 75 76 double scale, background; 76 norm = CALL_VOLUME(local_values);77 77 scale = values[0]; 78 78 background = values[1]; 79 80 // if (i==0) result[nq] = norm; // Total volume normalization81 79 82 80 if (q_index < nq) { … … 89 87 #if MAX_PD > 0 90 88 91 // If it is the first round initialize the result to zero, otherwise92 // assume that the previous result has been passed back.93 // Note: doing this even in the monodisperse case in order to handle the94 // rare case where the model parameters are invalid and zero is returned.95 // So slightly increased cost for slightly smaller code size.96 89 double this_result; 97 90 … … 102 95 // weights from the outer loops so that weight = partial_weight * fast_weight 103 96 double partial_weight; // product of weight w4*w3*w2 but not w1 104 double spherical_correction; // cosine correction for latitude variation 97 double spherical_correction; // cosine correction for latitude variation 98 double weight; // product of partial_weight*w1*spherical_correction 105 99 106 100 // Location in the polydispersity hypercube, one index per dimension. … … 110 104 int offset[NPARS]; 111 105 106 // Number of coordinated indices 107 const int num_coord = details->num_coord; 108 112 109 // Number of elements in the longest polydispersity loop 113 110 const int fast_length = details->pd_length[0]; 114 111 115 // Number of coordinated indices116 const int num_coord = details->num_coord;117 118 // We could in theory spread this work across different threads, but119 // lets keep it simple;120 norm = pd_start == 0 ? 0.0 : result[nq];121 spherical_correction = 1.0; // the usual case.122 // partial_weight = NAN;123 112 // Trigger the reset behaviour that happens at the end the fast loop 124 113 // by setting the initial index >= weight vector length. 125 114 pd_index[0] = fast_length; 115 116 // Default the spherical correction to 1.0 in case it is not otherwise set 117 spherical_correction = 1.0; 126 118 127 119 // Since we are no longer looping over the entire polydispersity hypercube … … 129 121 // calls. This means initializing them to 0 at the start and accumulating 130 122 // them between calls. 123 norm = pd_start == 0 ? 0.0 : result[nq]; 131 124 if (q_index < nq) { 132 125 this_result = pd_start == 0 ? 0.0 : result[q_index]; … … 141 134 // Compute position in polydispersity hypercube 142 135 for (int k=0; k < num_active; k++) { 143 144 136 pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k]; 137 //printf("pd_index[%d] = %d\n",k,pd_index[k]); 145 138 } 146 139 … … 163 156 //printf("slow %d: ", loop_index); 164 157 for (int k=0; k < num_coord; k++) { 165 if (k < num_coord) { 166 int par = details->par_coord[k]; 167 int coord = details->pd_coord[k]; 168 int this_offset = details->par_offset[par]; 169 int block_size = 1; 170 for (int bit=0; coord != 0; bit++) { 171 if (coord&1) { 172 this_offset += block_size * pd_index[bit]; 173 block_size *= details->pd_length[bit]; 174 } 175 coord >>= 1; 158 int par = details->par_coord[k]; 159 int coord = details->pd_coord[k]; 160 int this_offset = details->par_offset[par]; 161 int block_size = 1; 162 for (int bit=0; coord != 0; bit++) { 163 if (coord&1) { 164 this_offset += block_size * pd_index[bit]; 165 block_size *= details->pd_length[bit]; 176 166 } 177 offset[par] = this_offset; 178 pvec[par] = values[this_offset]; 179 //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]); 180 // if theta is not coordinated with fast index, precompute spherical correction 181 if (par == details->theta_par && !(details->par_coord[k]&1)) { 182 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 183 } 167 coord >>= 1; 184 168 } 169 offset[par] = this_offset; 170 pvec[par] = values[this_offset]; 171 //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]); 172 // if theta is not coordinated with fast index, precompute spherical correction 173 if (par == details->theta_par && !(details->par_coord[k]&1)) { 174 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 175 } 185 176 } 186 177 //printf("\n"); 187 178 } 188 179 189 double weight; 180 // Update fast parameters 181 //printf("fast %d: ", loop_index); 182 for (int k=0; k < num_coord; k++) { 183 if (details->pd_coord[k]&1) { 184 const int par = details->par_coord[k]; 185 pvec[par] = values[offset[par]++]; 186 //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]); 187 // if theta is coordinated with fast index, compute spherical correction each time 188 if (par == details->theta_par) { 189 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 190 } 191 } 192 } 193 //printf("\n"); 194 195 // Increment fast index 190 196 const double wi = weights[details->pd_offset[0] + pd_index[0]]; 191 197 weight = partial_weight*wi; 192 198 pd_index[0]++; 193 194 // Increment fast index195 //printf("fast %d: ", loop_index);196 for (int k=0; k < num_coord; k++) {197 if (k < num_coord) {198 if (details->pd_coord[k]&1) {199 const int par = details->par_coord[k];200 pvec[par] = values[offset[par]++];201 //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);202 // if theta is coordinated with fast index, compute spherical correction each time203 if (par == details->theta_par) {204 spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);205 }206 }207 }208 }209 //printf("\n");210 199 211 200 #ifdef INVALID … … 229 218 if (pd_stop >= details->total_pd) { 230 219 // End of the PD loop we can normalize 231 const double scale = values[0]; 232 const double background = values[1]; 220 double scale, background; 221 scale = values[0]; 222 background = values[1]; 233 223 result[q_index] = (norm>0. ? scale*this_result/norm + background : background); 234 224 } else { … … 236 226 result[q_index] = this_result; 237 227 } 238 // Accumulate norm. 228 229 // Remember the updated norm. 239 230 if (q_index == 0) result[nq] = norm; 240 231 } -
sasmodels/kernelcl.py
rf2f67a6 rae2b6b5 511 511 hostbuf=values) 512 512 513 start, stop = 0, call_details.total_pd 514 args = [ 515 np.uint32(self.q_input.nq), np.int32(start), np.int32(stop), 516 details_b, weights_b, values_b, self.q_input.q_b, self.result_b, 517 self.real(cutoff), 518 ] 519 self.kernel(self.queue, self.q_input.global_size, None, *args) 513 # Call kernel and retrieve results 514 step = 100 515 for start in range(0, call_details.total_pd, step): 516 stop = min(start+step, call_details.total_pd) 517 args = [ 518 np.uint32(self.q_input.nq), np.int32(start), np.int32(stop), 519 details_b, weights_b, values_b, self.q_input.q_b, self.result_b, 520 self.real(cutoff), 521 ] 522 self.kernel(self.queue, self.q_input.global_size, None, *args) 520 523 cl.enqueue_copy(self.queue, self.result, self.result_b) 524 525 # Free buffers 521 526 for v in (details_b, weights_b, values_b): 522 527 if v is not None: v.release()
Note: See TracChangeset
for help on using the changeset viewer.