Changeset f2f67a6 in sasmodels
- Timestamp: Apr 15, 2016 7:26:24 PM (9 years ago)
- Branches: master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children: ae2b6b5, 38a9b07, eb97b11
- Parents: 0ff62d4
- Files: 1 added, 6 edited
.gitignore
r4a82d4d → rf2f67a6

 /*.csv
 *.pyc
-*.cl
 *.so
 *.obj
sasmodels/compare.py
r8d62008 → rf2f67a6

     value = calculator(**pars)
     average_time = toc()*1000./Nevals
+    #print("I(q)",value)
     return value, average_time
…
     if Nbase > 0 and Ncomp > 0:
         resid = (base_value - comp_value)
-        relerr = resid/comp_value
+        relerr = resid/np.where(comp_value!=0., abs(comp_value), 1.0)
         _print_stats("|%s-%s|"
                      % (base.engine, comp.engine) + (" "*(3+len(comp.engine))),
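
The relerr change above exists to avoid division by zero (and the resulting inf/nan entries) when the comparison engine returns exact zeros; where comp_value is zero the entry falls back to the absolute residual. A minimal standalone sketch of the same guard, with illustrative array values that are not taken from compare.py:

    import numpy as np

    def safe_relative_error(base_value, comp_value):
        # Divide by |comp_value| where it is nonzero, and by 1.0 otherwise,
        # so zero entries reduce to the absolute residual.
        resid = base_value - comp_value
        return resid / np.where(comp_value != 0., abs(comp_value), 1.0)

    # The middle entry would otherwise divide by zero.
    base = np.array([1.0, 2.0, 3.0])
    comp = np.array([1.1, 0.0, 2.7])
    print(safe_relative_error(base, comp))   # [-0.0909...  2.  0.1111...]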
sasmodels/core.py
rdd7fc12 → rf2f67a6

             or not HAVE_OPENCL
             or not kernelcl.environment().has_type(numpy_dtype)):
+        #print("building dll", numpy_dtype)
         return kerneldll.load_dll(source, model_info, numpy_dtype)
     else:
+        #print("building ocl", numpy_dtype)
         return kernelcl.GpuModel(source, model_info, numpy_dtype, fast=fast)
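
The commented-out debug prints above mark the two branches of the backend dispatch: fall back to a compiled DLL kernel when OpenCL is missing or the device cannot handle the requested precision, otherwise build an OpenCL kernel. A hedged sketch of the dispatch pattern only; the names select_backend, have_opencl and device_supports are illustrative, not the sasmodels API:

    def select_backend(dtype, have_opencl, device_supports):
        # Prefer OpenCL, but fall back to the DLL path when OpenCL is absent
        # or the device lacks support for the requested dtype (e.g. float64).
        if not have_opencl or not device_supports(dtype):
            return "dll"      # stand-in for kerneldll.load_dll(...)
        return "opencl"       # stand-in for kernelcl.GpuModel(...)

    # A device that only supports single precision forces the DLL path for float64.
    print(select_backend("float64", True, lambda dt: dt == "float32"))   # dll
    print(select_backend("float32", True, lambda dt: dt == "float32"))   # opencl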
sasmodels/generate.py
ra5b8477 → rf2f67a6

 try:
-    from typing import Tuple, Sequence, Iterator
+    from typing import Tuple, Sequence, Iterator, Dict
     from .modelinfo import ModelInfo
 except ImportError:
…
     Return a timestamp for the model corresponding to the most recently
     changed file or dependency.
+
+    Note that this does not look at the time stamps for the OpenCL header
+    information since that need not trigger a recompile of the DLL.
     """
     source_files = (model_sources(model_info)
…
     newest = max(getmtime(f) for f in source_files)
     return newest
+
+def model_templates():
+    # type: () -> List[str]
+    # TODO: fails DRY; templates appear two places.
+    # should instead have model_info contain a list of paths
+    # Note: kernel_iq.cl is not on this list because changing it need not
+    # trigger a recompile of the dll.
+    return [joinpath(TEMPLATE_ROOT, filename)
+            for filename in ('kernel_header.c', 'kernel_iq.c')]

 def convert_type(source, dtype):
…
     return _template_cache[filename][1]

-def model_templates():
-    # type: () -> List[str]
-    # TODO: fails DRY; templates are listed in two places.
-    # should instead have model_info contain a list of paths
-    return [joinpath(TEMPLATE_ROOT, filename)
-            for filename in ('kernel_header.c', 'kernel_iq.c')]
-

 _FN_TEMPLATE = """\
…
 %(body)s
 }
-

 """
…
     # Load templates and user code
     kernel_header = load_template('kernel_header.c')
-    kernel_code = load_template('kernel_iq.c')
+    dll_code = load_template('kernel_iq.c')
+    ocl_code = load_template('kernel_iq.cl')
     user_code = [open(f).read() for f in model_sources(model_info)]
…
     if _have_Iqxy(user_code):
         # Call 2D model
-        refs = ["q[2*i]", "q[2*i+1]"] + _call_pars("_v.", partable.iqxy_parameters)
+        refs = ["q[2*_i]", "q[2*_i+1]"] + _call_pars("_v.", partable.iqxy_parameters)
         call_iqxy = "#define CALL_IQ(_q,_i,_v) Iqxy(%s)" % (",".join(refs))
     else:
…
     # TODO: allow mixed python/opencl kernels?

-    # define the Iq kernel
-    source.append("#define KERNEL_NAME %s_Iq"%model_info.name)
-    source.append(call_iq)
-    source.append(kernel_code)
-    source.append("#undef CALL_IQ")
-    source.append("#undef KERNEL_NAME")
-
-    # define the Iqxy kernel from the same source with different #defines
-    source.append("#define KERNEL_NAME %s_Iqxy"%model_info.name)
-    source.append(call_iqxy)
-    source.append(kernel_code)
-    source.append("#undef CALL_IQ")
-    source.append("#undef KERNEL_NAME")
-
+    source.append("#if defined(USE_OPENCL)")
+    source.extend(_add_kernels(ocl_code, call_iq, call_iqxy, model_info.name))
+    source.append("#else /* !USE_OPENCL */")
+    source.extend(_add_kernels(dll_code, call_iq, call_iqxy, model_info.name))
+    source.append("#endif /* !USE_OPENCL */")
     return '\n'.join(source)
+
+def _add_kernels(kernel_code, call_iq, call_iqxy, name):
+    # type: (str, str, str, str) -> List[str]
+    source = [
+        # define the Iq kernel
+        "#define KERNEL_NAME %s_Iq"%name,
+        call_iq,
+        kernel_code,
+        "#undef CALL_IQ",
+        "#undef KERNEL_NAME",
+
+        # define the Iqxy kernel from the same source with different #defines
+        "#define KERNEL_NAME %s_Iqxy"%name,
+        call_iqxy,
+        kernel_code,
+        "#undef CALL_IQ",
+        "#undef KERNEL_NAME",
+    ]
+    return source

 def load_kernel_module(model_name):
     # type: (str) -> module
+    """
+    Return the kernel module named in *model_name*.
+
+    If the name ends in *.py* then load it as a custom model using
+    :func:`sasmodels.custom.load_custom_kernel_module`, otherwise
+    load it from :mod:`sasmodels.models`.
+    """
     if model_name.endswith('.py'):
         kernel_module = load_custom_kernel_module(model_name)
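
The new _add_kernels helper wraps one kernel template twice, once per entry point, by redefining KERNEL_NAME and CALL_IQ around the same body, and the generated file now carries both the OpenCL and DLL variants behind a USE_OPENCL guard. A self-contained sketch of the scaffolding it produces; the model name, parameter references and template strings below are placeholders, not the real kernel_iq sources:

    def add_kernels(kernel_code, call_iq, call_iqxy, name):
        # Emit the same template twice under different preprocessor names:
        # <name>_Iq for the 1D kernel and <name>_Iqxy for the 2D kernel.
        return [
            "#define KERNEL_NAME %s_Iq" % name, call_iq, kernel_code,
            "#undef CALL_IQ", "#undef KERNEL_NAME",
            "#define KERNEL_NAME %s_Iqxy" % name, call_iqxy, kernel_code,
            "#undef CALL_IQ", "#undef KERNEL_NAME",
        ]

    # Placeholder inputs, just to show the shape of the generated source.
    ocl_code = "/* body of the OpenCL kernel template */"
    dll_code = "/* body of the C/DLL kernel template */"
    call_iq = "#define CALL_IQ(_q,_i,_v) Iq(_q[_i], _v.radius)"
    call_iqxy = "#define CALL_IQ(_q,_i,_v) Iqxy(_q[2*_i], _q[2*_i+1], _v.radius)"

    source = ["#if defined(USE_OPENCL)"]
    source.extend(add_kernels(ocl_code, call_iq, call_iqxy, "sphere"))
    source.append("#else /* !USE_OPENCL */")
    source.extend(add_kernels(dll_code, call_iq, call_iqxy, "sphere"))
    source.append("#endif /* !USE_OPENCL */")
    print("\n".join(source))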
sasmodels/kernel_iq.c
r6e7ff6d → rf2f67a6

   local ParameterBlock local_values;  // current parameter values
   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
+  double norm;
+
+  // number of active loops
+  const int num_active = details->num_active;

   // Fill in the initial variables
…
   }

+  // Monodisperse computation
+  if (num_active == 0) {
+    #ifdef INVALID
+    if (INVALID(local_values)) { return; }
+    #endif
+    norm = CALL_VOLUME(local_values);
+
+    const double scale = values[0];
+    const double background = values[1];
+    // result[nq] = norm; // Total volume normalization
+
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    for (int i=0; i < nq; i++) {
+      double scattering = CALL_IQ(q, i, local_values);
+      result[i] = (norm>0. ? scale*scattering/norm + background : background);
+    }
+    return;
+  }
+
+#if MAX_PD > 0
   // If it is the first round initialize the result to zero, otherwise
   // assume that the previous result has been passed back.
…
       result[i] = 0.0;
     }
-  }
-
-  // Monodisperse computation
-  if (details->num_active == 0) {
-    #ifdef INVALID
-    if (INVALID(local_values)) { return; }
-    #endif
-
-    const double norm = CALL_VOLUME(local_values);
-    const double scale = values[0];
-    const double background = values[1];
-    #ifdef USE_OPENMP
-    #pragma omp parallel for
-    #endif
-    result[nq] = norm; // Total volume normalization
-    for (int i=0; i < nq; i++) {
-      double scattering = CALL_IQ(q, i, local_values);
-      result[i] = (norm>0. ? scale*scattering/norm + background : background);
-    }
-    return;
-  }
-
-#if MAX_PD > 0
-  //printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop);
-  // Since we are no longer looping over the entire polydispersity hypercube
-  // for each q, we need to track the normalization values between calls.
-  double norm = 0.0;
+    norm = 0.0;
+  } else {
+    norm = result[nq];
+  }

   // need product of weights at every Iq calc, so keep product of
…
   pd_index[0] = fast_length;

+  // Number of coordinated indices
+  const int num_coord = details->num_coord;
+
   // Loop over the weights then loop over q, accumulating values
   for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
-    // check if indices need to be updated
+    // check if fast loop needs to be reset
     if (pd_index[0] == fast_length) {
-      //printf("should be here with %d active\n", details->num_active);
+      //printf("should be here with %d active\n", num_active);

       // Compute position in polydispersity hypercube
-      for (int k=0; k < details->num_active; k++) {
+      for (int k=0; k < num_active; k++) {
         pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k];
         //printf("pd_index[%d] = %d\n",k,pd_index[k]);
…
       partial_weight = 1.0;
       //printf("partial weight %d: ", loop_index);
-      for (int k=1; k < details->num_active; k++) {
+      for (int k=1; k < num_active; k++) {
         double wi = weights[details->pd_offset[k] + pd_index[k]];
         //printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi);
…
       // Update parameter offsets in weight vector
       //printf("slow %d: ", loop_index);
-      for (int k=0; k < details->num_coord; k++) {
+      for (int k=0; k < num_coord; k++) {
         int par = details->par_coord[k];
         int coord = details->pd_coord[k];
…
         // if theta is not coordinated with fast index, precompute spherical correction
         if (par == details->theta_par && !(details->par_coord[k]&1)) {
-          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1e-6);
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
         }
       }
…
     double weight = partial_weight*wi;
     //printf("fast %d: ", loop_index);
-    for (int k=0; k < details->num_coord; k++) {
+    for (int k=0; k < num_coord; k++) {
       if (details->pd_coord[k]&1) {
         const int par = details->par_coord[k];
…
         // if theta is coordinated with fast index, compute spherical correction each time
         if (par == details->theta_par) {
-          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1e-6);
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
         }
       }
…
     }

-    // Accumulate norm.
-    result[nq] += norm;
-
     // End of the PD loop we can normalize
     if (pd_stop >= details->total_pd) {
…
       }
     }
+
+  // Remember the updated norm.
+  result[nq] = norm;
 #endif // MAX_PD > 0
 }
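
The polydispersity loop above decodes a single flat loop_index into one index per dispersity parameter via the precomputed strides, which is what lets the kernel be invoked over any [pd_start, pd_stop) slice and resume with the running norm carried in result[nq]. A small Python sketch of that index decoding, with made-up loop lengths rather than the real CallDetails layout:

    # Made-up polydispersity loop lengths for three parameters.
    pd_length = [5, 3, 4]

    # Strides in the kernel_iq.c sense: the fastest loop has stride 1,
    # and pd_stride[k] is the product of the lengths of the faster loops.
    pd_stride = [1, 5, 15]

    def decode(loop_index):
        # Mirrors pd_index[k] = (loop_index/pd_stride[k]) % pd_length[k]
        return [(loop_index // pd_stride[k]) % pd_length[k]
                for k in range(len(pd_length))]

    # Walking loop_index from 0 to total-1 visits every point of the
    # hypercube exactly once, so a later call can pick up where an
    # earlier partial loop stopped.
    total = pd_length[0] * pd_length[1] * pd_length[2]
    assert len({tuple(decode(i)) for i in range(total)}) == total
    print(decode(0), decode(7), decode(total - 1))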
sasmodels/kernelcl.py
r8d62008 → rf2f67a6

 try:
-    raise NotImplementedError("OpenCL not yet implemented for new kernel template")
+    #raise NotImplementedError("OpenCL not yet implemented for new kernel template")
     import pyopencl as cl  # type: ignore
     # Ask OpenCL for the default context so that we know that one exists
…
         key = "%s-%s-%s"%(name, dtype, fast)
         if key not in self.compiled:
-            print("compiling",name)
+            #print("OpenCL compile",name)
             dtype = np.dtype(dtype)
             program = compile_model(self.get_context(dtype),
…
         kernel_name = generate.kernel_name(self.info, is_2d)
         kernel = getattr(self.program, kernel_name)
-        return GpuKernel(kernel, self.info, q_vectors)
+        return GpuKernel(kernel, self.dtype, self.info, q_vectors)

     def release(self):
…
         Free the memory.
         """
-        if self.q is not None:
-            self.q.release()
-            self.q = None
+        if self.q_b is not None:
+            self.q_b.release()
+            self.q_b = None

     def __del__(self):
…
     Call :meth:`release` when done with the kernel instance.
     """
-    def __init__(self, kernel, model_info, q_vectors):
-        # type: (cl.Kernel, ModelInfo, List[np.ndarray]) -> None
+    def __init__(self, kernel, dtype, model_info, q_vectors):
+        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
         max_pd = model_info.parameters.max_pd
         npars = len(model_info.parameters.kernel_parameters)-2
-        q_input = GpuInput(q_vectors, kernel.dtype)
+        q_input = GpuInput(q_vectors, dtype)
         self.kernel = kernel
         self.info = model_info
-        self.dtype = kernel.dtype
+        self.dtype = dtype
         self.dim = '2d' if q_input.is_2d else '1d'
         # plus three for the normalization values
-        self.result = np.empty(q_input.nq+3, q_input.dtype)
+        self.result = np.empty(q_input.nq+3, dtype)

         # Inputs and outputs for each kernel call
         # Note: res may be shorter than res_b if global_size != nq
         env = environment()
-        self.queue = env.get_queue(kernel.dtype)
+        self.queue = env.get_queue(dtype)

         # details is int32 data, padded to an 8 integer boundary
         size = ((max_pd*5 + npars*3 + 2 + 7)//8)*8
         self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE,
-                                  q_input.global_size[0] * kernel.dtype.itemsize)
+                                  q_input.global_size[0] * dtype.itemsize)
         self.q_input = q_input # allocated by GpuInput above

         self._need_release = [ self.result_b, self.q_input ]
-        self.real = (np.float32 if self.q_input.dtype == generate.F32
-                     else np.float64 if self.q_input.dtype == generate.F64
-                     else np.float16 if self.q_input.dtype == generate.F16
+        self.real = (np.float32 if dtype == generate.F32
+                     else np.float64 if dtype == generate.F64
+                     else np.float16 if dtype == generate.F16
                      else np.float32)  # will never get here, so use np.float32

     def __call__(self, call_details, weights, values, cutoff):
         # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray
-
         context = self.queue.context
         # Arrange data transfer to card
…
                               hostbuf=call_details.buffer)
         weights_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
-                              hostbuf=weights)
+                              hostbuf=weights) if len(weights) else None
         values_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=values)
…
         cl.enqueue_copy(self.queue, self.result, self.result_b)
         for v in (details_b, weights_b, values_b):
-            v.release()
+            if v is not None: v.release()

         return self.result[:self.q_input.nq]
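
The weights_b guard matters for the monodisperse case, where the weights vector is empty: OpenCL refuses to create a zero-sized buffer, so no device buffer is allocated and the release loop skips the None entry. A minimal sketch of that pattern, assuming a machine with a working pyopencl installation; the variable names are illustrative rather than those used by GpuKernel:

    import numpy as np
    import pyopencl as cl

    mf = cl.mem_flags
    ctx = cl.create_some_context()

    weights = np.empty(0, dtype=np.float64)   # monodisperse: no dispersity weights

    # Only create the device buffer when there is host data to copy;
    # a zero-sized buffer would raise an error, so keep None instead.
    weights_b = (cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=weights)
                 if len(weights) else None)

    # Release only the buffers that were actually allocated.
    for buf in (weights_b,):
        if buf is not None:
            buf.release()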