Changes in / [040575f:c48d0c6] in sasmodels
- Location:
- sasmodels
- Files:
-
- 2 added
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/direct_model.py
r16bc3fc rf734e7d 3 3 import numpy as np 4 4 5 from . import models 6 from . import weights 7 8 try: 9 from .kernelcl import load_model 10 except ImportError,exc: 11 warnings.warn(str(exc)) 12 warnings.warn("using ctypes instead") 13 from .kerneldll import load_model 14 15 def load_model_definition(model_name): 16 __import__('sasmodels.models.'+model_name) 17 model_definition = getattr(models, model_name, None) 18 return model_definition 19 20 # load_model is imported above. It looks like the following 21 #def load_model(model_definition, dtype='single): 22 # if kerneldll: 23 # if source is newer than compiled: compile 24 # load dll 25 # return kernel 26 # elif kernelcl: 27 # compile source on context 28 # return kernel 29 30 31 def make_kernel(model, q_vectors): 32 """ 33 Return a computation kernel from the model definition and the q input. 34 """ 35 input = model.make_input(q_vectors) 36 return model(input) 37 38 def get_weights(kernel, pars, name): 39 """ 40 Generate the distribution for parameter *name* given the parameter values 41 in *pars*. 42 43 Searches for "name", "name_pd", "name_pd_type", "name_pd_n", "name_pd_sigma" 44 """ 45 relative = name in kernel.info['partype']['pd-rel'] 46 limits = kernel.info['limits'] 47 disperser = pars.get(name+'_pd_type', 'gaussian') 48 value = pars.get(name, kernel.info['defaults'][name]) 49 npts = pars.get(name+'_pd_n', 0) 50 width = pars.get(name+'_pd', 0.0) 51 nsigma = pars.get(name+'_pd_nsigma', 3.0) 52 v,w = weights.get_weights( 53 disperser, npts, width, nsigma, 54 value, limits[name], relative) 55 return v,w/np.sum(w) 56 57 def call_kernel(kernel, pars): 58 fixed_pars = [pars.get(name, kernel.info['defaults'][name]) 59 for name in kernel.fixed_pars] 60 pd_pars = [get_weights(kernel, pars, name) for name in kernel.pd_pars] 61 return kernel(fixed_pars, pd_pars) 5 from .core import load_model_definition, make_kernel, call_kernel 6 from .core import load_model_cl as load_model 7 if load_model is None: 8 warnings.warn("unable to load opencl; using ctypes instead") 9 from .core import load_model_dll as load_model 62 10 63 11 class DirectModel: -
sasmodels/generate.py
rf1ecfa92 r6137124 190 190 191 191 import sys 192 import os 193 import os.path 192 from os.path import abspath, dirname, join as joinpath, exists 194 193 import re 195 194 196 195 import numpy as np 196 C_KERNEL_TEMPLATE_PATH = joinpath(dirname(__file__), 'kernel_template.c') 197 197 198 198 F64 = np.dtype('float64') … … 229 229 PARTABLE_VALUE_WIDTH = 10 230 230 231 # Header included before every kernel.232 # This makes sure that the appropriate math constants are defined, and does233 # whatever is required to make the kernel compile as pure C rather than234 # as an OpenCL kernel.235 KERNEL_HEADER = """\236 // GENERATED CODE --- DO NOT EDIT ---237 // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c238 239 #ifdef __OPENCL_VERSION__240 # define USE_OPENCL241 #endif242 243 // If opencl is not available, then we are compiling a C function244 // Note: if using a C++ compiler, then define kernel as extern "C"245 #ifndef USE_OPENCL246 # ifdef __cplusplus247 #include <cmath>248 #if defined(_MSC_VER)249 #define kernel extern "C" __declspec( dllexport )250 #else251 #define kernel extern "C"252 #endif253 using namespace std;254 inline void SINCOS(double angle, double &svar, double &cvar)255 { svar=sin(angle); cvar=cos(angle); }256 # else257 #include <math.h>258 #if defined(_MSC_VER)259 #define kernel __declspec( dllexport )260 #else261 #define kernel262 #endif263 #define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)264 # endif265 # define global266 # define local267 # define constant const268 # define powr(a,b) pow(a,b)269 # define pown(a,b) pow(a,b)270 #else271 # ifdef USE_SINCOS272 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)273 # else274 # define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)275 # endif276 #endif277 278 // Standard mathematical constants:279 // M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,280 // M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)281 // OpenCL defines M_constant_F for float constants, and nothing if double282 // is not enabled on the card, which is why these constants may be missing283 #ifndef M_PI284 # define M_PI 3.141592653589793285 #endif286 #ifndef M_PI_2287 # define M_PI_2 1.570796326794897288 #endif289 #ifndef M_PI_4290 # define M_PI_4 0.7853981633974483291 #endif292 293 // Non-standard pi/180, used for converting between degrees and radians294 #ifndef M_PI_180295 # define M_PI_180 0.017453292519943295296 #endif297 """298 299 300 # The I(q) kernel and the I(qx, qy) kernel have one and two q parameters301 # respectively, so the template builder will need to do extra work to302 # declare, initialize and pass the q parameters.303 KERNEL_1D = {304 'fn': "Iq",305 'q_par_decl': "global const double *q,",306 'qinit': "const double qi = q[i];",307 'qcall': "qi",308 'qwork': ["q"],309 }310 311 KERNEL_2D = {312 'fn': "Iqxy",313 'q_par_decl': "global const double *qx,\n global const double *qy,",314 'qinit': "const double qxi = qx[i];\n const double qyi = qy[i];",315 'qcall': "qxi, qyi",316 'qwork': ["qx", "qy"],317 }318 319 # Generic kernel template for the polydispersity loop.320 # This defines the opencl kernel that is available to the host. The same321 # structure is used for Iq and Iqxy kernels, so extra flexibility is needed322 # for q parameters. The polydispersity loop is built elsewhere and323 # substituted into this template.324 KERNEL_TEMPLATE = """\325 kernel void %(name)s(326 %(q_par_decl)s327 global double *result,328 #ifdef USE_OPENCL329 global double *loops_g,330 #else331 const int Nq,332 #endif333 local double *loops,334 const double cutoff,335 %(par_decl)s336 )337 {338 #ifdef USE_OPENCL339 // copy loops info to local memory340 event_t e = async_work_group_copy(loops, loops_g, (%(pd_length)s)*2, 0);341 wait_group_events(1, &e);342 343 int i = get_global_id(0);344 int Nq = get_global_size(0);345 #endif346 347 #ifdef USE_OPENCL348 if (i < Nq)349 #else350 #pragma omp parallel for351 for (int i=0; i < Nq; i++)352 #endif353 {354 %(qinit)s355 double ret=0.0, norm=0.0;356 double vol=0.0, norm_vol=0.0;357 %(loops)s358 if (vol*norm_vol != 0.0) {359 ret *= norm_vol/vol;360 }361 result[i] = scale*ret/norm+background;362 }363 }364 """365 366 # Polydispersity loop level.367 # This pulls the parameter value and weight from the looping vector in order368 # in preperation for a nested loop.369 LOOP_OPEN="""\370 for (int %(name)s_i=0; %(name)s_i < N%(name)s; %(name)s_i++) {371 const double %(name)s = loops[2*(%(name)s_i%(offset)s)];372 const double %(name)s_w = loops[2*(%(name)s_i%(offset)s)+1];\373 """374 375 376 377 ##########################################################378 # #379 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #380 # !! !! #381 # !! KEEP THIS CODE CONSISTENT WITH PYKERNEL.PY !! #382 # !! !! #383 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #384 # #385 ##########################################################386 387 388 # Polydispersity loop body.389 # This computes the weight, and if it is sufficient, calls the scattering390 # function and adds it to the total. If there is a volume normalization,391 # it will also be added here.392 LOOP_BODY="""\393 const double weight = %(weight_product)s;394 if (weight > cutoff) {395 const double I = %(fn)s(%(qcall)s, %(pcall)s);396 if (I>=0.0) { // scattering cannot be negative397 ret += weight*I%(sasview_spherical)s;398 norm += weight;399 %(volume_norm)s400 }401 //else { printf("exclude qx,qy,I:%%g,%%g,%%g\\n",%(qcall)s,I); }402 }403 //else { printf("exclude weight:%%g\\n",weight); }\404 """405 406 # Use this when integrating over orientation407 SPHERICAL_CORRECTION="""\408 // Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta409 double spherical_correction = (Ntheta>1 ? fabs(sin(M_PI_180*theta)) : 1.0);\410 """411 # Use this to reproduce sasview behaviour412 SASVIEW_SPHERICAL_CORRECTION="""\413 // Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta414 double spherical_correction = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2 : 1.0);\415 """416 417 # Volume normalization.418 # If there are "volume" polydispersity parameters, then these will be used419 # to call the form_volume function from the user supplied kernel, and accumulate420 # a normalized weight.421 VOLUME_NORM="""const double vol_weight = %(vol_weight)s;422 vol += vol_weight*form_volume(%(vol_pars)s);423 norm_vol += vol_weight;\424 """425 426 # functions defined as strings in the .py module427 WORK_FUNCTION="""\428 double %(name)s(%(pars)s);429 double %(name)s(%(pars)s)430 {431 %(body)s432 }\433 """434 435 231 # Documentation header for the module, giving the model name, its short 436 232 # description and its parameter table. The remainder of the doc comes … … 449 245 %(docs)s 450 246 """ 451 452 def indent(s, depth):453 """454 Indent a string of text with *depth* additional spaces on each line.455 """456 spaces = " "*depth457 sep = "\n"+spaces458 return spaces + sep.join(s.split("\n"))459 460 461 def kernel_name(info, is_2D):462 """463 Name of the exported kernel symbol.464 """465 return info['name'] + "_" + ("Iqxy" if is_2D else "Iq")466 467 468 def use_single(source):469 """470 Convert code from double precision to single precision.471 """472 # Convert double keyword to float. Accept an 'n' parameter for vector473 # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented474 # as cdouble which is typedef'd to double2.475 source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))',476 r'\1float\2', source)477 # Convert floating point constants to single by adding 'f' to the end.478 # OS/X driver complains if you don't do this.479 source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?',480 r'\g<0>f', source)481 return source482 483 484 def make_kernel(info, is_2D):485 """486 Build a kernel call from metadata supplied by the user.487 488 *info* is the json object defined in the kernel file.489 490 *form* is either "Iq" or "Iqxy".491 492 This does not create a complete OpenCL kernel source, only the top493 level kernel call with polydispersity and a call to the appropriate494 Iq or Iqxy function.495 """496 497 # If we are building the Iqxy kernel, we need to propagate qx,qy498 # parameters, otherwise we can499 dim = "2d" if is_2D else "1d"500 fixed_pars = info['partype']['fixed-'+dim]501 pd_pars = info['partype']['pd-'+dim]502 vol_pars = info['partype']['volume']503 q_pars = KERNEL_2D if is_2D else KERNEL_1D504 fn = q_pars['fn']505 506 507 # Build polydispersity loops508 depth = 4509 offset = ""510 loop_head = []511 loop_end = []512 for name in pd_pars:513 subst = { 'name': name, 'offset': offset }514 loop_head.append(indent(LOOP_OPEN%subst, depth))515 loop_end.insert(0, (" "*depth) + "}")516 offset += '+N'+name517 depth += 2518 519 # The volume parameters in the inner loop are used to call the volume()520 # function in the kernel, with the parameters defined in vol_pars and the521 # weight product defined in weight. If there are no volume parameters,522 # then there will be no volume normalization.523 if vol_pars:524 subst = {525 'vol_weight': "*".join(p+"_w" for p in vol_pars),526 'vol_pars': ", ".join(vol_pars),527 }528 volume_norm = VOLUME_NORM%subst529 else:530 volume_norm = ""531 532 # Define the inner loop function call533 # The parameters to the f(q,p1,p2...) call should occur in the same534 # order as given in the parameter info structure. This may be different535 # from the parameter order in the call to the kernel since the kernel536 # call places all fixed parameters before all polydisperse parameters.537 fq_pars = [p[0] for p in info['parameters'][len(COMMON_PARAMETERS):]538 if p[0] in set(fixed_pars+pd_pars)]539 if False and "theta" in pd_pars:540 spherical_correction = [indent(SPHERICAL_CORRECTION, depth)]541 weights = [p+"_w" for p in pd_pars]+['spherical_correction']542 sasview_spherical = ""543 elif True and "theta" in pd_pars:544 spherical_correction = [indent(SASVIEW_SPHERICAL_CORRECTION,depth)]545 weights = [p+"_w" for p in pd_pars]546 sasview_spherical = "*spherical_correction"547 else:548 spherical_correction = []549 weights = [p+"_w" for p in pd_pars]550 sasview_spherical = ""551 weight_product = "*".join(weights) if len(weights) > 1 else "1.0"552 subst = {553 'weight_product': weight_product,554 'volume_norm': volume_norm,555 'fn': fn,556 'qcall': q_pars['qcall'],557 'pcall': ", ".join(fq_pars), # skip scale and background558 'sasview_spherical': sasview_spherical,559 }560 loop_body = [indent(LOOP_BODY%subst, depth)]561 loops = "\n".join(loop_head+spherical_correction+loop_body+loop_end)562 563 # declarations for non-pd followed by pd pars564 # e.g.,565 # const double sld,566 # const int Nradius567 fixed_par_decl = ",\n ".join("const double %s"%p for p in fixed_pars)568 pd_par_decl = ",\n ".join("const int N%s"%p for p in pd_pars)569 if fixed_par_decl and pd_par_decl:570 par_decl = ",\n ".join((fixed_par_decl, pd_par_decl))571 elif fixed_par_decl:572 par_decl = fixed_par_decl573 else:574 par_decl = pd_par_decl575 576 # Finally, put the pieces together in the kernel.577 pd_length = "+".join('N'+p for p in pd_pars) if len(pd_pars) > 0 else "0"578 subst = {579 # kernel name is, e.g., cylinder_Iq580 'name': kernel_name(info, is_2D),581 # to declare, e.g., global double q[],582 'q_par_decl': q_pars['q_par_decl'],583 # to declare, e.g., double sld, int Nradius, int Nlength584 'par_decl': par_decl,585 # to copy global to local pd pars we need, e.g., Nradius+Nlength586 'pd_length': pd_length,587 # the q initializers, e.g., double qi = q[i];588 'qinit': q_pars['qinit'],589 # the actual polydispersity loop590 'loops': loops,591 }592 kernel = KERNEL_TEMPLATE%subst593 594 # If the working function is defined in the kernel metadata as a595 # string, translate the string to an actual function definition596 # and put it before the kernel.597 if info[fn]:598 subst = {599 'name': fn,600 'pars': ", ".join("double "+p for p in q_pars['qwork']+fq_pars),601 'body': info[fn],602 }603 kernel = "\n".join((WORK_FUNCTION%subst, kernel))604 return kernel605 247 606 248 def make_partable(pars): … … 641 283 """ 642 284 for path in search_path: 643 target = os.path.join(path, filename)644 if os.path.exists(target):285 target = joinpath(path, filename) 286 if exists(target): 645 287 return target 646 288 raise ValueError("%r not found in %s"%(filename, search_path)) … … 650 292 Return a list of the sources file paths for the module. 651 293 """ 652 from os.path import abspath, dirname, join as joinpath653 294 search_path = [ dirname(info['filename']), 654 295 abspath(joinpath(dirname(__file__),'models')) ] 655 296 return [_search(search_path, f) for f in info['source']] 656 297 657 def make_model(info): 658 """ 659 Generate the code for the kernel defined by info, using source files 660 found in the given search path. 661 """ 662 source = [open(f).read() for f in sources(info)] 663 # If the form volume is defined as a string, then wrap it in a 664 # function definition and place it after the external sources but 665 # before the kernel functions. If the kernel functions are strings, 666 # they will be translated in the make_kernel call. 667 if info['form_volume']: 668 subst = { 669 'name': "form_volume", 670 'pars': ", ".join("double "+p for p in info['partype']['volume']), 671 'body': info['form_volume'], 672 } 673 source.append(WORK_FUNCTION%subst) 674 kernel_Iq = make_kernel(info, is_2D=False) if not callable(info['Iq']) else "" 675 kernel_Iqxy = make_kernel(info, is_2D=True) if not callable(info['Iqxy']) else "" 676 kernel = "\n\n".join([KERNEL_HEADER]+source+[kernel_Iq, kernel_Iqxy]) 677 return kernel 298 def use_single(source): 299 """ 300 Convert code from double precision to single precision. 301 """ 302 # Convert double keyword to float. Accept an 'n' parameter for vector 303 # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented 304 # as cdouble which is typedef'd to double2. 305 source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))', 306 r'\1float\2', source) 307 # Convert floating point constants to single by adding 'f' to the end. 308 # OS/X driver complains if you don't do this. 309 source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?', 310 r'\g<0>f', source) 311 return source 312 313 314 def kernel_name(info, is_2D): 315 """ 316 Name of the exported kernel symbol. 317 """ 318 return info['name'] + "_" + ("Iqxy" if is_2D else "Iq") 319 678 320 679 321 def categorize_parameters(pars): … … 708 350 return partype 709 351 352 def indent(s, depth): 353 """ 354 Indent a string of text with *depth* additional spaces on each line. 355 """ 356 spaces = " "*depth 357 sep = "\n"+spaces 358 return spaces + sep.join(s.split("\n")) 359 360 361 def build_polydispersity_loops(pd_pars): 362 """ 363 Build polydispersity loops 364 365 Returns loop opening and loop closing 366 """ 367 LOOP_OPEN="""\ 368 for (int %(name)s_i=0; %(name)s_i < N%(name)s; %(name)s_i++) { 369 const double %(name)s = loops[2*(%(name)s_i%(offset)s)]; 370 const double %(name)s_w = loops[2*(%(name)s_i%(offset)s)+1];\ 371 """ 372 depth = 4 373 offset = "" 374 loop_head = [] 375 loop_end = [] 376 for name in pd_pars: 377 subst = { 'name': name, 'offset': offset } 378 loop_head.append(indent(LOOP_OPEN%subst, depth)) 379 loop_end.insert(0, (" "*depth) + "}") 380 offset += '+N'+name 381 depth += 2 382 return "\n".join(loop_head), "\n".join(loop_end) 383 384 C_KERNEL_TEMPLATE=None 385 def make_model(info): 386 """ 387 Generate the code for the kernel defined by info, using source files 388 found in the given search path. 389 """ 390 # TODO: need something other than volume to indicate dispersion parameters 391 # No volume normalization despite having a volume parameter. 392 # Thickness is labelled a volume in order to trigger polydispersity. 393 # May want a separate dispersion flag, or perhaps a separate category for 394 # disperse, but not volume. Volume parameters also use relative values 395 # for the distribution rather than the absolute values used by angular 396 # dispersion. Need to be careful that necessary parameters are available 397 # for computing volume even if we allow non-disperse volume parameters. 398 399 # Load template 400 global C_KERNEL_TEMPLATE 401 if C_KERNEL_TEMPLATE is None: 402 with open(C_KERNEL_TEMPLATE_PATH) as fid: 403 C_KERNEL_TEMPLATE = fid.read() 404 405 # Load additional sources 406 source = [open(f).read() for f in sources(info)] 407 408 # Prepare defines 409 defines = [] 410 partype = info['partype'] 411 pd_1d = partype['pd-1d'] 412 pd_2d = partype['pd-2d'] 413 fixed_1d = partype['fixed-1d'] 414 fixed_2d = partype['fixed-1d'] 415 416 iq_parameters = [p[0] 417 for p in info['parameters'][2:] # skip scale, background 418 if p[0] in set(fixed_1d+pd_1d)] 419 iqxy_parameters = [p[0] 420 for p in info['parameters'][2:] # skip scale, background 421 if p[0] in set(fixed_2d+pd_2d)] 422 volume_parameters = [p[0] 423 for p in info['parameters'] 424 if p[4]=='volume'] 425 426 # Fill in defintions for volume parameters 427 if volume_parameters: 428 defines.append(('VOLUME_PARAMETERS', 429 ','.join(volume_parameters))) 430 defines.append(('VOLUME_WEIGHT_PRODUCT', 431 '*'.join(p+'_w' for p in volume_parameters))) 432 433 # Generate form_volume function from body only 434 if info['form_volume'] is not None: 435 if volume_parameters: 436 vol_par_decl = ', '.join('double '+p for p in volume_parameters) 437 else: 438 vol_par_decl = 'void' 439 defines.append(('VOLUME_PARAMETER_DECLARATIONS', 440 vol_par_decl)) 441 fn = """\ 442 double form_volume(VOLUME_PARAMETER_DECLARATIONS); 443 double form_volume(VOLUME_PARAMETER_DECLARATIONS) { 444 %(body)s 445 } 446 """%{'body':info['form_volume']} 447 source.append(fn) 448 449 # Fill in definitions for Iq parameters 450 defines.append(('IQ_KERNEL_NAME', info['name']+'_Iq')) 451 defines.append(('IQ_PARAMETERS', ', '.join(iq_parameters))) 452 if fixed_1d: 453 defines.append(('IQ_FIXED_PARAMETER_DECLARATIONS', 454 ', \\\n '.join('const double %s'%p for p in fixed_1d))) 455 if pd_1d: 456 defines.append(('IQ_WEIGHT_PRODUCT', 457 '*'.join(p+'_w' for p in pd_1d))) 458 defines.append(('IQ_DISPERSION_LENGTH_DECLARATIONS', 459 ', \\\n '.join('const int N%s'%p for p in pd_1d))) 460 defines.append(('IQ_DISPERSION_LENGTH_SUM', 461 '+'.join('N'+p for p in pd_1d))) 462 open_loops, close_loops = build_polydispersity_loops(pd_1d) 463 defines.append(('IQ_OPEN_LOOPS', 464 open_loops.replace('\n',' \\\n'))) 465 defines.append(('IQ_CLOSE_LOOPS', 466 close_loops.replace('\n',' \\\n'))) 467 if info['Iq'] is not None: 468 defines.append(('IQ_PARAMETER_DECLARATIONS', 469 ', '.join('double '+p for p in iq_parameters))) 470 fn = """\ 471 double Iq(double q, IQ_PARAMETER_DECLARATIONS); 472 double Iq(double q, IQ_PARAMETER_DECLARATIONS) { 473 %(body)s 474 } 475 """%{'body':info['Iq']} 476 source.append(fn) 477 478 # Fill in definitions for Iqxy parameters 479 defines.append(('IQXY_KERNEL_NAME', info['name']+'_Iqxy')) 480 defines.append(('IQXY_PARAMETERS', ', '.join(iqxy_parameters))) 481 if fixed_2d: 482 defines.append(('IQXY_FIXED_PARAMETER_DECLARATIONS', 483 ', \\\n '.join('const double %s'%p for p in fixed_2d))) 484 if pd_2d: 485 defines.append(('IQXY_WEIGHT_PRODUCT', 486 '*'.join(p+'_w' for p in pd_2d))) 487 defines.append(('IQXY_DISPERSION_LENGTH_DECLARATIONS', 488 ', \\\n '.join('const int N%s'%p for p in pd_2d))) 489 defines.append(('IQXY_DISPERSION_LENGTH_SUM', 490 '+'.join('N'+p for p in pd_2d))) 491 open_loops, close_loops = build_polydispersity_loops(pd_2d) 492 defines.append(('IQXY_OPEN_LOOPS', 493 open_loops.replace('\n',' \\\n'))) 494 defines.append(('IQXY_CLOSE_LOOPS', 495 close_loops.replace('\n',' \\\n'))) 496 if info['Iqxy'] is not None: 497 defines.append(('IQXY_PARAMETER_DECLARATIONS', 498 ', '.join('double '+p for p in iqxy_parameters))) 499 fn = """\ 500 double Iqxy(double qx, double qy, IQXY_PARAMETER_DECLARATIONS); 501 double Iqxy(double qx, double qy, IQXY_PARAMETER_DECLARATIONS) { 502 %(body)s 503 } 504 """%{'body':info['Iqxy']} 505 source.append(fn) 506 507 # Need to know if we have a theta parameter for Iqxy; it is not there 508 # for the magnetic sphere model, for example, which has a magnetic 509 # orientation but no shape orientation. 510 if 'theta' in pd_2d: 511 defines.append(('IQXY_HAS_THETA', '1')) 512 513 #for d in defines: print d 514 DEFINES='\n'.join('#define %s %s'%(k,v) for k,v in defines) 515 SOURCES='\n\n'.join(source) 516 return C_KERNEL_TEMPLATE%{ 517 'DEFINES':DEFINES, 518 'SOURCES':SOURCES, 519 } 520 710 521 def make(kernel_module): 711 522 """ … … 717 528 """ 718 529 # TODO: allow Iq and Iqxy to be defined in python 719 from os.path import abspath720 530 #print kernelfile 721 531 info = dict( … … 737 547 info['defaults'] = dict((p[0],p[2]) for p in info['parameters']) 738 548 739 source = make_model(info)740 549 # Assume if one part of the kernel is python then all parts are. 550 source = make_model(info) if not callable(info['Iq']) else None 741 551 return source, info 742 552 -
sasmodels/kernelcl.py
rf1ecfa92 rf734e7d 44 44 45 45 from . import generate 46 from .kernelpy import PyInput, Py Kernel46 from .kernelpy import PyInput, PyModel 47 47 48 48 F64_DEFS = """\ … … 68 68 """ 69 69 source, info = generate.make(kernel_module) 70 if callable(info.get('Iq',None)): 71 return PyModel(info) 70 72 ## for debugging, save source to a .cl file, edit it, and reload as model 71 73 #open(info['name']+'.cl','w').write(source) … … 234 236 235 237 def __call__(self, input): 236 # Support pure python kernel call237 if input.is_2D and callable(self.info['Iqxy']):238 return PyKernel(self.info['Iqxy'], self.info, input)239 elif not input.is_2D and callable(self.info['Iq']):240 return PyKernel(self.info['Iq'], self.info, input)241 242 238 if self.dtype != input.dtype: 243 239 raise TypeError("data and kernel have different types") … … 261 257 ctypes and some may be pure python. 262 258 """ 263 # Support pure python kernel call 264 if len(q_vectors) == 1 and callable(self.info['Iq']): 265 return PyInput(q_vectors, dtype=self.dtype) 266 elif callable(self.info['Iqxy']): 267 return PyInput(q_vectors, dtype=self.dtype) 268 else: 269 return GpuInput(q_vectors, dtype=self.dtype) 259 return GpuInput(q_vectors, dtype=self.dtype) 270 260 271 261 # TODO: check that we don't need a destructor for buffers which go out of scope … … 349 339 350 340 351 def __call__(self, pars, pd_pars, cutoff=1e-5):341 def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 352 342 real = np.float32 if self.input.dtype == generate.F32 else np.float64 353 fixed = [real(p) for p in pars] 354 cutoff = real(cutoff) 355 loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype) 356 loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 357 Nloops = [np.uint32(len(p[0])) for p in pd_pars] 358 #print "loops",Nloops, loops 359 360 #import sys; print >>sys.stderr,"opencl eval",pars 361 #print "opencl eval",pars 362 if len(loops) > 2*MAX_LOOPS: 363 raise ValueError("too many polydispersity points") 343 364 344 device_num = 0 345 queuei = environment().queues[device_num] 365 346 res_bi = self.res_b[device_num] 366 queuei = environment().queues[device_num] 367 loops_bi = self.loops_b[device_num] 368 loops_l = cl.LocalMemory(len(loops.data)) 369 cl.enqueue_copy(queuei, loops_bi, loops) 370 #ctx = environment().context 371 #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 372 args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops 347 nq = np.uint32(self.input.nq) 348 if pd_pars: 349 cutoff = real(cutoff) 350 loops_N = [np.uint32(len(p[0])) for p in pd_pars] 351 loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype) 352 loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 353 #print "loops",Nloops, loops 354 355 #import sys; print >>sys.stderr,"opencl eval",pars 356 #print "opencl eval",pars 357 if len(loops) > 2*MAX_LOOPS: 358 raise ValueError("too many polydispersity points") 359 360 loops_bi = self.loops_b[device_num] 361 cl.enqueue_copy(queuei, loops_bi, loops) 362 loops_l = cl.LocalMemory(len(loops.data)) 363 #ctx = environment().context 364 #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 365 dispersed = [loops_bi, loops_l, cutoff] + loops_N 366 else: 367 dispersed = [] 368 fixed = [real(p) for p in fixed_pars] 369 args = self.input.q_buffers + [res_bi, nq] + dispersed + fixed 373 370 self.kernel(queuei, self.input.global_size, None, *args) 374 371 cl.enqueue_copy(queuei, self.res, res_bi) -
sasmodels/kerneldll.py
r68d3c1b rf734e7d 11 11 12 12 from . import generate 13 from .kernelpy import PyInput, Py Kernel13 from .kernelpy import PyInput, PyModel 14 14 15 15 from .generate import F32, F64 … … 22 22 if "VCINSTALLDIR" in os.environ: 23 23 # MSVC compiler is available, so use it. 24 # TODO: remove intermediate OBJ file created in the directory 25 # TODO: maybe don't use randomized name for the c file 24 26 COMPILE = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG /Tp%(source)s /openmp /link /DLL /INCREMENTAL:NO /MANIFEST /OUT:%(output)s" 25 27 # Can't find VCOMP90.DLL (don't know why), so remove openmp support from windows compiler build … … 54 56 be defined without using too many resources. 55 57 """ 56 import tempfile57 58 58 source, info = generate.make(kernel_module) 59 if callable(info.get('Iq',None)): 60 return PyModel(info) 59 61 source_files = generate.sources(info) + [info['filename']] 60 62 newest = max(os.path.getmtime(f) for f in source_files) … … 68 70 status = os.system(command) 69 71 if status != 0: 70 print "compile failed. File is in %r"%filename72 raise RuntimeError("compile failed. File is in %r"%filename) 71 73 else: 72 74 ## uncomment the following to keep the generated c file … … 76 78 77 79 78 IQ_ARGS = [c_void_p, c_void_p, c_int , c_void_p, c_double]79 IQXY_ARGS = [c_void_p, c_void_p, c_void_p, c_int , c_void_p, c_double]80 IQ_ARGS = [c_void_p, c_void_p, c_int] 81 IQXY_ARGS = [c_void_p, c_void_p, c_void_p, c_int] 80 82 81 83 class DllModel(object): … … 107 109 self.dll = ct.CDLL(self.dllpath) 108 110 111 pd_args_1d = [c_void_p, c_double] + [c_int]*Npd1d if Npd1d else [] 112 pd_args_2d= [c_void_p, c_double] + [c_int]*Npd2d if Npd2d else [] 109 113 self.Iq = self.dll[generate.kernel_name(self.info, False)] 110 self.Iq.argtypes = IQ_ARGS + [c_double]*Nfixed1d + [c_int]*Npd1d114 self.Iq.argtypes = IQ_ARGS + pd_args_1d + [c_double]*Nfixed1d 111 115 112 116 self.Iqxy = self.dll[generate.kernel_name(self.info, True)] 113 self.Iqxy.argtypes = IQXY_ARGS + [c_double]*Nfixed2d + [c_int]*Npd2d117 self.Iqxy.argtypes = IQXY_ARGS + pd_args_2d + [c_double]*Nfixed2d 114 118 115 119 def __getstate__(self): … … 120 124 121 125 def __call__(self, input): 122 # Support pure python kernel call123 if input.is_2D and callable(self.info['Iqxy']):124 return PyKernel(self.info['Iqxy'], self.info, input)125 elif not input.is_2D and callable(self.info['Iq']):126 return PyKernel(self.info['Iq'], self.info, input)127 128 126 if self.dll is None: self._load_dll() 129 127 kernel = self.Iqxy if input.is_2D else self.Iq … … 175 173 self.p_res = self.res.ctypes.data 176 174 177 def __call__(self, pars, pd_pars, cutoff):175 def __call__(self, fixed_pars, pd_pars, cutoff): 178 176 real = np.float32 if self.input.dtype == F32 else np.float64 179 fixed = [real(p) for p in pars]180 cutoff = real(cutoff)181 loops = np.hstack(pd_pars)182 loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()183 loops_N = [np.uint32(len(p[0])) for p in pd_pars]184 177 185 178 nq = c_int(self.input.nq) 186 p_loops = loops.ctypes.data 187 args = self.input.q_pointers + [self.p_res, nq, p_loops, cutoff] + fixed + loops_N 179 if pd_pars: 180 cutoff = real(cutoff) 181 loops_N = [np.uint32(len(p[0])) for p in pd_pars] 182 loops = np.hstack(pd_pars) 183 loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 184 p_loops = loops.ctypes.data 185 dispersed = [p_loops, cutoff] + loops_N 186 else: 187 dispersed = [] 188 fixed = [real(p) for p in fixed_pars] 189 args = self.input.q_pointers + [self.p_res, nq] + dispersed + fixed 188 190 #print pars 189 191 self.kernel(*args) -
sasmodels/kernelpy.py
r6edb74a rf734e7d 3 3 4 4 from .generate import F32, F64 5 6 class PyModel(object): 7 def __init__(self, info): 8 self.info = info 9 def __call__(self, input): 10 kernel = self.info['Iqxy'] if input.is_2D else self.info['Iq'] 11 return PyKernel(kernel, self.info, input) 12 def make_input(self, q_vectors): 13 return PyInput(q_vectors, dtype=F64) 14 def release(self): 15 pass 5 16 6 17 class PyInput(object): … … 134 145 """ 135 146 136 ########################################################## 137 # #138 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #139 # !! !! #140 # !! KEEP THIS CODE CONSISTENT WITH GENERATE.PY!! #141 # !! !! #142 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #143 # #144 ########################################################## 147 ################################################################ 148 # # 149 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # 150 # !! !! # 151 # !! KEEP THIS CODE CONSISTENT WITH KERNEL_TEMPLATE.C !! # 152 # !! !! # 153 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # 154 # # 155 ################################################################ 145 156 146 157 weight = np.empty(len(pd), 'd') … … 164 175 stride = np.array([1]) 165 176 vol_weight_index = slice(None, None) 177 # keep lint happy 178 fast_value = [None] 179 fast_weight = [None] 166 180 167 181 ret = np.zeros_like(args[0]) … … 193 207 # Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta 194 208 #spherical_correction = abs(sin(pi*args[theta_index])) if theta_index>=0 else 1.0 195 #spherical_correction = abs(cos(pi*args[theta_index]))*pi/2 if theta_index>=0 else 1.0196 spherical_correction = 1.0209 spherical_correction = abs(cos(pi*args[theta_index]))*pi/2 if theta_index>=0 else 1.0 210 #spherical_correction = 1.0 197 211 ret += w*I*spherical_correction*positive 198 212 norm += w*positive -
sasmodels/model_test.py
r5428233 r6137124 6 6 """ 7 7 8 import sys 8 9 import unittest 9 import warnings 10 10 11 import numpy as np 11 12 12 from os.path import basename, dirname, join as joinpath 13 from glob import glob 14 15 try: 16 from .kernelcl import load_model 17 except ImportError,exc: 18 warnings.warn(str(exc)) 19 warnings.warn("using ctypes instead") 20 from .kerneldll import load_model 21 22 def load_kernel(model, dtype='single'): 23 kernel = load_model(model, dtype=dtype) 24 kernel.info['defaults'] = dict((p[0],p[2]) for p in kernel.info['parameters']) 25 return kernel 26 27 def get_weights(model, pars, name): 28 from . import weights 29 30 relative = name in model.info['partype']['pd-rel'] 31 disperser = pars.get(name+"_pd_type", "gaussian") 32 value = pars.get(name, model.info['defaults'][name]) 33 width = pars.get(name+"_pd", 0.0) 34 npts = pars.get(name+"_pd_n", 30) 35 nsigma = pars.get(name+"_pd_nsigma", 3.0) 36 v,w = weights.get_weights( 37 disperser, npts, width, nsigma, 38 value, model.info['limits'][name], relative) 39 return v,w/np.sum(w) 40 41 def eval_kernel(kernel, q, pars, cutoff=1e-5): 42 input = kernel.make_input(q) 43 finput = kernel(input) 44 45 fixed_pars = [pars.get(name, finput.info['defaults'][name]) 46 for name in finput.fixed_pars] 47 pd_pars = [get_weights(finput, pars, p) for p in finput.pd_pars] 48 return finput(fixed_pars, pd_pars, cutoff) 13 from .core import list_models, load_model_definition 14 from .core import load_model_cl, load_model_dll 15 from .core import make_kernel, call_kernel 49 16 50 17 def annotate_exception(exc, msg): … … 66 33 args = exc.args 67 34 if not args: 68 arg0 = msg35 exc.args = (msg,) 69 36 else: 70 arg0 = " ".join((args[0],msg)) 71 exc.args = tuple([arg0] + list(args[1:])) 37 try: 38 arg0 = " ".join((args[0],msg)) 39 exc.args = tuple([arg0] + list(args[1:])) 40 except: 41 exc.args = (" ".join((str(exc),msg)),) 72 42 73 def suite(): 74 root = dirname(__file__) 75 files = sorted(glob(joinpath(root, 'models', "[a-zA-Z]*.py"))) 76 models_names = [basename(f)[:-3] for f in files] 77 43 def suite(loaders, models): 44 78 45 suite = unittest.TestSuite() 79 80 for model_name in models_names:81 module = __import__('sasmodels.models.' + model_name)82 module = getattr(module, 'models', None)83 46 84 model = getattr(module, model_name, None) 85 tests = getattr(model, 'tests', []) 47 if models[0] == 'all': 48 skip = models[1:] 49 models = list_models() 50 else: 51 skip = [] 52 for model_name in models: 53 if model_name in skip: continue 54 model_definition = load_model_definition(model_name) 55 56 smoke_tests = [[{},0.1,None],[{},(0.1,0.1),None]] 57 tests = smoke_tests + getattr(model_definition, 'tests', []) 86 58 87 if tests: 59 if tests: # in case there are no smoke tests... 88 60 #print '------' 89 61 #print 'found tests in', model_name 90 62 #print '------' 91 92 kernel = load_kernel(model) 93 suite.addTest(ModelTestCase(model_name, kernel, tests)) 63 64 # if ispy then use the dll loader to call pykernel 65 # don't try to call cl kernel since it will not be 66 # available in some environmentes. 67 ispy = callable(getattr(model_definition,'Iq', None)) 68 69 # test using opencl if desired 70 if not ispy and ('opencl' in loaders and load_model_cl): 71 test_name = "Model: %s, Kernel: OpenCL"%model_name 72 test = ModelTestCase(test_name, model_definition, 73 load_model_cl, tests) 74 #print "defining", test_name 75 suite.addTest(test) 76 77 # test using dll if desired 78 if ispy or ('dll' in loaders and load_model_dll): 79 test_name = "Model: %s, Kernel: dll"%model_name 80 test = ModelTestCase(test_name, model_definition, 81 load_model_dll, tests) 82 #print "defining", test_name 83 suite.addTest(test) 94 84 95 85 return suite … … 97 87 class ModelTestCase(unittest.TestCase): 98 88 99 def __init__(self, model_name, kernel, tests):89 def __init__(self, test_name, definition, loader, tests): 100 90 unittest.TestCase.__init__(self) 101 91 102 self.model_name = model_name 103 self.kernel = kernel 92 self.test_name = test_name 93 self.definition = definition 94 self.loader = loader 104 95 self.tests = tests 105 96 106 97 def runTest(self): 107 #print '------' 108 #print self.model_name 109 #print '------' 98 #print "running", self.test_name 110 99 try: 100 model = self.loader(self.definition) 111 101 for test in self.tests: 112 params = test[0] 113 Q = test[1] 114 I = test[2] 115 102 pars, Q, I = test 103 116 104 if not isinstance(Q, list): 117 105 Q = [Q] … … 120 108 121 109 if isinstance(Q[0], tuple): 122 npQ = [np.array([Qi[d] for Qi in Q]) for d in xrange(len(Q[0]))] 110 Qx,Qy = zip(*Q) 111 Q_vectors = [np.array(Qx), np.array(Qy)] 123 112 else: 124 npQ= [np.array(Q)]113 Q_vectors = [np.array(Q)] 125 114 126 self.assert True(Q)127 self.assertEqual(len(I), len(Q)) 128 129 Iq = eval_kernel(self.kernel, npQ, params)115 self.assertEqual(len(I), len(Q)) 116 117 kernel = make_kernel(model, Q_vectors) 118 Iq = call_kernel(kernel, pars) 130 119 131 120 self.assertGreater(len(Iq), 0) … … 133 122 134 123 for q, i, iq in zip(Q, I, Iq): 135 err = np.abs(i - iq) 136 nrm = np.abs(i) 124 if i is None: continue # smoke test --- make sure it runs 125 err = abs(i - iq) 126 nrm = abs(i) 137 127 138 128 self.assertLess(err * 10**5, nrm, 'q:%s; expected:%s; actual:%s' % (q, i, iq)) 139 129 140 130 except Exception,exc: 141 annotate_exception(exc, '\r\nModel: %s' % self.model_name)131 annotate_exception(exc, self.test_name) 142 132 raise 143 133 144 134 def main(): 145 #unittest.main() 146 runner = unittest.TextTestRunner() 147 runner.run(suite()) 135 136 models = sys.argv[1:] 137 if models and models[0] == 'opencl': 138 if load_model_cl is None: 139 print >>sys.stderr, "opencl is not available" 140 sys.exit(1) 141 loaders = ['opencl'] 142 models = models[1:] 143 elif models and models[0] == 'dll': 144 # TODO: test if compiler is available? 145 loaders = ['dll'] 146 models = models[1:] 147 else: 148 loaders = ['opencl', 'dll'] 149 if models: 150 runner = unittest.TextTestRunner() 151 runner.run(suite(loaders, models)) 152 else: 153 print >>sys.stderr, "usage: python -m sasmodels.model_test [opencl|dll] model1 model2 ..." 154 print >>sys.stderr, "if model1 is 'all', then all except the remaining models will be tested" 148 155 149 156 if __name__ == "__main__": -
sasmodels/models/bcc.c
rc95dc908 r6137124 112 112 113 113 const double Da = d_factor*dnn; 114 const double qDa_2 = q*q*Da*Da;115 114 const double s1 = dnn/sqrt(0.75); 116 115 -
sasmodels/models/broad_peak.py
rf57d123 rf734e7d 91 91 92 92 93 def form_volume():94 return 193 #def form_volume(): 94 # return 1 95 95 96 96 def Iq(q, porod_scale, porod_exp, lorentz_scale, lorentz_length, peak_pos, lorentz_exp): … … 102 102 Iq.vectorized = True 103 103 104 def Iqxy(qx, qy, porod_scale, porod_exp, lorentz_scale, lorentz_length, peak_pos, lorentz_exp):105 return Iq(sqrt(qx**2 + qy**2), porod_scale, porod_exp, lorentz_scale, lorentz_length, peak_pos, lorentz_exp)104 def Iqxy(qx, qy, *args): 105 return Iq(sqrt(qx**2 + qy**2), *args) 106 106 107 107 # FOR VECTORIZED VERSION, UNCOMMENT THE NEXT LINE -
sasmodels/models/fcc.c
re7b3d7b r6137124 112 112 113 113 const double Da = d_factor*dnn; 114 const double qDa_2 = q*q*Da*Da;115 114 const double s1 = dnn/sqrt(0.75); 116 115 -
sasmodels/models/lamellarPC.py
rdc02af0 rf734e7d 92 92 source = [ "lamellarPC_kernel.c"] 93 93 94 # No volume normalization despite having a volume parameter95 # This should perhaps be volume normalized?96 94 form_volume = """ 97 95 return 1.0; … … 99 97 100 98 Iqxy = """ 101 // never called since no orientation or magnetic parameters. 102 return -1.0; 99 return Iq(sqrt(qx*qx+qy*qy), IQ_PARAMETERS); 103 100 """ 104 101 -
sasmodels/models/lamellarPC_kernel.c
rdc02af0 rf734e7d 28 28 //get the fractional part of Nlayers, to determine the "mixing" of N's 29 29 30 n1 = trunc(Nlayers); //rounds towards zero30 n1 = (long)trunc(Nlayers); //rounds towards zero 31 31 n2 = n1 + 1; 32 32 xn = (double)n2 - Nlayers; //fractional contribution of n1
Note: See TracChangeset
for help on using the changeset viewer.