Changeset f734e7d in sasmodels for sasmodels/generate.py
- Timestamp:
- Feb 22, 2015 1:44:54 AM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 6137124
- Parents:
- 711d8e2
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/generate.py
rf1ecfa92 rf734e7d 190 190 191 191 import sys 192 import os 193 import os.path 192 from os.path import abspath, dirname, join as joinpath, exists 194 193 import re 195 194 196 195 import numpy as np 196 C_KERNEL_TEMPLATE_PATH = joinpath(dirname(__file__), 'kernel_template.c') 197 197 198 198 F64 = np.dtype('float64') … … 229 229 PARTABLE_VALUE_WIDTH = 10 230 230 231 # Header included before every kernel.232 # This makes sure that the appropriate math constants are defined, and does233 # whatever is required to make the kernel compile as pure C rather than234 # as an OpenCL kernel.235 KERNEL_HEADER = """\236 // GENERATED CODE --- DO NOT EDIT ---237 // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c238 239 #ifdef __OPENCL_VERSION__240 # define USE_OPENCL241 #endif242 243 // If opencl is not available, then we are compiling a C function244 // Note: if using a C++ compiler, then define kernel as extern "C"245 #ifndef USE_OPENCL246 # ifdef __cplusplus247 #include <cmath>248 #if defined(_MSC_VER)249 #define kernel extern "C" __declspec( dllexport )250 #else251 #define kernel extern "C"252 #endif253 using namespace std;254 inline void SINCOS(double angle, double &svar, double &cvar)255 { svar=sin(angle); cvar=cos(angle); }256 # else257 #include <math.h>258 #if defined(_MSC_VER)259 #define kernel __declspec( dllexport )260 #else261 #define kernel262 #endif263 #define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)264 # endif265 # define global266 # define local267 # define constant const268 # define powr(a,b) pow(a,b)269 # define pown(a,b) pow(a,b)270 #else271 # ifdef USE_SINCOS272 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)273 # else274 # define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)275 # endif276 #endif277 278 // Standard mathematical constants:279 // M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,280 // M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)281 // OpenCL defines M_constant_F for float constants, and nothing if double282 // is not enabled on the card, which is why these constants may be missing283 #ifndef M_PI284 # define M_PI 3.141592653589793285 #endif286 #ifndef M_PI_2287 # define M_PI_2 1.570796326794897288 #endif289 #ifndef M_PI_4290 # define M_PI_4 0.7853981633974483291 #endif292 293 // Non-standard pi/180, used for converting between degrees and radians294 #ifndef M_PI_180295 # define M_PI_180 0.017453292519943295296 #endif297 """298 299 300 # The I(q) kernel and the I(qx, qy) kernel have one and two q parameters301 # respectively, so the template builder will need to do extra work to302 # declare, initialize and pass the q parameters.303 KERNEL_1D = {304 'fn': "Iq",305 'q_par_decl': "global const double *q,",306 'qinit': "const double qi = q[i];",307 'qcall': "qi",308 'qwork': ["q"],309 }310 311 KERNEL_2D = {312 'fn': "Iqxy",313 'q_par_decl': "global const double *qx,\n global const double *qy,",314 'qinit': "const double qxi = qx[i];\n const double qyi = qy[i];",315 'qcall': "qxi, qyi",316 'qwork': ["qx", "qy"],317 }318 319 # Generic kernel template for the polydispersity loop.320 # This defines the opencl kernel that is available to the host. The same321 # structure is used for Iq and Iqxy kernels, so extra flexibility is needed322 # for q parameters. The polydispersity loop is built elsewhere and323 # substituted into this template.324 KERNEL_TEMPLATE = """\325 kernel void %(name)s(326 %(q_par_decl)s327 global double *result,328 #ifdef USE_OPENCL329 global double *loops_g,330 #else331 const int Nq,332 #endif333 local double *loops,334 const double cutoff,335 %(par_decl)s336 )337 {338 #ifdef USE_OPENCL339 // copy loops info to local memory340 event_t e = async_work_group_copy(loops, loops_g, (%(pd_length)s)*2, 0);341 wait_group_events(1, &e);342 343 int i = get_global_id(0);344 int Nq = get_global_size(0);345 #endif346 347 #ifdef USE_OPENCL348 if (i < Nq)349 #else350 #pragma omp parallel for351 for (int i=0; i < Nq; i++)352 #endif353 {354 %(qinit)s355 double ret=0.0, norm=0.0;356 double vol=0.0, norm_vol=0.0;357 %(loops)s358 if (vol*norm_vol != 0.0) {359 ret *= norm_vol/vol;360 }361 result[i] = scale*ret/norm+background;362 }363 }364 """365 366 # Polydispersity loop level.367 # This pulls the parameter value and weight from the looping vector in order368 # in preperation for a nested loop.369 LOOP_OPEN="""\370 for (int %(name)s_i=0; %(name)s_i < N%(name)s; %(name)s_i++) {371 const double %(name)s = loops[2*(%(name)s_i%(offset)s)];372 const double %(name)s_w = loops[2*(%(name)s_i%(offset)s)+1];\373 """374 375 376 377 ##########################################################378 # #379 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #380 # !! !! #381 # !! KEEP THIS CODE CONSISTENT WITH PYKERNEL.PY !! #382 # !! !! #383 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #384 # #385 ##########################################################386 387 388 # Polydispersity loop body.389 # This computes the weight, and if it is sufficient, calls the scattering390 # function and adds it to the total. If there is a volume normalization,391 # it will also be added here.392 LOOP_BODY="""\393 const double weight = %(weight_product)s;394 if (weight > cutoff) {395 const double I = %(fn)s(%(qcall)s, %(pcall)s);396 if (I>=0.0) { // scattering cannot be negative397 ret += weight*I%(sasview_spherical)s;398 norm += weight;399 %(volume_norm)s400 }401 //else { printf("exclude qx,qy,I:%%g,%%g,%%g\\n",%(qcall)s,I); }402 }403 //else { printf("exclude weight:%%g\\n",weight); }\404 """405 406 # Use this when integrating over orientation407 SPHERICAL_CORRECTION="""\408 // Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta409 double spherical_correction = (Ntheta>1 ? fabs(sin(M_PI_180*theta)) : 1.0);\410 """411 # Use this to reproduce sasview behaviour412 SASVIEW_SPHERICAL_CORRECTION="""\413 // Correction factor for spherical integration p(theta) I(q) sin(theta) dtheta414 double spherical_correction = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2 : 1.0);\415 """416 417 # Volume normalization.418 # If there are "volume" polydispersity parameters, then these will be used419 # to call the form_volume function from the user supplied kernel, and accumulate420 # a normalized weight.421 VOLUME_NORM="""const double vol_weight = %(vol_weight)s;422 vol += vol_weight*form_volume(%(vol_pars)s);423 norm_vol += vol_weight;\424 """425 426 # functions defined as strings in the .py module427 WORK_FUNCTION="""\428 double %(name)s(%(pars)s);429 double %(name)s(%(pars)s)430 {431 %(body)s432 }\433 """434 435 231 # Documentation header for the module, giving the model name, its short 436 232 # description and its parameter table. The remainder of the doc comes … … 449 245 %(docs)s 450 246 """ 451 452 def indent(s, depth):453 """454 Indent a string of text with *depth* additional spaces on each line.455 """456 spaces = " "*depth457 sep = "\n"+spaces458 return spaces + sep.join(s.split("\n"))459 460 461 def kernel_name(info, is_2D):462 """463 Name of the exported kernel symbol.464 """465 return info['name'] + "_" + ("Iqxy" if is_2D else "Iq")466 467 468 def use_single(source):469 """470 Convert code from double precision to single precision.471 """472 # Convert double keyword to float. Accept an 'n' parameter for vector473 # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented474 # as cdouble which is typedef'd to double2.475 source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))',476 r'\1float\2', source)477 # Convert floating point constants to single by adding 'f' to the end.478 # OS/X driver complains if you don't do this.479 source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?',480 r'\g<0>f', source)481 return source482 483 484 def make_kernel(info, is_2D):485 """486 Build a kernel call from metadata supplied by the user.487 488 *info* is the json object defined in the kernel file.489 490 *form* is either "Iq" or "Iqxy".491 492 This does not create a complete OpenCL kernel source, only the top493 level kernel call with polydispersity and a call to the appropriate494 Iq or Iqxy function.495 """496 497 # If we are building the Iqxy kernel, we need to propagate qx,qy498 # parameters, otherwise we can499 dim = "2d" if is_2D else "1d"500 fixed_pars = info['partype']['fixed-'+dim]501 pd_pars = info['partype']['pd-'+dim]502 vol_pars = info['partype']['volume']503 q_pars = KERNEL_2D if is_2D else KERNEL_1D504 fn = q_pars['fn']505 506 507 # Build polydispersity loops508 depth = 4509 offset = ""510 loop_head = []511 loop_end = []512 for name in pd_pars:513 subst = { 'name': name, 'offset': offset }514 loop_head.append(indent(LOOP_OPEN%subst, depth))515 loop_end.insert(0, (" "*depth) + "}")516 offset += '+N'+name517 depth += 2518 519 # The volume parameters in the inner loop are used to call the volume()520 # function in the kernel, with the parameters defined in vol_pars and the521 # weight product defined in weight. If there are no volume parameters,522 # then there will be no volume normalization.523 if vol_pars:524 subst = {525 'vol_weight': "*".join(p+"_w" for p in vol_pars),526 'vol_pars': ", ".join(vol_pars),527 }528 volume_norm = VOLUME_NORM%subst529 else:530 volume_norm = ""531 532 # Define the inner loop function call533 # The parameters to the f(q,p1,p2...) call should occur in the same534 # order as given in the parameter info structure. This may be different535 # from the parameter order in the call to the kernel since the kernel536 # call places all fixed parameters before all polydisperse parameters.537 fq_pars = [p[0] for p in info['parameters'][len(COMMON_PARAMETERS):]538 if p[0] in set(fixed_pars+pd_pars)]539 if False and "theta" in pd_pars:540 spherical_correction = [indent(SPHERICAL_CORRECTION, depth)]541 weights = [p+"_w" for p in pd_pars]+['spherical_correction']542 sasview_spherical = ""543 elif True and "theta" in pd_pars:544 spherical_correction = [indent(SASVIEW_SPHERICAL_CORRECTION,depth)]545 weights = [p+"_w" for p in pd_pars]546 sasview_spherical = "*spherical_correction"547 else:548 spherical_correction = []549 weights = [p+"_w" for p in pd_pars]550 sasview_spherical = ""551 weight_product = "*".join(weights) if len(weights) > 1 else "1.0"552 subst = {553 'weight_product': weight_product,554 'volume_norm': volume_norm,555 'fn': fn,556 'qcall': q_pars['qcall'],557 'pcall': ", ".join(fq_pars), # skip scale and background558 'sasview_spherical': sasview_spherical,559 }560 loop_body = [indent(LOOP_BODY%subst, depth)]561 loops = "\n".join(loop_head+spherical_correction+loop_body+loop_end)562 563 # declarations for non-pd followed by pd pars564 # e.g.,565 # const double sld,566 # const int Nradius567 fixed_par_decl = ",\n ".join("const double %s"%p for p in fixed_pars)568 pd_par_decl = ",\n ".join("const int N%s"%p for p in pd_pars)569 if fixed_par_decl and pd_par_decl:570 par_decl = ",\n ".join((fixed_par_decl, pd_par_decl))571 elif fixed_par_decl:572 par_decl = fixed_par_decl573 else:574 par_decl = pd_par_decl575 576 # Finally, put the pieces together in the kernel.577 pd_length = "+".join('N'+p for p in pd_pars) if len(pd_pars) > 0 else "0"578 subst = {579 # kernel name is, e.g., cylinder_Iq580 'name': kernel_name(info, is_2D),581 # to declare, e.g., global double q[],582 'q_par_decl': q_pars['q_par_decl'],583 # to declare, e.g., double sld, int Nradius, int Nlength584 'par_decl': par_decl,585 # to copy global to local pd pars we need, e.g., Nradius+Nlength586 'pd_length': pd_length,587 # the q initializers, e.g., double qi = q[i];588 'qinit': q_pars['qinit'],589 # the actual polydispersity loop590 'loops': loops,591 }592 kernel = KERNEL_TEMPLATE%subst593 594 # If the working function is defined in the kernel metadata as a595 # string, translate the string to an actual function definition596 # and put it before the kernel.597 if info[fn]:598 subst = {599 'name': fn,600 'pars': ", ".join("double "+p for p in q_pars['qwork']+fq_pars),601 'body': info[fn],602 }603 kernel = "\n".join((WORK_FUNCTION%subst, kernel))604 return kernel605 247 606 248 def make_partable(pars): … … 641 283 """ 642 284 for path in search_path: 643 target = os.path.join(path, filename)644 if os.path.exists(target):285 target = joinpath(path, filename) 286 if exists(target): 645 287 return target 646 288 raise ValueError("%r not found in %s"%(filename, search_path)) … … 650 292 Return a list of the sources file paths for the module. 651 293 """ 652 from os.path import abspath, dirname, join as joinpath653 294 search_path = [ dirname(info['filename']), 654 295 abspath(joinpath(dirname(__file__),'models')) ] 655 296 return [_search(search_path, f) for f in info['source']] 656 297 657 def make_model(info): 658 """ 659 Generate the code for the kernel defined by info, using source files 660 found in the given search path. 661 """ 662 source = [open(f).read() for f in sources(info)] 663 # If the form volume is defined as a string, then wrap it in a 664 # function definition and place it after the external sources but 665 # before the kernel functions. If the kernel functions are strings, 666 # they will be translated in the make_kernel call. 667 if info['form_volume']: 668 subst = { 669 'name': "form_volume", 670 'pars': ", ".join("double "+p for p in info['partype']['volume']), 671 'body': info['form_volume'], 672 } 673 source.append(WORK_FUNCTION%subst) 674 kernel_Iq = make_kernel(info, is_2D=False) if not callable(info['Iq']) else "" 675 kernel_Iqxy = make_kernel(info, is_2D=True) if not callable(info['Iqxy']) else "" 676 kernel = "\n\n".join([KERNEL_HEADER]+source+[kernel_Iq, kernel_Iqxy]) 677 return kernel 298 def use_single(source): 299 """ 300 Convert code from double precision to single precision. 301 """ 302 # Convert double keyword to float. Accept an 'n' parameter for vector 303 # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented 304 # as cdouble which is typedef'd to double2. 305 source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))', 306 r'\1float\2', source) 307 # Convert floating point constants to single by adding 'f' to the end. 308 # OS/X driver complains if you don't do this. 309 source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?', 310 r'\g<0>f', source) 311 return source 312 313 314 def kernel_name(info, is_2D): 315 """ 316 Name of the exported kernel symbol. 317 """ 318 return info['name'] + "_" + ("Iqxy" if is_2D else "Iq") 319 678 320 679 321 def categorize_parameters(pars): … … 708 350 return partype 709 351 352 def indent(s, depth): 353 """ 354 Indent a string of text with *depth* additional spaces on each line. 355 """ 356 spaces = " "*depth 357 sep = "\n"+spaces 358 return spaces + sep.join(s.split("\n")) 359 360 361 def build_polydispersity_loops(pd_pars): 362 """ 363 Build polydispersity loops 364 365 Returns loop opening and loop closing 366 """ 367 LOOP_OPEN="""\ 368 for (int %(name)s_i=0; %(name)s_i < N%(name)s; %(name)s_i++) { 369 const double %(name)s = loops[2*(%(name)s_i%(offset)s)]; 370 const double %(name)s_w = loops[2*(%(name)s_i%(offset)s)+1];\ 371 """ 372 depth = 4 373 offset = "" 374 loop_head = [] 375 loop_end = [] 376 for name in pd_pars: 377 subst = { 'name': name, 'offset': offset } 378 loop_head.append(indent(LOOP_OPEN%subst, depth)) 379 loop_end.insert(0, (" "*depth) + "}") 380 offset += '+N'+name 381 depth += 2 382 return "\n".join(loop_head), "\n".join(loop_end) 383 384 C_KERNEL_TEMPLATE=None 385 def make_model(info): 386 """ 387 Generate the code for the kernel defined by info, using source files 388 found in the given search path. 389 """ 390 # TODO: need something other than volume to indicate dispersion parameters 391 # No volume normalization despite having a volume parameter. 392 # Thickness is labelled a volume in order to trigger polydispersity. 393 # May want a separate dispersion flag, or perhaps a separate category for 394 # disperse, but not volume. Volume parameters also use relative values 395 # for the distribution rather than the absolute values used by angular 396 # dispersion. Need to be careful that necessary parameters are available 397 # for computing volume even if we allow non-disperse volume parameters. 398 399 # Load template 400 global C_KERNEL_TEMPLATE 401 if C_KERNEL_TEMPLATE is None: 402 with open(C_KERNEL_TEMPLATE_PATH) as fid: 403 C_KERNEL_TEMPLATE = fid.read() 404 405 # Load additional sources 406 source = [open(f).read() for f in sources(info)] 407 408 # Prepare defines 409 defines = [] 410 partype = info['partype'] 411 pd_1d = partype['pd-1d'] 412 pd_2d = partype['pd-2d'] 413 fixed_1d = partype['fixed-1d'] 414 fixed_2d = partype['fixed-1d'] 415 416 iq_parameters = [p[0] 417 for p in info['parameters'][2:] # skip scale, background 418 if p[0] in set(fixed_1d+pd_1d)] 419 iqxy_parameters = [p[0] 420 for p in info['parameters'][2:] # skip scale, background 421 if p[0] in set(fixed_2d+pd_2d)] 422 volume_parameters = [p[0] 423 for p in info['parameters'] 424 if p[4]=='volume'] 425 426 # Fill in defintions for volume parameters 427 if volume_parameters: 428 defines.append(('VOLUME_PARAMETERS', 429 ','.join(volume_parameters))) 430 defines.append(('VOLUME_WEIGHT_PRODUCT', 431 '*'.join(p+'_w' for p in volume_parameters))) 432 433 # Generate form_volume function from body only 434 if info['form_volume'] is not None: 435 defines.append(('VOLUME_PARAMETER_DECLARATIONS', 436 ', '.join('double '+p for p in volume_parameters))) 437 fn = """\ 438 double form_volume(VOLUME_PARAMETER_DECLARATIONS); 439 double form_volume(VOLUME_PARAMETER_DECLARATIONS) { 440 %(body)s 441 } 442 """%{'body':info['form_volume']} 443 source.append(fn) 444 445 # Fill in definitions for Iq parameters 446 defines.append(('IQ_KERNEL_NAME', info['name']+'_Iq')) 447 defines.append(('IQ_PARAMETERS', ', '.join(iq_parameters))) 448 if fixed_1d: 449 defines.append(('IQ_FIXED_PARAMETER_DECLARATIONS', 450 ', \\\n '.join('const double %s'%p for p in fixed_1d))) 451 if pd_1d: 452 defines.append(('IQ_WEIGHT_PRODUCT', 453 '*'.join(p+'_w' for p in pd_1d))) 454 defines.append(('IQ_DISPERSION_LENGTH_DECLARATIONS', 455 ', \\\n '.join('const int N%s'%p for p in pd_1d))) 456 defines.append(('IQ_DISPERSION_LENGTH_SUM', 457 '+'.join('N'+p for p in pd_1d))) 458 open_loops, close_loops = build_polydispersity_loops(pd_1d) 459 defines.append(('IQ_OPEN_LOOPS', 460 open_loops.replace('\n',' \\\n'))) 461 defines.append(('IQ_CLOSE_LOOPS', 462 close_loops.replace('\n',' \\\n'))) 463 if info['Iq'] is not None: 464 defines.append(('IQ_PARAMETER_DECLARATIONS', 465 ', '.join('double '+p for p in iq_parameters))) 466 fn = """\ 467 double Iq(double q, IQ_PARAMETER_DECLARATIONS); 468 double Iq(double q, IQ_PARAMETER_DECLARATIONS) { 469 %(body)s 470 } 471 """%{'body':info['Iq']} 472 source.append(fn) 473 474 # Fill in definitions for Iqxy parameters 475 defines.append(('IQXY_KERNEL_NAME', info['name']+'_Iqxy')) 476 defines.append(('IQXY_PARAMETERS', ', '.join(iqxy_parameters))) 477 if fixed_2d: 478 defines.append(('IQXY_FIXED_PARAMETER_DECLARATIONS', 479 ', \\\n '.join('const double %s'%p for p in fixed_2d))) 480 if pd_2d: 481 defines.append(('IQXY_WEIGHT_PRODUCT', 482 '*'.join(p+'_w' for p in pd_2d))) 483 defines.append(('IQXY_DISPERSION_LENGTH_DECLARATIONS', 484 ', \\\n '.join('const int N%s'%p for p in pd_2d))) 485 defines.append(('IQXY_DISPERSION_LENGTH_SUM', 486 '+'.join('N'+p for p in pd_2d))) 487 open_loops, close_loops = build_polydispersity_loops(pd_2d) 488 defines.append(('IQXY_OPEN_LOOPS', 489 open_loops.replace('\n',' \\\n'))) 490 defines.append(('IQXY_CLOSE_LOOPS', 491 close_loops.replace('\n',' \\\n'))) 492 if info['Iqxy'] is not None: 493 defines.append(('IQXY_PARAMETER_DECLARATIONS', 494 ', '.join('double '+p for p in iqxy_parameters))) 495 fn = """\ 496 double Iqxy(double qx, double qy, IQXY_PARAMETER_DECLARATIONS); 497 double Iqxy(double qx, double qy, IQXY_PARAMETER_DECLARATIONS) { 498 %(body)s 499 } 500 """%{'body':info['Iqxy']} 501 source.append(fn) 502 503 # Need to know if we have a theta parameter for Iqxy; it is not there 504 # for the magnetic sphere model, for example, which has a magnetic 505 # orientation but no shape orientation. 506 if 'theta' in pd_2d: 507 defines.append(('IQXY_HAS_THETA', '1')) 508 509 #for d in defines: print d 510 DEFINES='\n'.join('#define %s %s'%(k,v) for k,v in defines) 511 SOURCES='\n\n'.join(source) 512 return C_KERNEL_TEMPLATE%{ 513 'DEFINES':DEFINES, 514 'SOURCES':SOURCES, 515 } 516 710 517 def make(kernel_module): 711 518 """ … … 717 524 """ 718 525 # TODO: allow Iq and Iqxy to be defined in python 719 from os.path import abspath720 526 #print kernelfile 721 527 info = dict( … … 737 543 info['defaults'] = dict((p[0],p[2]) for p in info['parameters']) 738 544 739 source = make_model(info)740 545 # Assume if one part of the kernel is python then all parts are. 546 source = make_model(info) if not callable(info['Iq']) else None 741 547 return source, info 742 548
Note: See TracChangeset
for help on using the changeset viewer.