Changeset 74e9b5f in sasmodels
- Timestamp:
- Oct 12, 2018 10:52:48 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 4de14584
- Parents:
- b0de252
- Location:
- sasmodels
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_header.c
rb0de252 r74e9b5f 5 5 #elif defined(_OPENMP) 6 6 # define USE_OPENMP 7 #elif defined(__CUDACC__) 8 # define USE_CUDA 9 7 #endif 10 8 … … 17 15 18 16 #define USE_GPU 17 #define pglobal global 18 #define pconstant constant 19 19 20 typedef int int32_t; 20 #define global_par global 21 #define local_par local 22 #define constant_par constant 23 #define global_var global 24 #define local_var local 25 #define constant_var constant 26 #define __device__ 27 21 28 22 #if defined(USE_SINCOS) … … 45 39 46 40 #define USE_GPU 47 #define global_par 48 #define local_par 49 #define constant_par const 50 #define global_var 51 #define local_var __shared__ 52 #define constant_var __constant__ 53 41 #define local __shared__ 42 #define pglobal 43 #define constant __constant__ 44 #define pconstant const 54 45 #define kernel extern "C" __global__ 55 46 … … 67 58 #else // !USE_OPENCL && !USE_CUDA 68 59 69 #define global_par 70 #define local_par 71 #define constant_par const 72 #define global_var 73 #define local_var 74 #define constant_var const 75 #define __device__ 60 #define local 61 #define pglobal 62 #define constant const 63 #define pconstant const 76 64 77 65 #ifdef __cplusplus … … 193 181 # define M_4PI_3 4.18879020478639 194 182 #endif 195 __device__ 196 183 inline double square(double x) { return x*x; } 197 __device__ 198 184 inline double cube(double x) { return x*x*x; } 199 __device__ 200 185 inline double sas_sinx_x(double x) { return x==0 ? 1.0 : sin(x)/x; } 201 186 -
sasmodels/kernel_iq.c
rb0de252 r74e9b5f 67 67 68 68 // Return value restricted between low and high 69 __device__ 70 69 static double clip(double value, double low, double high) 71 70 { … … 80 79 // du * (m_sigma_y + 1j*m_sigma_z); 81 80 // weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag 82 __device__ 83 81 static void set_spin_weights(double in_spin, double out_spin, double weight[6]) 84 82 { … … 105 103 106 104 // Compute the magnetic sld 107 __device__ 108 105 static double mag_sld( 109 106 const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag … … 154 151 // jitter angles (dtheta, dphi). This matrix can be applied to all of the 155 152 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 156 __device__ 157 153 static void 158 154 qac_rotation( … … 188 184 // Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 189 185 // returning R*[qx,qy]' = [qa,qc]' 190 __device__ 191 186 static void 192 187 qac_apply( … … 216 211 // jitter angles (dtheta, dphi, dpsi). 
This matrix can be applied to all of the 217 212 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 218 __device__ 219 213 static void 220 214 qabc_rotation( … … 263 257 // Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 264 258 // returning R*[qx,qy]' = [qa,qb,qc]' 265 __device__ 266 259 static void 267 260 qabc_apply( … … 285 278 const int32_t pd_start, // where we are in the dispersity loop 286 279 const int32_t pd_stop, // where we are stopping in the dispersity loop 287 global_par const ProblemDetails *details, 288 global_par const double *values, 289 global_par const double *q, // nq q values, with padding to boundary 290 global_par double *result, // nq+1 return values, again with padding 280 pglobal const ProblemDetails *details, 281 pglobal const double *values, 282 pglobal const double *q, // nq q values, with padding to boundary 283 pglobal double *result, // nq+1 return values, again with padding 291 284 const double cutoff // cutoff in the dispersity weight product 292 285 ) … … 386 379 const int n4 = pd_length[4]; 387 380 const int p4 = pd_par[4]; 388 global_var const double *v4 = pd_value + pd_offset[4]; 389 global_var const double *w4 = pd_weight + pd_offset[4]; 381 pglobal const double *v4 = pd_value + pd_offset[4]; 382 pglobal const double *w4 = pd_weight + pd_offset[4]; 390 383 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 391 384 … … 573 566 const int n##_LOOP = details->pd_length[_LOOP]; \ 574 567 const int p##_LOOP = details->pd_par[_LOOP]; \ 575 global_var const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 576 global_var const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 568 pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 569 pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 577 570 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 578 571 … … 598 591 // Pointers to the start of the dispersity and 
weight vectors, if needed. 599 592 #if MAX_PD>0 600 global_var const double *pd_value = values + NUM_VALUES; 601 global_var const double *pd_weight = pd_value + details->num_weights; 593 pglobal const double *pd_value = values + NUM_VALUES; 594 pglobal const double *pd_weight = pd_value + details->num_weights; 602 595 #endif 603 596 -
sasmodels/kernelcuda.py
rb0de252 r74e9b5f 62 62 import logging 63 63 import time 64 import re 64 65 65 66 import numpy as np # type: ignore … … 146 147 return dtype in (generate.F32, generate.F64) 147 148 149 150 FUNCTION_PATTERN = re.compile(r"""^ 151 (?P<space>\s*) # initial space 152 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # one or more qualifiers before function 153 (?P<function>\s*\b\w+\b\s*[(]) # function name plus open parens 154 """, re.VERBOSE|re.MULTILINE) 155 156 MARKED_PATTERN = re.compile(r""" 157 \b(return|else|kernel|device|__device__)\b 158 """, re.VERBOSE|re.MULTILINE) 159 160 def _add_device_tag(match): 161 # type: (None) -> str 162 # Note: should be re.Match, but that isn't a simple type 163 """ 164 replace qualifiers with __device__ qualifiers if needed 165 """ 166 qualifiers = match.group("qualifiers") 167 if MARKED_PATTERN.search(qualifiers): 168 start, end = match.span() 169 return match.string[start:end] 170 else: 171 function = match.group("function") 172 space = match.group("space") 173 return "".join((space, "__device__ ", qualifiers, function)) 174 175 def mark_device_functions(source): 176 # type: (str) -> str 177 """ 178 Mark all function declarations as __device__ functions (except kernel). 179 """ 180 return FUNCTION_PATTERN.sub(_add_device_tag, source) 181 182 def show_device_functions(source): 183 # type: (str) -> str 184 """ 185 Show all discovered function declarations, but don't change any. 
186 """ 187 for match in FUNCTION_PATTERN.finditer(source): 188 print(match.group('qualifiers').replace('\n',r'\n'), match.group('function'), '(') 189 return source 190 148 191 def compile_model(source, dtype, fast=False): 149 192 # type: (str, np.dtype, bool) -> SourceModule … … 163 206 source_list.insert(0, "#define USE_SINCOS\n") 164 207 source = "\n".join(source_list) 165 options = '-use_fast_math' if fast else None 208 #source = show_device_functions(source) 209 source = mark_device_functions(source) 210 #with open('/tmp/kernel.cu', 'w') as fd: fd.write(source) 211 #print(source) 212 #options = ['--verbose', '-E'] 213 options = ['--use_fast_math'] if fast else None 166 214 program = SourceModule(source, no_extern_c=True, options=options) # include_dirs=[...] 215 167 216 #print("done with "+program) 168 217 return program -
sasmodels/model_test.py
r012cd34 r74e9b5f 5 5 Usage:: 6 6 7 python -m sasmodels.model_test [opencl| dll|opencl_and_dll] model1 model2 ...7 python -m sasmodels.model_test [opencl|cuda|dll] model1 model2 ... 8 8 9 9 if model1 is 'all', then all except the remaining models will be tested … … 63 63 from .modelinfo import expand_pars 64 64 from .kernelcl import use_opencl 65 from .kernelcuda import use_cuda 65 66 66 67 # pylint: disable=unused-import … … 80 81 Construct the pyunit test suite. 81 82 82 *loaders* is the list of kernel drivers to use, which is one of 83 *["dll", "opencl"]*, *["dll"]* or *["opencl"]*. For python models, 84 the python driver is always used. 83 *loaders* is the list of kernel drivers to use (dll, opencl or cuda). 84 For python model the python driver is always used. 85 85 86 86 *models* is the list of models to test, or *["all"]* to test all models. … … 135 135 136 136 # test using dll if desired 137 if 'dll' in loaders or not use_opencl():137 if 'dll' in loaders: 138 138 test_name = "%s-dll"%model_name 139 139 test_method_name = "test_%s_dll" % model_info.id … … 156 156 test_method_name, 157 157 platform="ocl", dtype=None, 158 stash=stash) 159 #print("defining", test_name) 160 suite.addTest(test) 161 162 # test using cuda if desired and available 163 if 'cuda' in loaders and use_cuda(): 164 test_name = "%s-cuda"%model_name 165 test_method_name = "test_%s_cuda" % model_info.id 166 # Using dtype=None so that the models that are only 167 # correct for double precision are not tested using 168 # single precision. The choice is determined by the 169 # presence of *single=False* in the model file. 170 test = ModelTestCase(test_name, model_info, 171 test_method_name, 172 platform="cuda", dtype=None, 158 173 stash=stash) 159 174 #print("defining", test_name) … … 220 235 221 236 # Check for missing tests. 
Only do so for the "dll" tests 222 # to reduce noise from both opencl and dll, and because237 # to reduce noise from both opencl and cuda, and because 223 238 # python kernels use platform="dll". 224 239 if self.platform == "dll": … … 368 383 369 384 # Build a test suite containing just the model 370 loader s = ['opencl'] if use_opencl() else ['dll']385 loader = 'opencl' if use_opencl() else 'cuda' if use_cuda() else 'dll' 371 386 models = [model] 372 387 try: 373 suite = make_suite( loaders, models)388 suite = make_suite([loader], models) 374 389 except Exception: 375 390 import traceback … … 434 449 loaders = ['opencl'] 435 450 models = models[1:] 451 elif models and models[0] == 'cuda': 452 if not use_cuda(): 453 print("cuda is not available") 454 return 1 455 loaders = ['cuda'] 456 models = models[1:] 436 457 elif models and models[0] == 'dll': 437 458 # TODO: test if compiler is available? 438 459 loaders = ['dll'] 439 460 models = models[1:] 440 elif models and models[0] == 'opencl_and_dll':441 loaders = ['opencl', 'dll'] if use_opencl() else ['dll']442 models = models[1:]443 461 else: 444 loaders = ['opencl', 'dll'] if use_opencl() else ['dll'] 462 loaders = ['dll'] 463 if use_opencl(): 464 loaders.append('opencl') 465 if use_cuda(): 466 loaders.append('cuda') 445 467 if not models: 446 468 print("""\ 447 469 usage: 448 python -m sasmodels.model_test [-v] [opencl| dll] model1 model2 ...470 python -m sasmodels.model_test [-v] [opencl|cuda|dll] model1 model2 ... 449 471 450 472 If -v is included on the command line, then use verbose output. 451 473 452 If n either opencl nor dll is specified, then models will be tested with453 both OpenCL and dll; the compute target is ignored for pure python models.474 If no platform is specified, then models will be tested with dll, and 475 if available, OpenCL and CUDA; the compute target is ignored for pure python models. 454 476 455 477 If model1 is 'all', then all except the remaining models will be tested. 
… … 471 493 Run "nosetests sasmodels" on the command line to invoke it. 472 494 """ 473 loaders = ['opencl', 'dll'] if use_opencl() else ['dll'] 495 loaders = ['dll'] 496 if use_opencl(): 497 loaders.append('opencl') 498 if use_cuda(): 499 loaders.append('cuda') 474 500 tests = make_suite(loaders, ['all']) 475 501 def build_test(test): -
sasmodels/models/cylinder.c
r0db7dbd r74e9b5f 1 1 #define INVALID(v) (v.radius<0 || v.length<0) 2 2 3 __device__ 4 3 static double 5 4 form_volume(double radius, double length) … … 8 7 } 9 8 10 __device__ 11 9 static double 12 10 fq(double qab, double qc, double radius, double length) … … 15 13 } 16 14 17 __device__ 18 15 static double 19 16 orient_avg_1D(double q, double radius, double length) … … 36 33 } 37 34 38 __device__ 39 35 static double 40 36 Iq(double q, … … 48 44 } 49 45 50 __device__ 51 46 static double 52 47 Iqac(double qab, double qc, -
sasmodels/models/lib/gauss76.c
r0db7dbd r74e9b5f 11 11 12 12 // Gaussians 13 constant_var double Gauss76Wt[76] = { 13 constant double Gauss76Wt[76] = { 14 14 .00126779163408536, //0 15 15 .00294910295364247, … … 90 90 }; 91 91 92 constant_var double Gauss76Z[76] = { 92 constant double Gauss76Z[76] = { 93 93 -.999505948362153, //0 94 94 -.997397786355355, -
sasmodels/models/lib/polevl.c
r0db7dbd r74e9b5f 51 51 */ 52 52 53 __device__ static 54 double polevl( double x, constant_par double *coef, int N ) 53 static 54 double polevl( double x, pconstant double *coef, int N ) 55 55 { 56 56 … … 72 72 */ 73 73 74 __device__ static 75 double p1evl( double x, constant_par double *coef, int N ) 74 static 75 double p1evl( double x, pconstant double *coef, int N ) 76 76 { 77 77 int i=0; -
sasmodels/models/lib/sas_J1.c
r0db7dbd r74e9b5f 43 43 //Cephes double pression function 44 44 45 constant_var double RPJ1[8] = { 45 constant double RPJ1[8] = { 46 46 -8.99971225705559398224E8, 47 47 4.52228297998194034323E11, … … 53 53 0.0 }; 54 54 55 constant_var double RQJ1[8] = { 55 constant double RQJ1[8] = { 56 56 6.20836478118054335476E2, 57 57 2.56987256757748830383E5, … … 64 64 }; 65 65 66 constant_var double PPJ1[8] = { 66 constant double PPJ1[8] = { 67 67 7.62125616208173112003E-4, 68 68 7.31397056940917570436E-2, … … 75 75 76 76 77 constant_var double PQJ1[8] = { 77 constant double PQJ1[8] = { 78 78 5.71323128072548699714E-4, 79 79 6.88455908754495404082E-2, … … 85 85 0.0 }; 86 86 87 constant_var double QPJ1[8] = { 87 constant double QPJ1[8] = { 88 88 5.10862594750176621635E-2, 89 89 4.98213872951233449420E0, … … 95 95 2.52070205858023719784E1 }; 96 96 97 constant_var double QQJ1[8] = { 97 constant double QQJ1[8] = { 98 98 7.42373277035675149943E1, 99 99 1.05644886038262816351E3, … … 105 105 0.0 }; 106 106 107 __device__ static 107 static 108 108 double cephes_j1(double x) 109 109 { … … 155 155 #else 156 156 //Single precission version of cephes 157 constant_var float JPJ1[8] = { 157 constant float JPJ1[8] = { 158 158 -4.878788132172128E-009, 159 159 6.009061827883699E-007, … … 166 166 }; 167 167 168 constant_var float MO1J1[8] = { 168 constant float MO1J1[8] = { 169 169 6.913942741265801E-002, 170 170 -2.284801500053359E-001, … … 177 177 }; 178 178 179 constant_var float PH1J1[8] = { 179 constant float PH1J1[8] = { 180 180 -4.497014141919556E+001, 181 181 5.073465654089319E+001, … … 188 188 }; 189 189 190 __device__ static 190 static 191 191 float cephes_j1f(float xx) 192 192 { … … 239 239 240 240 //Finally J1c function that equals 2*J1(x)/x 241 __device__ static 241 static 242 242 double sas_2J1x_x(double x) 243 243 {
Note: See TracChangeset
for help on using the changeset viewer.