Changeset 0db7dbd in sasmodels
- Timestamp:
- Feb 16, 2018 7:10:04 PM (7 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 47fb816
- Parents:
- aa90015
- Location:
- sasmodels
- Files:
-
- 1 added
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/core.py
r3221de0 r0db7dbd 13 13 from glob import glob 14 14 import re 15 16 # Set "SAS_OPENCL=cuda" in the environment to use the CUDA rather than OpenCL 17 USE_CUDA = os.environ.get("SAS_OPENCL", "") == "cuda" 15 18 16 19 import numpy as np # type: ignore … … 21 24 from . import mixture 22 25 from . import kernelpy 23 from . import kernelcl 26 if USE_CUDA: 27 from . import kernelcuda 28 else: 29 from . import kernelcl 24 30 from . import kerneldll 25 31 from . import custom … … 210 216 #print("building dll", numpy_dtype) 211 217 return kerneldll.load_dll(source['dll'], model_info, numpy_dtype) 218 elif USE_CUDA: 219 #print("building cuda", numpy_dtype) 220 return kernelcuda.GpuModel(source, model_info, numpy_dtype, fast=fast) 212 221 else: 213 222 #print("building ocl", numpy_dtype) … … 268 277 if platform is None: 269 278 platform = "ocl" 270 if not kernelcl.use_opencl() or notmodel_info.opencl:279 if not model_info.opencl: 271 280 platform = "dll" 281 elif USE_CUDA: 282 if not kernelcuda.use_cuda(): 283 platform = "dll" 284 else: 285 if not kernelcl.use_opencl(): 286 platform = "dll" 272 287 273 288 # Check if type indicates dll regardless of which platform is given … … 294 309 # Make sure that the type is supported by opencl, otherwise use dll 295 310 if platform == "ocl": 296 env = kernelcl.environment() 311 if USE_CUDA: 312 env = kernelcuda.environment() 313 else: 314 env = kernelcl.environment() 297 315 if not env.has_type(numpy_dtype): 298 316 platform = "dll" -
sasmodels/kernel_header.c
r108e70e r0db7dbd 1 1 #ifdef __OPENCL_VERSION__ 2 2 # define USE_OPENCL 3 #elif defined(__CUDACC__) 4 # define USE_CUDA 3 5 #elif defined(_OPENMP) 4 6 # define USE_OPENMP … … 8 10 // Note: if using a C++ compiler, then define kernel as extern "C" 9 11 #ifdef USE_OPENCL 12 13 #define USE_GPU 10 14 typedef int int32_t; 11 # if defined(USE_SINCOS) 12 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar) 13 # else 14 # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 15 # endif 15 #define global_par global 16 #define local_par local 17 #define constant_par constant 18 #define global_var global 19 #define local_var local 20 #define constant_var constant 21 #define __device__ 22 23 #if defined(USE_SINCOS) 24 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar) 25 #else 26 # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 27 #endif 16 28 // Intel CPU on Mac gives strange values for erf(); on the verified 17 29 // platforms (intel, nvidia, amd), the cephes erf() is significantly … … 24 36 # define erfcf erfc 25 37 #endif 26 #else // !USE_OPENCL 27 // Use SAS_DOUBLE to force the use of double even for float kernels 28 # define SAS_DOUBLE dou ## ble 29 # ifdef __cplusplus 38 39 #elif defined(USE_CUDA) 40 41 #define USE_GPU 42 #define global_par 43 #define local_par 44 #define constant_par const 45 #define global_var 46 #define local_var __shared__ 47 #define constant_var __constant__ 48 49 #define kernel extern "C" __global__ 50 51 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 52 // OpenCL pown(a,b) = C99 pow(a,b), b integer 53 #define powr(a,b) pow(a,b) 54 #define pown(a,b) pow(a,b) 55 //typedef int int32_t; 56 #if defined(USE_SINCOS) 57 # define SINCOS(angle,svar,cvar) sincos(angle,&svar,&cvar) 58 #else 59 # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 60 #endif 61 62 #else // !USE_OPENCL && !USE_CUDA 63 64 #define global_par 65 #define local_par 66 #define constant_par const 67 #define global_var 68 #define local_var 69 #define constant_var const 70 #define __device__ 71 72 #ifdef __cplusplus 30 73 #include <cstdio> 31 74 #include <cmath> … … 51 94 #endif 52 95 inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); } 53 #else // !__cplusplus96 #else // !__cplusplus 54 97 #include <inttypes.h> // C99 guarantees that int32_t types is here 55 98 #include <stdio.h> … … 76 119 #define kernel 77 120 #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 78 # endif // !__cplusplus 79 # define global 80 # define local 81 # define constant const 82 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 83 // OpenCL pown(a,b) = C99 pow(a,b), b integer 84 # define powr(a,b) pow(a,b) 85 # define pown(a,b) pow(a,b) 121 #endif // !__cplusplus 122 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 123 // OpenCL pown(a,b) = C99 pow(a,b), b integer 124 #define powr(a,b) pow(a,b) 125 #define pown(a,b) pow(a,b) 126 86 127 #endif // !USE_OPENCL 128 129 // Use SAS_DOUBLE to force the use of double even for float kernels 130 #define SAS_DOUBLE dou ## ble 87 131 88 132 #if defined(NEED_EXPM1) … … 147 191 # define M_4PI_3 4.18879020478639 148 192 #endif 193 __device__ 149 194 inline double square(double x) { return x*x; } 195 __device__ 150 196 inline double cube(double x) { return x*x*x; } 197 __device__ 151 198 inline double sas_sinx_x(double x) { return x==0 ? 1.0 : sin(x)/x; } 152 199 -
sasmodels/kernel_iq.c
raadec17 r0db7dbd 67 67 68 68 // Return value restricted between low and high 69 __device__ 69 70 static double clip(double value, double low, double high) 70 71 { … … 79 80 // du * (m_sigma_y + 1j*m_sigma_z); 80 81 // weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag 82 __device__ 81 83 static void set_spin_weights(double in_spin, double out_spin, double spins[4]) 82 84 { … … 92 94 93 95 // Compute the magnetic sld 96 __device__ 94 97 static double mag_sld( 95 98 const unsigned int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag … … 140 143 // jitter angles (dtheta, dphi). This matrix can be applied to all of the 141 144 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 145 __device__ 142 146 static void 143 147 qac_rotation( … … 173 177 // Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 174 178 // returning R*[qx,qy]' = [qa,qc]' 175 static double 179 __device__ 180 static void 176 181 qac_apply( 177 182 QACRotation *rotation, … … 200 205 // jitter angles (dtheta, dphi, dpsi). This matrix can be applied to all of the 201 206 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 207 __device__ 202 208 static void 203 209 qabc_rotation( … … 246 252 // Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 247 253 // returning R*[qx,qy]' = [qa,qb,qc]' 248 static double 254 __device__ 255 static void 249 256 qabc_apply( 250 257 QABCRotation *rotation, … … 267 274 const int32_t pd_start, // where we are in the dispersity loop 268 275 const int32_t pd_stop, // where we are stopping in the dispersity loop 269 global const ProblemDetails *details,270 global const double *values,271 global const double *q, // nq q values, with padding to boundary272 global double *result, // nq+1 return values, again with padding276 global_par const ProblemDetails *details, 277 global_par const double *values, 278 global_par const double *q, // nq q values, with padding to boundary 279 global_par double *result, // nq+1 return values, again with padding 273 280 const double cutoff // cutoff in the dispersity weight product 274 281 ) 275 282 { 276 #if def USE_OPENCL283 #if defined(USE_GPU) 277 284 // who we are and what element we are working with 285 #if defined(USE_OPENCL) 278 286 const int q_index = get_global_id(0); 287 #else // USE_CUDA 288 const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 289 #endif 279 290 if (q_index >= nq) return; 280 291 #else … … 329 340 // seeing one q value (stored in the variable "this_result") while the dll 330 341 // version must loop over all q. 331 #if def USE_OPENCL342 #if defined(USE_GPU) 332 343 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 333 344 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 334 #else // !USE_ OPENCL345 #else // !USE_GPU 335 346 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 336 347 if (pd_start == 0) { … … 341 352 } 342 353 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 343 #endif // !USE_ OPENCL354 #endif // !USE_GPU 344 355 345 356 … … 364 375 const int n4 = pd_length[4]; 365 376 const int p4 = pd_par[4]; 366 global const double *v4 = pd_value + pd_offset[4];367 global const double *w4 = pd_weight + pd_offset[4];377 global_var const double *v4 = pd_value + pd_offset[4]; 378 global_var const double *w4 = pd_weight + pd_offset[4]; 368 379 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 369 380 … … 551 562 const int n##_LOOP = details->pd_length[_LOOP]; \ 552 563 const int p##_LOOP = details->pd_par[_LOOP]; \ 553 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \554 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \564 global_var const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 565 global_var const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 555 566 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 556 567 … … 576 587 // Pointers to the start of the dispersity and weight vectors, if needed. 577 588 #if MAX_PD>0 578 global const double *pd_value = values + NUM_VALUES;579 global const double *pd_weight = pd_value + details->num_weights;589 global_var const double *pd_value = values + NUM_VALUES; 590 global_var const double *pd_weight = pd_value + details->num_weights; 580 591 #endif 581 592 … … 637 648 BUILD_ROTATION(); 638 649 639 #if ndef USE_OPENCL650 #if !defined(USE_GPU) 640 651 // DLL needs to explicitly loop over the q values. 641 652 #ifdef USE_OPENMP … … 643 654 #endif 644 655 for (q_index=0; q_index<nq; q_index++) 645 #endif // !USE_ OPENCL656 #endif // !USE_GPU 646 657 { 647 658 … … 684 695 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 685 696 686 #if def USE_OPENCL697 #if defined(USE_GPU) 687 698 this_result += weight * scattering; 688 #else // !USE_ OPENCL699 #else // !USE_GPU 689 700 result[q_index] += weight * scattering; 690 #endif // !USE_ OPENCL701 #endif // !USE_GPU 691 702 } 692 703 } … … 712 723 713 724 // Remember the current result and the updated norm. 714 #if def USE_OPENCL725 #if defined(USE_GPU) 715 726 result[q_index] = this_result; 716 727 if (q_index == 0) result[nq] = pd_norm; 717 728 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 718 #else // !USE_ OPENCL729 #else // !USE_GPU 719 730 result[nq] = pd_norm; 720 731 //printf("res: %g/%g\n", result[0], pd_norm); 721 #endif // !USE_ OPENCL732 #endif // !USE_GPU 722 733 723 734 // ** clear the macros in preparation for the next kernel ** -
sasmodels/models/cylinder.c
r108e70e r0db7dbd 1 1 #define INVALID(v) (v.radius<0 || v.length<0) 2 2 3 __device__ 3 4 static double 4 5 form_volume(double radius, double length) … … 7 8 } 8 9 10 __device__ 9 11 static double 10 12 fq(double qab, double qc, double radius, double length) … … 13 15 } 14 16 17 __device__ 15 18 static double 16 19 orient_avg_1D(double q, double radius, double length) … … 33 36 } 34 37 38 __device__ 35 39 static double 36 40 Iq(double q, … … 44 48 } 45 49 50 __device__ 46 51 static double 47 52 Iqac(double qab, double qc, -
sasmodels/models/lib/gauss76.c
r99b84ec r0db7dbd 11 11 12 12 // Gaussians 13 constant double Gauss76Wt[76]={13 constant_var double Gauss76Wt[76] = { 14 14 .00126779163408536, //0 15 15 .00294910295364247, … … 90 90 }; 91 91 92 constant double Gauss76Z[76]={92 constant_var double Gauss76Z[76] = { 93 93 -.999505948362153, //0 94 94 -.997397786355355, -
sasmodels/models/lib/polevl.c
r447e9aa r0db7dbd 51 51 */ 52 52 53 double polevl( double x, constant double *coef, int N ); 54 double polevl( double x, constant double *coef, int N )53 __device__ static 54 double polevl( double x, constant_par double *coef, int N ) 55 55 { 56 56 … … 72 72 */ 73 73 74 double p1evl( double x, constant double *coef, int N ); 75 double p1evl( double x, constant double *coef, int N )74 __device__ static 75 double p1evl( double x, constant_par double *coef, int N ) 76 76 { 77 77 int i=0; -
sasmodels/models/lib/sas_J1.c
r5181ccc r0db7dbd 42 42 #if FLOAT_SIZE>4 43 43 //Cephes double pression function 44 double cephes_j1(double x); 45 46 constant double RPJ1[8] = { 44 45 constant_var double RPJ1[8] = { 47 46 -8.99971225705559398224E8, 48 47 4.52228297998194034323E11, … … 54 53 0.0 }; 55 54 56 constant double RQJ1[8] = {55 constant_var double RQJ1[8] = { 57 56 6.20836478118054335476E2, 58 57 2.56987256757748830383E5, … … 65 64 }; 66 65 67 constant double PPJ1[8] = {66 constant_var double PPJ1[8] = { 68 67 7.62125616208173112003E-4, 69 68 7.31397056940917570436E-2, … … 76 75 77 76 78 constant double PQJ1[8] = {77 constant_var double PQJ1[8] = { 79 78 5.71323128072548699714E-4, 80 79 6.88455908754495404082E-2, … … 86 85 0.0 }; 87 86 88 constant double QPJ1[8] = {87 constant_var double QPJ1[8] = { 89 88 5.10862594750176621635E-2, 90 89 4.98213872951233449420E0, … … 96 95 2.52070205858023719784E1 }; 97 96 98 constant double QQJ1[8] = {97 constant_var double QQJ1[8] = { 99 98 7.42373277035675149943E1, 100 99 1.05644886038262816351E3, … … 106 105 0.0 }; 107 106 107 __device__ static 108 108 double cephes_j1(double x) 109 109 { … … 155 155 #else 156 156 //Single precission version of cephes 157 float cephes_j1f(float x); 158 159 constant float JPJ1[8] = { 157 constant_var float JPJ1[8] = { 160 158 -4.878788132172128E-009, 161 159 6.009061827883699E-007, … … 168 166 }; 169 167 170 constant float MO1J1[8] = {168 constant_var float MO1J1[8] = { 171 169 6.913942741265801E-002, 172 170 -2.284801500053359E-001, … … 179 177 }; 180 178 181 constant float PH1J1[8] = {179 constant_var float PH1J1[8] = { 182 180 -4.497014141919556E+001, 183 181 5.073465654089319E+001, … … 190 188 }; 191 189 190 __device__ static 192 191 float cephes_j1f(float xx) 193 192 { … … 240 239 241 240 //Finally J1c function that equals 2*J1(x)/x 242 double sas_2J1x_x(double x); 241 __device__ static 243 242 double sas_2J1x_x(double x) 244 243 {
Note: See TracChangeset
for help on using the changeset viewer.