Changeset 0d26e91 in sasmodels
- Timestamp:
- Mar 6, 2019 3:44:39 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 15f5138
- Parents:
- da3638f (diff), e589e9a (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - git-author:
- Paul Kienzle <pkienzle@…> (03/06/19 15:44:39)
- git-committer:
- GitHub <noreply@…> (03/06/19 15:44:39)
- Location:
- sasmodels
- Files:
-
- 16 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/compare.py
r07646b6 rc1799d3 1152 1152 'rel_err' : True, 1153 1153 'explore' : False, 1154 'use_demo' : True,1154 'use_demo' : False, 1155 1155 'zero' : False, 1156 1156 'html' : False, -
sasmodels/direct_model.py
r5024a56 rc1799d3 332 332 333 333 # Need to pull background out of resolution for multiple scattering 334 background = pars.get('background', DEFAULT_BACKGROUND) 334 default_background = self._model.info.parameters.common_parameters[1].default 335 background = pars.get('background', default_background) 335 336 pars = pars.copy() 336 337 pars['background'] = 0. -
sasmodels/generate.py
rcd28947 ra8a1d48 1006 1006 pars = model_info.parameters.kernel_parameters 1007 1007 else: 1008 pars = model_info.parameters.COMMON + model_info.parameters.kernel_parameters 1008 pars = (model_info.parameters.common_parameters 1009 + model_info.parameters.kernel_parameters) 1009 1010 partable = make_partable(pars) 1010 1011 subst = dict(id=model_info.id.replace('_', '-'), -
sasmodels/jitter.py
r1198f90 r7d97437 15 15 pass 16 16 17 import matplotlib as mpl 17 18 import matplotlib.pyplot as plt 18 19 from matplotlib.widgets import Slider … … 746 747 pass 747 748 748 axcolor = 'lightgoldenrodyellow' 749 # CRUFT: use axisbg instead of facecolor for matplotlib<2 750 facecolor_prop = 'facecolor' if mpl.__version__ > '2' else 'axisbg' 751 props = {facecolor_prop: 'lightgoldenrodyellow'} 749 752 750 753 ## add control widgets to plot 751 axes_theta = plt.axes([0.1, 0.15, 0.45, 0.04], axisbg=axcolor)752 axes_phi = plt.axes([0.1, 0.1, 0.45, 0.04], axisbg=axcolor)753 axes_psi = plt.axes([0.1, 0.05, 0.45, 0.04], axisbg=axcolor)754 axes_theta = plt.axes([0.1, 0.15, 0.45, 0.04], **props) 755 axes_phi = plt.axes([0.1, 0.1, 0.45, 0.04], **props) 756 axes_psi = plt.axes([0.1, 0.05, 0.45, 0.04], **props) 754 757 stheta = Slider(axes_theta, 'Theta', -90, 90, valinit=theta) 755 758 sphi = Slider(axes_phi, 'Phi', -180, 180, valinit=phi) 756 759 spsi = Slider(axes_psi, 'Psi', -180, 180, valinit=psi) 757 760 758 axes_dtheta = plt.axes([0.75, 0.15, 0.15, 0.04], axisbg=axcolor)759 axes_dphi = plt.axes([0.75, 0.1, 0.15, 0.04], axisbg=axcolor)760 axes_dpsi = plt.axes([0.75, 0.05, 0.15, 0.04], axisbg=axcolor)761 axes_dtheta = plt.axes([0.75, 0.15, 0.15, 0.04], **props) 762 axes_dphi = plt.axes([0.75, 0.1, 0.15, 0.04], **props) 763 axes_dpsi = plt.axes([0.75, 0.05, 0.15, 0.04], **props) 761 764 # Note: using ridiculous definition of rectangle distribution, whose width 762 765 # in sasmodels is sqrt(3) times the given width. Divide by sqrt(3) to keep -
sasmodels/kernelcl.py
r3199b17 r0d26e91 58 58 import time 59 59 60 try: 61 from time import perf_counter as clock 62 except ImportError: # CRUFT: python < 3.3 63 import sys 64 if sys.platform.count("darwin") > 0: 65 from time import time as clock 66 else: 67 from time import clock 68 60 69 import numpy as np # type: ignore 61 62 70 63 71 # Attempt to setup OpenCL. This may fail if the pyopencl package is not … … 592 600 #call_details.show(values) 593 601 wait_for = None 594 last_nap = time.clock()602 last_nap = clock() 595 603 step = 1000000//self.q_input.nq + 1 596 604 for start in range(0, call_details.num_eval, step): … … 603 611 # Allow other processes to run. 604 612 wait_for[0].wait() 605 current_time = time.clock()613 current_time = clock() 606 614 if current_time - last_nap > 0.5: 607 615 time.sleep(0.001) -
sasmodels/modelinfo.py
r39a06c9 rc1799d3 404 404 parameters counted as n individual parameters p1, p2, ... 405 405 406 * *common_parameters* is the list of common parameters, with a unique 407 copy for each model so that structure factors can have a default 408 background of 0.0. 409 406 410 * *call_parameters* is the complete list of parameters to the kernel, 407 411 including scale and background, with vector parameters recorded as … … 416 420 parameters don't use vector notation, and instead use p1, p2, ... 417 421 """ 418 # scale and background are implicit parameters419 COMMON = [Parameter(*p) for p in COMMON_PARAMETERS]420 421 422 def __init__(self, parameters): 422 423 # type: (List[Parameter]) -> None 424 425 # scale and background are implicit parameters 426 # Need them to be unique to each model in case they have different 427 # properties, such as default=0.0 for structure factor backgrounds. 428 self.common_parameters = [Parameter(*p) for p in COMMON_PARAMETERS] 429 423 430 self.kernel_parameters = parameters 424 431 self._set_vector_lengths() … … 468 475 if p.polydisperse and p.type not in ('orientation', 'magnetic')) 469 476 self.pd_2d = set(p.name for p in self.call_parameters if p.polydisperse) 477 478 def set_zero_background(self): 479 """ 480 Set the default background to zero for this model. This is done for 481 structure factor models. 482 """ 483 # type: () -> None 484 # Make sure background is the second common parameter. 485 assert self.common_parameters[1].id == "background" 486 self.common_parameters[1].default = 0.0 487 self.defaults = self._get_defaults() 470 488 471 489 def check_angles(self): … … 567 585 def _get_call_parameters(self): 568 586 # type: () -> List[Parameter] 569 full_list = self. COMMON[:]587 full_list = self.common_parameters[:] 570 588 for p in self.kernel_parameters: 571 589 if p.length == 1: … … 670 688 671 689 # Gather the user parameters in order 672 result = control + self. COMMON690 result = control + self.common_parameters 673 691 for p in self.kernel_parameters: 674 692 if not is2d and p.type in ('orientation', 'magnetic'): … … 770 788 771 789 info = ModelInfo() 790 791 # Build the parameter table 772 792 #print("make parameter table", kernel_module.parameters) 773 793 parameters = make_parameter_table(getattr(kernel_module, 'parameters', [])) 794 795 # background defaults to zero for structure factor models 796 structure_factor = getattr(kernel_module, 'structure_factor', False) 797 if structure_factor: 798 parameters.set_zero_background() 799 800 # TODO: remove demo parameters 801 # The plots in the docs are generated from the model default values. 802 # Sascomp set parameters from the command line, and so doesn't need 803 # demo values for testing. 774 804 demo = expand_pars(parameters, getattr(kernel_module, 'demo', None)) 805 775 806 filename = abspath(kernel_module.__file__).replace('.pyc', '.py') 776 807 kernel_id = splitext(basename(filename))[0] -
sasmodels/models/hardsphere.py
r304c775 rc1799d3 162 162 return pars 163 163 164 demo = dict(radius_effective=200, volfraction=0.2,165 radius_effective_pd=0.1, radius_effective_pd_n=40)166 164 # Q=0.001 is in the Taylor series, low Q part, so add Q=0.1, 167 165 # assuming double precision sasview is correct -
sasmodels/resolution.py
rda3638f r0d26e91 445 445 q = np.sort(q) 446 446 if q_min + 2*MINIMUM_RESOLUTION < q[0]: 447 n_low = np.ceil((q[0]-q_min) / (q[1]-q[0])) if q[1] > q[0] else 15448 q_low = np.linspace(q_min, q[0], int(n_low)+1)[:-1]447 n_low = int(np.ceil((q[0]-q_min) / (q[1]-q[0]))) if q[1] > q[0] else 15 448 q_low = np.linspace(q_min, q[0], n_low+1)[:-1] 449 449 else: 450 450 q_low = [] 451 451 if q_max - 2*MINIMUM_RESOLUTION > q[-1]: 452 n_high = np.ceil((q_max-q[-1]) / (q[-1]-q[-2])) if q[-1] > q[-2] else 15453 q_high = np.linspace(q[-1], q_max, int(n_high)+1)[1:]452 n_high = int(np.ceil((q_max-q[-1]) / (q[-1]-q[-2]))) if q[-1] > q[-2] else 15 453 q_high = np.linspace(q[-1], q_max, n_high+1)[1:] 454 454 else: 455 455 q_high = [] … … 498 498 if q_min < 0: 499 499 q_min = q[0]*MINIMUM_ABSOLUTE_Q 500 n_low = np.ceil(log_delta_q * (log(q[0])-log(q_min)))501 q_low = np.logspace(log10(q_min), log10(q[0]), int(n_low)+1)[:-1]500 n_low = int(np.ceil(log_delta_q * (log(q[0])-log(q_min)))) 501 q_low = np.logspace(log10(q_min), log10(q[0]), n_low+1)[:-1] 502 502 else: 503 503 q_low = [] 504 504 if q_max > q[-1]: 505 n_high = np.ceil(log_delta_q * (log(q_max)-log(q[-1])))506 q_high = np.logspace(log10(q[-1]), log10(q_max), int(n_high)+1)[1:]505 n_high = int(np.ceil(log_delta_q * (log(q_max)-log(q[-1])))) 506 q_high = np.logspace(log10(q[-1]), log10(q_max), n_high+1)[1:] 507 507 else: 508 508 q_high = [] -
sasmodels/sasview_model.py
r3a1afed ra8a1d48 382 382 hidden.add('scale') 383 383 hidden.add('background') 384 self._model_info.parameters.defaults['background'] = 0.385 384 386 385 # Update the parameter lists to exclude any hidden parameters … … 931 930 CylinderModel().evalDistribution([0.1, 0.1]) 932 931 932 def test_structure_factor_background(): 933 # type: () -> None 934 """ 935 Check that sasview model and direct model match, with background=0. 936 """ 937 from .data import empty_data1D 938 from .core import load_model_info, build_model 939 from .direct_model import DirectModel 940 941 model_name = "hardsphere" 942 q = [0.0] 943 944 sasview_model = _make_standard_model(model_name)() 945 sasview_value = sasview_model.evalDistribution(np.array(q))[0] 946 947 data = empty_data1D(q) 948 model_info = load_model_info(model_name) 949 model = build_model(model_info) 950 direct_model = DirectModel(data, model) 951 direct_value_zero_background = direct_model(background=0.0) 952 953 assert sasview_value == direct_value_zero_background 954 955 # Additionally check that direct value background defaults to zero 956 direct_value_default = direct_model() 957 assert sasview_value == direct_value_default 958 959 933 960 def magnetic_demo(): 934 961 Model = _make_standard_model('sphere') … … 951 978 #print("rpa:", test_rpa()) 952 979 #test_empty_distribution() 980 #test_structure_factor_background() -
sasmodels/weights.py
r3d58247 re2592f0 23 23 default = dict(npts=35, width=0, nsigmas=3) 24 24 def __init__(self, npts=None, width=None, nsigmas=None): 25 self.npts = self.default['npts'] if npts is None else npts25 self.npts = self.default['npts'] if npts is None else int(npts) 26 26 self.width = self.default['width'] if width is None else width 27 27 self.nsigmas = self.default['nsigmas'] if nsigmas is None else nsigmas -
sasmodels/kernel.py
rcd28947 r36a2418 23 23 # pylint: enable=unused-import 24 24 25 25 26 class KernelModel(object): 26 27 info = None # type: ModelInfo … … 33 34 # type: () -> None 34 35 pass 36 35 37 36 38 class Kernel(object): -
sasmodels/kernelcuda.py
rf872fd1 rfa26e78 63 63 import time 64 64 import re 65 import atexit 65 66 66 67 import numpy as np # type: ignore 67 68 68 69 69 # Attempt to setup cuda. This may fail if the pycuda package is not70 # Attempt to setup CUDA. This may fail if the pycuda package is not 70 71 # installed or if it is installed but there are no devices available. 71 72 try: … … 107 108 MAX_LOOPS = 2048 108 109 110 109 111 def use_cuda(): 110 env = os.environ.get("SAS_OPENCL", "").lower() 111 return HAVE_CUDA and (env == "" or env.startswith("cuda")) 112 sas_opencl = os.environ.get("SAS_OPENCL", "CUDA").lower() 113 return HAVE_CUDA and sas_opencl.startswith("cuda") 114 112 115 113 116 ENV = None … … 121 124 ENV.release() 122 125 ENV = GpuEnvironment() if use_cuda() else None 126 123 127 124 128 def environment(): … … 138 142 return ENV 139 143 144 145 # PyTest is not freeing ENV, so make sure it gets freed. 146 atexit.register(lambda: ENV.release() if ENV is not None else None) 147 148 140 149 def has_type(dtype): 141 150 # type: (np.dtype) -> bool … … 143 152 Return true if device supports the requested precision. 144 153 """ 145 # Assume the nvidiacard supports 32-bit and 64-bit floats.146 # TODO: check if pycuda support F16154 # Assume the NVIDIA card supports 32-bit and 64-bit floats. 155 # TODO: Check if pycuda support F16. 147 156 return dtype in (generate.F32, generate.F64) 148 157 149 158 150 159 FUNCTION_PATTERN = re.compile(r"""^ 151 (?P<space>\s*) # initial space152 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # one or more qualifiers before function153 (?P<function>\s*\b\w+\b\s*[(]) # function name plus open parens160 (?P<space>\s*) # Initial space. 161 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # One or more qualifiers before function. 162 (?P<function>\s*\b\w+\b\s*[(]) # Function name plus open parens. 154 163 """, re.VERBOSE|re.MULTILINE) 155 164 … … 158 167 """, re.VERBOSE|re.MULTILINE) 159 168 169 160 170 def _add_device_tag(match): 161 171 # type: (None) -> str 162 # Note: should be re.Match, but that isn't a simple type172 # Note: Should be re.Match, but that isn't a simple type. 163 173 """ 164 174 replace qualifiers with __device__ qualifiers if needed … … 173 183 return "".join((space, "__device__ ", qualifiers, function)) 174 184 185 175 186 def mark_device_functions(source): 176 187 # type: (str) -> str … … 179 190 """ 180 191 return FUNCTION_PATTERN.sub(_add_device_tag, source) 192 181 193 182 194 def show_device_functions(source): … … 188 200 print(match.group('qualifiers').replace('\n',r'\n'), match.group('function'), '(') 189 201 return source 202 190 203 191 204 def compile_model(source, dtype, fast=False): … … 212 225 #options = ['--verbose', '-E'] 213 226 options = ['--use_fast_math'] if fast else None 214 program = SourceModule(source, no_extern_c=True, options=options) # include_dirs=[...]227 program = SourceModule(source, no_extern_c=True, options=options) #, include_dirs=[...]) 215 228 216 229 #print("done with "+program) … … 218 231 219 232 220 # for now, this returns one device in the context221 # TODO: create a context that contains all devices on all platforms233 # For now, this returns one device in the context. 234 # TODO: Create a context that contains all devices on all platforms. 222 235 class GpuEnvironment(object): 223 236 """ 224 GPU context , with possibly many devices, and one queue per device.237 GPU context for CUDA. 225 238 """ 226 239 context = None # type: cuda.Context 227 240 def __init__(self, devnum=None): 228 241 # type: (int) -> None 229 # Byte boundary for data alignment230 #self.data_boundary = max(d.min_data_type_align_size231 # for d in self.context.devices)232 self.compiled = {}233 242 env = os.environ.get("SAS_OPENCL", "").lower() 234 243 if devnum is None and env.startswith("cuda:"): 235 244 devnum = int(env[5:]) 245 236 246 # Set the global context to the particular device number if one is 237 247 # given, otherwise use the default context. Perhaps this will be set … … 242 252 self.context = make_default_context() 243 253 254 ## Byte boundary for data alignment. 255 #self.data_boundary = max(d.min_data_type_align_size 256 # for d in self.context.devices) 257 258 # Cache for compiled programs, and for items in context. 259 self.compiled = {} 260 244 261 def release(self): 245 262 if self.context is not None: … … 262 279 Compile the program for the device in the given context. 263 280 """ 264 # Note: PyOpenCL caches based on md5 hash of source, options and device 265 # so we don't really need to cache things for ourselves. I'll do so 266 # anyway just to save some data munging time. 281 # Note: PyCuda (probably) caches but I'll do so as well just to 282 # save some data munging time. 267 283 tag = generate.tag_source(source) 268 284 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 269 # Check timestamp on program 285 # Check timestamp on program. 270 286 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 271 287 if program_timestamp < timestamp: … … 277 293 return program 278 294 295 279 296 class GpuModel(KernelModel): 280 297 """ … … 292 309 that the compiler is allowed to take shortcuts. 293 310 """ 294 info = None # type: ModelInfo295 source = "" # type: str296 dtype = None # type: np.dtype297 fast = False # type: bool298 program = None# type: SourceModule299 _kernels = None # type: List[cuda.Function]311 info = None # type: ModelInfo 312 source = "" # type: str 313 dtype = None # type: np.dtype 314 fast = False # type: bool 315 _program = None # type: SourceModule 316 _kernels = None # type: Dict[str, cuda.Function] 300 317 301 318 def __init__(self, source, model_info, dtype=generate.F32, fast=False): … … 305 322 self.dtype = dtype 306 323 self.fast = fast 307 self.program = None # delay program creation308 self._kernels = None309 324 310 325 def __getstate__(self): … … 315 330 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 316 331 self.info, self.source, self.dtype, self.fast = state 317 self. program= None332 self._program = self._kernels = None 318 333 319 334 def make_kernel(self, q_vectors): 320 335 # type: (List[np.ndarray]) -> "GpuKernel" 321 if self.program is None: 322 compile_program = environment().compile_program 323 timestamp = generate.ocl_timestamp(self.info) 324 self.program = compile_program( 325 self.info.name, 326 self.source['opencl'], 327 self.dtype, 328 self.fast, 329 timestamp) 330 variants = ['Iq', 'Iqxy', 'Imagnetic'] 331 names = [generate.kernel_name(self.info, k) for k in variants] 332 kernels = [self.program.get_function(k) for k in names] 333 self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 334 is_2d = len(q_vectors) == 2 335 if is_2d: 336 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 337 else: 338 kernel = [self._kernels['Iq']]*2 339 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 340 341 def release(self): 342 # type: () -> None 343 """ 344 Free the resources associated with the model. 345 """ 346 if self.program is not None: 347 self.program = None 348 349 def __del__(self): 350 # type: () -> None 351 self.release() 352 353 # TODO: check that we don't need a destructor for buffers which go out of scope 336 return GpuKernel(self, q_vectors) 337 338 def get_function(self, name): 339 # type: (str) -> cuda.Function 340 """ 341 Fetch the kernel from the environment by name, compiling it if it 342 does not already exist. 343 """ 344 if self._program is None: 345 self._prepare_program() 346 return self._kernels[name] 347 348 def _prepare_program(self): 349 # type: (str) -> None 350 env = environment() 351 timestamp = generate.ocl_timestamp(self.info) 352 program = env.compile_program( 353 self.info.name, 354 self.source['opencl'], 355 self.dtype, 356 self.fast, 357 timestamp) 358 variants = ['Iq', 'Iqxy', 'Imagnetic'] 359 names = [generate.kernel_name(self.info, k) for k in variants] 360 functions = [program.get_function(k) for k in names] 361 self._kernels = {k: v for k, v in zip(variants, functions)} 362 # Keep a handle to program so GC doesn't collect. 363 self._program = program 364 365 366 # TODO: Check that we don't need a destructor for buffers which go out of scope. 354 367 class GpuInput(object): 355 368 """ … … 373 386 def __init__(self, q_vectors, dtype=generate.F32): 374 387 # type: (List[np.ndarray], np.dtype) -> None 375 # TODO: do we ever need double precision q?388 # TODO: Do we ever need double precision q? 376 389 self.nq = q_vectors[0].size 377 390 self.dtype = np.dtype(dtype) 378 391 self.is_2d = (len(q_vectors) == 2) 379 # TODO: stretch input based on get_warp()380 # not doing it now since warp depends on kernel, which is not known392 # TODO: Stretch input based on get_warp(). 393 # Not doing it now since warp depends on kernel, which is not known 381 394 # at this point, so instead using 32, which is good on the set of 382 395 # architectures tested so far. 383 396 if self.is_2d: 384 # Note: 16 rather than 15 because result is 1 longer than input. 385 width = ((self.nq+16)//16)*16 397 width = ((self.nq+15)//16)*16 386 398 self.q = np.empty((width, 2), dtype=dtype) 387 399 self.q[:self.nq, 0] = q_vectors[0] 388 400 self.q[:self.nq, 1] = q_vectors[1] 389 401 else: 390 # Note: 32 rather than 31 because result is 1 longer than input. 391 width = ((self.nq+32)//32)*32 402 width = ((self.nq+31)//32)*32 392 403 self.q = np.empty(width, dtype=dtype) 393 404 self.q[:self.nq] = q_vectors[0] 394 405 self.global_size = [self.q.shape[0]] 395 406 #print("creating inputs of size", self.global_size) 407 408 # Transfer input value to GPU. 396 409 self.q_b = cuda.to_device(self.q) 397 410 … … 399 412 # type: () -> None 400 413 """ 401 Free the memory.414 Free the buffer associated with the q value. 402 415 """ 403 416 if self.q_b is not None: … … 409 422 self.release() 410 423 424 411 425 class GpuKernel(Kernel): 412 426 """ 413 427 Callable SAS kernel. 414 428 415 *kernel* is the GpuKernel object to call 416 417 *model_info* is the module information 418 419 *q_vectors* is the q vectors at which the kernel should be evaluated 420 421 *dtype* is the kernel precision 422 423 The resulting call method takes the *pars*, a list of values for 424 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 425 vectors for the polydisperse parameters. *cutoff* determines the 426 integration limits: any points with combined weight less than *cutoff* 427 will not be calculated. 429 *model* is the GpuModel object to call 430 431 The kernel is derived from :class:`Kernel`, providing the 432 :meth:`call_kernel` method to evaluate the kernel for a given set of 433 parameters. Because of the need to move the q values to the GPU before 434 evaluation, the kernel is instantiated for a particular set of q vectors, 435 and can be called many times without transfering q each time. 428 436 429 437 Call :meth:`release` when done with the kernel instance. 430 438 """ 431 def __init__(self, kernel, dtype, model_info, q_vectors): 432 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 439 #: SAS model information structure. 440 info = None # type: ModelInfo 441 #: Kernel precision. 442 dtype = None # type: np.dtype 443 #: Kernel dimensions (1d or 2d). 444 dim = "" # type: str 445 #: Calculation results, updated after each call to :meth:`_call_kernel`. 446 result = None # type: np.ndarray 447 448 def __init__(self, model, q_vectors): 449 # type: (GpuModel, List[np.ndarray]) -> None 450 dtype = model.dtype 433 451 self.q_input = GpuInput(q_vectors, dtype) 434 self.kernel = kernel 435 # F16 isn't sufficient, so don't support it 452 self._model = model 453 454 # Attributes accessed from the outside. 455 self.dim = '2d' if self.q_input.is_2d else '1d' 456 self.info = model.info 457 self.dtype = dtype 458 459 # Converter to translate input to target type. 436 460 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 437 461 438 # attributes accessed from the outside 439 self.dim = '2d' if self.q_input.is_2d else '1d' 440 self.info = model_info 441 self.dtype = dtype 442 443 # holding place for the returned value 462 # Holding place for the returned value. 444 463 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 445 extra_q = 4 # total weight, form volume, shell volume and R_eff 446 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 447 448 # Inputs and outputs for each kernel call 449 # Note: res may be shorter than res_b if global_size != nq 464 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 465 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 466 467 # Allocate result value on GPU. 450 468 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 451 self.result_b = cuda.mem_alloc(width) 452 self._need_release = [self.result_b] 453 454 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 455 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 456 # Arrange data transfer to card 469 self._result_b = cuda.mem_alloc(width) 470 471 def _call_kernel(self, call_details, values, cutoff, magnetic, 472 effective_radius_type): 473 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 474 475 # Arrange data transfer to card. 457 476 details_b = cuda.to_device(call_details.buffer) 458 477 values_b = cuda.to_device(values) 459 478 460 kernel = self.kernel[1 if magnetic else 0] 461 args = [ 462 np.uint32(self.q_input.nq), None, None, 463 details_b, values_b, self.q_input.q_b, self.result_b, 464 self._as_dtype(cutoff), 465 np.uint32(effective_radius_type), 479 # Setup kernel function and arguments. 480 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 481 kernel = self._model.get_function(name) 482 kernel_args = [ 483 np.uint32(self.q_input.nq), # Number of inputs. 484 None, # Placeholder for pd_start. 485 None, # Placeholder for pd_stop. 486 details_b, # Problem definition. 487 values_b, # Parameter values. 488 self.q_input.q_b, # Q values. 489 self._result_b, # Result storage. 490 self._as_dtype(cutoff), # Probability cutoff. 491 np.uint32(effective_radius_type), # R_eff mode. 466 492 ] 467 493 grid = partition(self.q_input.nq) 468 #print("Calling OpenCL") 494 495 # Call kernel and retrieve results. 496 #print("Calling CUDA") 469 497 #call_details.show(values) 470 # Call kernel and retrieve results471 498 last_nap = time.clock() 472 499 step = 100000000//self.q_input.nq + 1 … … 475 502 stop = min(start + step, call_details.num_eval) 476 503 #print("queuing",start,stop) 477 args[1:3] = [np.int32(start), np.int32(stop)]478 kernel(* args, **grid)504 kernel_args[1:3] = [np.int32(start), np.int32(stop)] 505 kernel(*kernel_args, **grid) 479 506 if stop < call_details.num_eval: 480 507 sync() 481 # Allow other processes to run 508 # Allow other processes to run. 482 509 current_time = time.clock() 483 510 if current_time - last_nap > 0.5: … … 485 512 last_nap = current_time 486 513 sync() 487 cuda.memcpy_dtoh(self.result, self. result_b)514 cuda.memcpy_dtoh(self.result, self._result_b) 488 515 #print("result", self.result) 489 516 … … 496 523 Release resources associated with the kernel. 497 524 """ 498 for p in self._need_release: 499 p.free() 500 self._need_release = [] 525 self.q_input.release() 526 if self._result_b is not None: 527 self._result_b.free() 528 self._result_b = None 501 529 502 530 def __del__(self): … … 512 540 Note: Maybe context.synchronize() is sufficient. 513 541 """ 514 #return # The following works in C++; don't know what pycuda is doing 515 # Create an event with which to synchronize 542 # Create an event with which to synchronize. 516 543 done = cuda.Event() 517 544 … … 519 546 done.record() 520 547 521 # line added to not hog resources548 # Make sure we don't hog resource while waiting to sync. 522 549 while not done.query(): 523 550 time.sleep(0.01) … … 525 552 # Block until the GPU executes the kernel. 526 553 done.synchronize() 554 527 555 # Clean up the event; I don't think they can be reused. 528 556 del done -
sasmodels/kerneldll.py
re44432d r3199b17 100 100 # pylint: enable=unused-import 101 101 102 # Compiler output is a byte stream that needs to be decode in python 3 102 # Compiler output is a byte stream that needs to be decode in python 3. 103 103 decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 104 104 … … 115 115 COMPILER = "tinycc" 116 116 elif "VCINSTALLDIR" in os.environ: 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 118 # and we can use the MSVC compiler. Otherwise, if tinycc is available 119 # the use it. Otherwise, hope that mingw is available. 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 118 # environment and we can use the MSVC compiler. Otherwise, if 119 # tinycc is available then use it. Otherwise, hope that mingw 120 # is available. 120 121 COMPILER = "msvc" 121 122 else: … … 124 125 COMPILER = "unix" 125 126 126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86. 127 128 if COMPILER == "unix": 128 # Generic unix compile 129 # On mac users will need the X code command line tools installed129 # Generic unix compile. 130 # On Mac users will need the X code command line tools installed. 130 131 #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 131 132 CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 132 # add openmp support if not running on a mac133 # Add OpenMP support if not running on a Mac. 133 134 if sys.platform != "darwin": 134 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 135 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9). 135 136 # Shut it off for all unix until we can investigate. 136 137 #CC.append("-fopenmp") … … 144 145 # vcomp90.dll on the path. One may be found here: 145 146 # C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 146 # Copy this to the python directory and uncomment the OpenMP COMPILE 147 # TODO: remove intermediate OBJ file created in the directory148 # TODO: maybe don't use randomized name for the c file149 # TODO: maybe ask distutils to find MSVC147 # Copy this to the python directory and uncomment the OpenMP COMPILE. 148 # TODO: Remove intermediate OBJ file created in the directory. 149 # TODO: Maybe don't use randomized name for the c file. 150 # TODO: Maybe ask distutils to find MSVC. 150 151 CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 151 152 if "SAS_OPENMP" in os.environ: … … 172 173 ALLOW_SINGLE_PRECISION_DLLS = True 173 174 175 174 176 def compile(source, output): 175 177 # type: (str, str) -> None … … 183 185 logging.info(command_str) 184 186 try: 185 # need shell=True on windows to keep console box from popping up187 # Need shell=True on windows to keep console box from popping up. 186 188 shell = (os.name == 'nt') 187 189 subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) … … 192 194 raise RuntimeError("compile failed. File is in %r"%source) 193 195 196 194 197 def dll_name(model_info, dtype): 195 198 # type: (ModelInfo, np.dtype) -> str … … 202 205 basename += ARCH + ".so" 203 206 204 # Hack to find precompiled dlls 207 # Hack to find precompiled dlls. 205 208 path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 206 209 if os.path.exists(path): … … 242 245 raise ValueError("16 bit floats not supported") 243 246 if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 244 dtype = F64 # Force 64-bit dll 245 # Note: dtype may be F128 for long double precision 247 dtype = F64 # Force 64-bit dll. 248 # Note: dtype may be F128 for long double precision. 246 249 247 250 dll = dll_path(model_info, dtype) … … 254 257 need_recompile = dll_time < newest_source 255 258 if need_recompile: 256 # Make sure the DLL path exists 259 # Make sure the DLL path exists. 257 260 if not os.path.exists(SAS_DLL_PATH): 258 261 os.makedirs(SAS_DLL_PATH) … … 263 266 file_handle.write(source) 264 267 compile(source=filename, output=dll) 265 # comment the following to keep the generated c file266 # Note: if there is a syntax error then compile raises an error268 # Comment the following to keep the generated C file. 269 # Note: If there is a syntax error then compile raises an error 267 270 # and the source file will not be deleted. 268 271 os.unlink(filename) … … 303 306 self.dllpath = dllpath 304 307 self._dll = None # type: ct.CDLL 305 self._kernels = None # type: List[Callable, Callable]308 self._kernels = None # type: List[Callable, Callable] 306 309 self.dtype = np.dtype(dtype) 307 310 … … 338 341 # type: (List[np.ndarray]) -> DllKernel 339 342 q_input = PyInput(q_vectors, self.dtype) 340 # Note: pickle not supported for DllKernel343 # Note: DLL is lazy loaded. 341 344 if self._dll is None: 342 345 self._load_dll() … … 358 361 self._dll = None 359 362 363 360 364 class DllKernel(Kernel): 361 365 """ … … 379 383 def __init__(self, kernel, model_info, q_input): 380 384 # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 381 #,model_info,q_input) 385 dtype = q_input.dtype 386 self.q_input = q_input 382 387 self.kernel = kernel 388 389 # Attributes accessed from the outside. 390 self.dim = '2d' if q_input.is_2d else '1d' 383 391 self.info = model_info 384 self.q_input = q_input 385 self.dtype = q_input.dtype 386 self.dim = '2d' if q_input.is_2d else '1d' 387 # leave room for f1/f2 results in case we need to compute beta for 1d models 392 self.dtype = dtype 393 394 # Converter to translate input to target type. 395 self._as_dtype = (np.float32 if dtype == generate.F32 396 else np.float64 if dtype == generate.F64 397 else np.float128) 398 399 # Holding place for the returned value. 388 400 nout = 2 if self.info.have_Fq else 1 389 # +4 for total weight, shell volume, effective radius, form volume390 self.result = np.empty( q_input.nq*nout + 4, self.dtype)391 self.real = (np.float32 if self.q_input.dtype == generate.F32 392 else np.float64 if self.q_input.dtype == generate.F64393 e lse np.float128)394 395 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 396 # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray401 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 402 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 403 404 def _call_kernel(self, call_details, values, cutoff, magnetic, 405 effective_radius_type): 406 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 407 408 # Setup kernel function and arguments. 397 409 kernel = self.kernel[1 if magnetic else 0] 398 args = [399 self.q_input.nq, # nq400 None, # pd_start401 None, # pd_stop pd_stride[MAX_PD]402 call_details.buffer.ctypes.data, # problem403 values.ctypes.data, # pars404 self.q_input.q.ctypes.data, # q405 self.result.ctypes.data, # results406 self. real(cutoff), # cutoff407 effective_radius_type, # cutoff410 kernel_args = [ 411 self.q_input.nq, # Number of inputs. 412 None, # Placeholder for pd_start. 413 None, # Placeholder for pd_stop. 414 call_details.buffer.ctypes.data, # Problem definition. 415 values.ctypes.data, # Parameter values. 416 self.q_input.q.ctypes.data, # Q values. 417 self.result.ctypes.data, # Result storage. 418 self._as_dtype(cutoff), # Probability cutoff. 419 effective_radius_type, # R_eff mode. 408 420 ] 421 422 # Call kernel and retrieve results. 409 423 #print("Calling DLL") 410 424 #call_details.show(values) 411 425 step = 100 426 # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops? 412 427 for start in range(0, call_details.num_eval, step): 413 428 stop = min(start + step, call_details.num_eval) 414 args[1:3] = [start, stop]415 kernel(* args) # type: ignore429 kernel_args[1:3] = [start, stop] 430 kernel(*kernel_args) # type: ignore 416 431 417 432 def release(self): 418 433 # type: () -> None 419 434 """ 420 Release anyresources associated with the kernel.435 Release resources associated with the kernel. 421 436 """ 422 self.q_input.release() 437 # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 438 # Should we be doing the same for DLL? 439 #self.q_input.release() 440 pass 441 442 def __del__(self): 443 # type: () -> None 444 self.release() -
sasmodels/kernelpy.py
raa8c6e0 r3199b17 33 33 logger = logging.getLogger(__name__) 34 34 35 35 36 class PyModel(KernelModel): 36 37 """ … … 38 39 """ 39 40 def __init__(self, model_info): 40 # Make sure Iq is available and vectorized 41 # Make sure Iq is available and vectorized. 41 42 _create_default_functions(model_info) 42 43 self.info = model_info … … 53 54 """ 54 55 pass 56 55 57 56 58 class PyInput(object): … … 91 93 self.q = None 92 94 95 93 96 class PyKernel(Kernel): 94 97 """ … … 131 134 parameter_vector = np.empty(len(partable.call_parameters)-2, 'd') 132 135 133 # Create views into the array to hold the arguments 136 # Create views into the array to hold the arguments. 134 137 offset = 0 135 138 kernel_args, volume_args = [], [] … … 174 177 else (lambda mode: 1.0)) 175 178 176 177 178 179 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 179 180 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray … … 195 196 self.q_input.release() 196 197 self.q_input = None 198 197 199 198 200 def _loops(parameters, # type: np.ndarray … … 254 256 total = np.zeros(nq, 'd') 255 257 for loop_index in range(call_details.num_eval): 256 # update polydispersity parameter values258 # Update polydispersity parameter values. 257 259 if p0_index == p0_length: 258 260 pd_index = (loop_index//pd_stride)%pd_length … … 265 267 p0_index += 1 266 268 if weight > cutoff: 267 # Call the scattering function 269 # Call the scattering function. 268 270 # Assume that NaNs are only generated if the parameters are bad; 269 271 # exclude all q for that NaN. Even better would be to have an … … 273 275 continue 274 276 275 # update value and norm277 # Update value and norm. 276 278 total += weight * Iq 277 279 weight_norm += weight … … 293 295 any functions that are not already marked as vectorized. 294 296 """ 295 # Note: must call create_vector_Iq before create_vector_Iqxy297 # Note: Must call create_vector_Iq before create_vector_Iqxy. 296 298 _create_vector_Iq(model_info) 297 299 _create_vector_Iqxy(model_info) -
sasmodels/model_test.py
r5024a56 r00afc15 167 167 # test using cuda if desired and available 168 168 if 'cuda' in loaders and use_cuda(): 169 test_name = "%s-cuda" %model_name169 test_name = "%s-cuda" % model_info.id 170 170 test_method_name = "test_%s_cuda" % model_info.id 171 171 # Using dtype=None so that the models that are only -
sasmodels/models/rpa.c
r71b751d r19dc29e7 25 25 double S0ba,Pbb,S0bb,Pbc,S0bc,Pbd,S0bd; 26 26 double S0ca,S0cb,Pcc,S0cc,Pcd,S0cd; 27 double S0da,S0db,S0dc;27 //double S0da,S0db,S0dc; 28 28 double Pdd,S0dd; 29 29 double Kaa,Kbb,Kcc; 30 30 double Kba,Kca,Kcb; 31 double Kda,Kdb,Kdc,Kdd;31 //double Kda,Kdb,Kdc,Kdd; 32 32 double Zaa,Zab,Zac,Zba,Zbb,Zbc,Zca,Zcb,Zcc; 33 33 double DenT,T11,T12,T13,T21,T22,T23,T31,T32,T33; … … 36 36 double N11,N12,N13,N21,N22,N23,N31,N32,N33; 37 37 double M11,M12,M13,M21,M22,M23,M31,M32,M33; 38 double S11,S12,S13,S14,S21,S22,S23,S24; 39 double S31,S32,S33,S34,S41,S42,S43,S44; 38 double S11,S12,S22,S23,S13,S33; 39 //double S21,S31,S32,S44; 40 //double S14,S24,S34,S41,S42,S43; 40 41 double Lad,Lbd,Lcd,Nav,Intg; 41 42 … … 115 116 S0cd=(Phicd*vcd*Ncd)*Pcd; 116 117 117 S0da=S0ad;118 S0db=S0bd;119 S0dc=S0cd;118 //S0da=S0ad; 119 //S0db=S0bd; 120 //S0dc=S0cd; 120 121 Pdd=2.0*(exp(-Xd)-1.0+Xd)/(Xd*Xd); // free D chain 121 122 S0dd=N[3]*Phi[3]*v[3]*Pdd; … … 198 199 S0ca=S0ac; 199 200 S0cb=S0bc; 200 S0da=S0ad;201 S0db=S0bd;202 S0dc=S0cd;201 //S0da=S0ad; 202 //S0db=S0bd; 203 //S0dc=S0cd; 203 204 204 205 // self chi parameter is 0 ... of course … … 206 207 Kbb=0.0; 207 208 Kcc=0.0; 208 Kdd=0.0;209 //Kdd=0.0; 209 210 210 211 Kba=Kab; 211 212 Kca=Kac; 212 213 Kcb=Kbc; 213 Kda=Kad;214 Kdb=Kbd;215 Kdc=Kcd;214 //Kda=Kad; 215 //Kdb=Kbd; 216 //Kdc=Kcd; 216 217 217 218 Zaa=Kaa-Kad-Kad; … … 294 295 S12= Q12*S0aa + Q22*S0ab + Q32*S0ac; 295 296 S13= Q13*S0aa + Q23*S0ab + Q33*S0ac; 296 S14=-S11-S12-S13;297 S21= Q11*S0ba + Q21*S0bb + Q31*S0bc;298 297 S22= Q12*S0ba + Q22*S0bb + Q32*S0bc; 299 298 S23= Q13*S0ba + Q23*S0bb + Q33*S0bc; 300 S24=-S21-S22-S23;301 S31= Q11*S0ca + Q21*S0cb + Q31*S0cc;302 S32= Q12*S0ca + Q22*S0cb + Q32*S0cc;303 299 S33= Q13*S0ca + Q23*S0cb + Q33*S0cc; 304 S34=-S31-S32-S33; 305 S41=S14; 306 S42=S24; 307 S43=S34; 308 S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 300 //S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 301 //S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 302 //S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 303 //S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 304 //S14=-S11-S12-S13; 305 //S24=-S21-S22-S23; 306 //S34=-S31-S32-S33; 307 //S41=S14; 308 //S42=S24; 309 //S43=S34; 309 310 310 311 //calculate contrast where L[i] is the scattering length of i and D is the matrix
Note: See TracChangeset
for help on using the changeset viewer.