Changeset 15f5138 in sasmodels
- Timestamp:
- Mar 6, 2019 6:09:15 PM (6 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- c11d09f
- Parents:
- 0d26e91 (diff), 9150036 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - Files:
-
- 15 edited
Legend:
- Unmodified
- Added
- Removed
-
doc/guide/plugin.rst
raa8c6e0 r9150036 272 272 structure factor to account for interactions between particles. See 273 273 `Form_Factors`_ for more details. 274 275 **model_info = ...** lets you define a model directly, for example, by 276 loading and modifying existing models. This is done implicitly by 277 :func:`sasmodels.core.load_model_info`, which can create a mixture model 278 from a pair of existing models. For example:: 279 280 from sasmodels.core import load_model_info 281 model_info = load_model_info('sphere+cylinder') 282 283 See :class:`sasmodels.modelinfo.ModelInfo` for details about the model 284 attributes that are defined. 274 285 275 286 Model Parameters … … 894 905 - \frac{\sin(x)}{x}\left(\frac{1}{x} - \frac{3!}{x^3} + \frac{5!}{x^5} - \frac{7!}{x^7}\right) 895 906 896 For small arguments 907 For small arguments, 897 908 898 909 .. math:: -
example/multiscatfit.py
r49d1f8b8 r2c4a190 15 15 16 16 # Show the model without fitting 17 PYTHONPATH=..:../ explore:../../bumps:../../sasview/src python multiscatfit.py17 PYTHONPATH=..:../../bumps:../../sasview/src python multiscatfit.py 18 18 19 19 # Run the fit 20 PYTHONPATH=..:../ explore:../../bumps:../../sasview/src ../../bumps/run.py \20 PYTHONPATH=..:../../bumps:../../sasview/src ../../bumps/run.py \ 21 21 multiscatfit.py --store=/tmp/t1 22 22 … … 55 55 ) 56 56 57 # Tie the model to the data 58 M = Experiment(data=data, model=model) 59 60 # Stack mulitple scattering on top of the existing resolution function. 61 M.resolution = MultipleScattering(resolution=M.resolution, probability=0.) 62 57 63 # SET THE FITTING PARAMETERS 58 64 model.radius_polar.range(15, 3000) … … 65 71 model.scale.range(0, 0.1) 66 72 67 # Mulitple scattering probability parameter 68 # HACK: the probability is stuffed in as an extra parameter to the experiment. 69 probability = Parameter(name="probability", value=0.0) 70 probability.range(0.0, 0.9) 73 # The multiple scattering probability parameter is in the resolution function 74 # instead of the scattering function, so access it through M.resolution 75 M.scattering_probability.range(0.0, 0.9) 71 76 72 M = Experiment(data=data, model=model, extra_pars={'probability': probability}) 73 74 # Stack mulitple scattering on top of the existing resolution function. 75 # Because resolution functions in sasview don't have fitting parameters, 76 # we instead allow the multiple scattering calculator to take a function 77 # instead of a probability. This function returns the current value of 78 # the parameter. ** THIS IS TEMPORARY ** when multiple scattering is 79 # properly integrated into sasmodels and sasview, its fittable parameter 80 # will be treated like the model parameters. 81 M.resolution = MultipleScattering(resolution=M.resolution, 82 probability=lambda: probability.value, 83 ) 84 M._kernel_inputs = M.resolution.q_calc 77 # Let bumps know that we are fitting this experiment 85 78 problem = FitProblem(M) 86 79 -
sasmodels/__init__.py
ra1ec908 r37f38ff 14 14 defining new models. 15 15 """ 16 __version__ = "0.9 8"16 __version__ = "0.99" 17 17 18 18 def data_files(): -
sasmodels/bumps_model.py
r49d1f8b8 r2c4a190 35 35 # when bumps is not on the path. 36 36 from bumps.names import Parameter # type: ignore 37 from bumps.parameter import Reference # type: ignore 37 38 except ImportError: 38 39 pass … … 139 140 def __init__(self, data, model, cutoff=1e-5, name=None, extra_pars=None): 140 141 # type: (Data, Model, float) -> None 142 # Allow resolution function to define fittable parameters. We do this 143 # by creating reference parameters within the resolution object rather 144 # than modifying the object itself to use bumps parameters. We need 145 # to reset the parameters each time the object has changed. These 146 # additional parameters need to be returned from the fitting engine. 147 # To make them available to the user, they are added as top-level 148 # attributes to the experiment object. The only change to the 149 # resolution function is that it needs an optional 'fittable' attribute 150 # which maps the internal name to the user visible name for the 151 # for the parameter. 152 self._resolution = None 153 self._resolution_pars = {} 141 154 # remember inputs so we can inspect from outside 142 155 self.name = data.filename if name is None else name … … 145 158 self._interpret_data(data, model.sasmodel) 146 159 self._cache = {} 160 # CRUFT: no longer need extra parameters 161 # Multiple scattering probability is now retrieved directly from the 162 # multiple scattering resolution function. 147 163 self.extra_pars = extra_pars 148 164 … … 162 178 return len(self.Iq) 163 179 180 @property 181 def resolution(self): 182 return self._resolution 183 184 @resolution.setter 185 def resolution(self, value): 186 self._resolution = value 187 188 # Remove old resolution fitting parameters from experiment 189 for name in self._resolution_pars: 190 delattr(self, name) 191 192 # Create new resolution fitting parameters 193 res_pars = getattr(self._resolution, 'fittable', {}) 194 self._resolution_pars = { 195 name: Reference(self._resolution, refname, name=name) 196 for refname, name in res_pars.items() 197 } 198 199 # Add new resolution fitting parameters as experiment attributes 200 for name, ref in self._resolution_pars.items(): 201 setattr(self, name, ref) 202 164 203 def parameters(self): 165 204 # type: () -> Dict[str, Parameter] … … 168 207 """ 169 208 pars = self.model.parameters() 170 if self.extra_pars :209 if self.extra_pars is not None: 171 210 pars.update(self.extra_pars) 211 pars.update(self._resolution_pars) 172 212 return pars 173 213 -
sasmodels/direct_model.py
rc1799d3 r9150036 224 224 else: 225 225 Iq, dIq = None, None 226 #self._theory = np.zeros_like(q)227 q_vectors = [res.q_calc]228 226 elif self.data_type == 'Iqxy': 229 227 #if not model.info.parameters.has_2d: … … 242 240 res = resolution2d.Pinhole2D(data=data, index=index, 243 241 nsigma=3.0, accuracy=accuracy) 244 #self._theory = np.zeros_like(self.Iq)245 q_vectors = res.q_calc246 242 elif self.data_type == 'Iq': 247 243 index = (data.x >= data.qmin) & (data.x <= data.qmax) … … 268 264 else: 269 265 res = resolution.Perfect1D(data.x[index]) 270 271 #self._theory = np.zeros_like(self.Iq)272 q_vectors = [res.q_calc]273 266 elif self.data_type == 'Iq-oriented': 274 267 index = (data.x >= data.qmin) & (data.x <= data.qmax) … … 286 279 qx_width=data.dxw[index], 287 280 qy_width=data.dxl[index]) 288 q_vectors = res.q_calc289 281 else: 290 282 raise ValueError("Unknown data type") # never gets here … … 292 284 # Remember function inputs so we can delay loading the function and 293 285 # so we can save/restore state 294 self._kernel_inputs = q_vectors295 286 self._kernel = None 296 287 self.Iq, self.dIq, self.index = Iq, dIq, index … … 329 320 # type: (ParameterSet, float) -> np.ndarray 330 321 if self._kernel is None: 331 self._kernel = self._model.make_kernel(self._kernel_inputs) 322 # TODO: change interfaces so that resolution returns kernel inputs 323 # Maybe have resolution always return a tuple, or maybe have 324 # make_kernel accept either an ndarray or a pair of ndarrays. 325 kernel_inputs = self.resolution.q_calc 326 if isinstance(kernel_inputs, np.ndarray): 327 kernel_inputs = (kernel_inputs,) 328 self._kernel = self._model.make_kernel(kernel_inputs) 332 329 333 330 # Need to pull background out of resolution for multiple scattering -
sasmodels/multiscat.py
rb3703f5 r2c4a190 342 342 343 343 *probability* is related to the expected number of scattering 344 events in the sample $\lambda$ as $p = 1 = e^{-\lambda}$. As a 345 hack to allow probability to be a fitted parameter, the "value" 346 can be a function that takes no parameters and returns the current 347 value of the probability. *coverage* determines how many scattering 348 steps to consider. The default is 0.99, which sets $n$ such that 349 $1 \ldots n$ covers 99% of the Poisson probability mass function. 344 events in the sample $\lambda$ as $p = 1 - e^{-\lambda}$. 345 *coverage* determines how many scattering steps to consider. The 346 default is 0.99, which sets $n$ such that $1 \ldots n$ covers 99% 347 of the Poisson probability mass function. 350 348 351 349 *is2d* is True then 2D scattering is used, otherwise it accepts … … 399 397 self.qmin = qmin 400 398 self.nq = nq 401 self.probability = probability399 self.probability = 0. if probability is None else probability 402 400 self.coverage = coverage 403 401 self.is2d = is2d … … 456 454 self.Iqxy = None # type: np.ndarray 457 455 456 # Label probability as a fittable parameter, and give its external name 457 # Note that the external name must be a valid python identifier, since 458 # is will be set as an experiment attribute. 459 self.fittable = {'probability': 'scattering_probability'} 460 458 461 def apply(self, theory): 459 462 if self.is2d: … … 463 466 Iq_calc = Iq_calc.reshape(self.nq, self.nq) 464 467 468 # CRUFT: don't need probability as a function anymore 465 469 probability = self.probability() if callable(self.probability) else self.probability 466 470 coverage = self.coverage -
sasmodels/sasview_model.py
ra8a1d48 r9150036 25 25 from . import core 26 26 from . import custom 27 from . import kernelcl 27 28 from . import product 28 29 from . import generate … … 30 31 from . import modelinfo 31 32 from .details import make_kernel_args, dispersion_mesh 33 from .kernelcl import reset_environment 32 34 33 35 # pylint: disable=unused-import … … 68 70 #: has changed since we last reloaded. 69 71 _CACHED_MODULE = {} # type: Dict[str, "module"] 72 73 def reset_environment(): 74 # type: () -> None 75 """ 76 Clear the compute engine context so that the GUI can change devices. 77 78 This removes all compiled kernels, even those that are active on fit 79 pages, but they will be restored the next time they are needed. 80 """ 81 kernelcl.reset_environment() 82 for model in MODELS.values(): 83 model._model = None 70 84 71 85 def find_model(modelname): … … 696 710 def _calculate_Iq(self, qx, qy=None): 697 711 if self._model is None: 698 self._model = core.build_model(self._model_info) 712 # Only need one copy of the compiled kernel regardless of how many 713 # times it is used, so store it in the class. Also, to reset the 714 # compute engine, need to clear out all existing compiled kernels, 715 # which is much easier to do if we store them in the class. 716 self.__class__._model = core.build_model(self._model_info) 699 717 if qy is not None: 700 718 q_vectors = [np.asarray(qx), np.asarray(qy)] -
sasmodels/kernel.py
rcd28947 r36a2418 23 23 # pylint: enable=unused-import 24 24 25 25 26 class KernelModel(object): 26 27 info = None # type: ModelInfo … … 33 34 # type: () -> None 34 35 pass 36 35 37 36 38 class Kernel(object): -
sasmodels/kernelcl.py
r8064d5e r0d26e91 69 69 import numpy as np # type: ignore 70 70 71 # Attempt to setup opencl. This may fail if the pyopencl package is not71 # Attempt to setup OpenCL. This may fail if the pyopencl package is not 72 72 # installed or if it is installed but there are no devices available. 73 73 try: … … 75 75 from pyopencl import mem_flags as mf 76 76 from pyopencl.characterize import get_fast_inaccurate_build_options 77 # Ask OpenCL for the default context so that we know that one exists 77 # Ask OpenCL for the default context so that we know that one exists. 78 78 cl.create_some_context(interactive=False) 79 79 HAVE_OPENCL = True … … 96 96 # pylint: enable=unused-import 97 97 98 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path) 98 99 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path). 99 100 def quote_path(v): 100 101 """ … … 107 108 return '"'+v+'"' if v and ' ' in v and not v[0] in "\"'-" else v 108 109 110 109 111 def fix_pyopencl_include(): 110 112 """ … … 113 115 import pyopencl as cl 114 116 if hasattr(cl, '_DEFAULT_INCLUDE_OPTIONS'): 115 cl._DEFAULT_INCLUDE_OPTIONS = [quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS] 117 cl._DEFAULT_INCLUDE_OPTIONS = [ 118 quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS 119 ] 120 116 121 117 122 if HAVE_OPENCL: … … 126 131 MAX_LOOPS = 2048 127 132 128 129 133 # Pragmas for enable OpenCL features. Be sure to protect them so that they 130 134 # still compile even if OpenCL is not present. … … 141 145 """ 142 146 147 143 148 def use_opencl(): 144 149 sas_opencl = os.environ.get("SAS_OPENCL", "OpenCL").lower() 145 150 return HAVE_OPENCL and sas_opencl != "none" and not sas_opencl.startswith("cuda") 146 151 152 147 153 ENV = None 148 154 def reset_environment(): … … 152 158 global ENV 153 159 ENV = GpuEnvironment() if use_opencl() else None 160 154 161 155 162 def environment(): … … 169 176 return ENV 170 177 178 171 179 def has_type(device, dtype): 172 180 # type: (cl.Device, np.dtype) -> bool … … 179 187 return "cl_khr_fp64" in device.extensions 180 188 else: 181 # Not supporting F16 type since it isn't accurate enough 189 # Not supporting F16 type since it isn't accurate enough. 182 190 return False 191 183 192 184 193 def get_warp(kernel, queue): … … 190 199 cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, 191 200 queue.device) 201 192 202 193 203 def compile_model(context, source, dtype, fast=False): … … 211 221 source_list.insert(0, _F64_PRAGMA) 212 222 213 # Note: USE_SINCOS makes the intel cpu slower under opencl223 # Note: USE_SINCOS makes the Intel CPU slower under OpenCL. 214 224 if context.devices[0].type == cl.device_type.GPU: 215 225 source_list.insert(0, "#define USE_SINCOS\n") … … 218 228 source = "\n".join(source_list) 219 229 program = cl.Program(context, source).build(options=options) 230 220 231 #print("done with "+program) 221 232 return program 222 233 223 234 224 # for now, this returns one device in the context225 # TODO: create a context that contains all devices on all platforms235 # For now, this returns one device in the context. 236 # TODO: Create a context that contains all devices on all platforms. 226 237 class GpuEnvironment(object): 227 238 """ 228 GPU context, with possibly many devices, and one queue per device. 229 230 Because the environment can be reset during a live program (e.g., if the 231 user changes the active GPU device in the GUI), everything associated 232 with the device context must be cached in the environment and recreated 233 if the environment changes. The *cache* attribute is a simple dictionary 234 which holds keys and references to objects, such as compiled kernels and 235 allocated buffers. The running program should check in the cache for 236 long lived objects and create them if they are not there. The program 237 should not hold onto cached objects, but instead only keep them active 238 for the duration of a function call. When the environment is destroyed 239 then the *release* method for each active cache item is called before 240 the environment is freed. This means that each cl buffer should be 241 in its own cache entry. 239 GPU context for OpenCL, with possibly many devices and one queue per device. 242 240 """ 243 241 def __init__(self): 244 242 # type: () -> None 245 # find gpu context243 # Find gpu context. 246 244 context_list = _create_some_context() 247 245 … … 257 255 self.context[dtype] = None 258 256 259 # Build a queue for each context 257 # Build a queue for each context. 260 258 self.queue = {} 261 259 context = self.context[F32] … … 267 265 self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 268 266 269 # Byte boundary for data alignment267 ## Byte boundary for data alignment. 270 268 #self.data_boundary = max(context.devices[0].min_data_type_align_size 271 269 # for context in self.context.values()) 272 270 273 # Cache for compiled programs, and for items in context 271 # Cache for compiled programs, and for items in context. 274 272 self.compiled = {} 275 self.cache = {}276 273 277 274 def has_type(self, dtype): … … 288 285 """ 289 286 # Note: PyOpenCL caches based on md5 hash of source, options and device 290 # so we don't really need to cache things for ourselves. I'll do so 291 # anyway just to save some data munging time. 287 # but I'll do so as well just to save some data munging time. 292 288 tag = generate.tag_source(source) 293 289 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 294 # Check timestamp on program 290 # Check timestamp on program. 295 291 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 296 292 if program_timestamp < timestamp: … … 305 301 return program 306 302 307 def free_buffer(self, key):308 if key in self.cache:309 self.cache[key].release()310 del self.cache[key]311 312 def __del__(self):313 for v in self.cache.values():314 release = getattr(v, 'release', lambda: None)315 release()316 self.cache = {}317 318 _CURRENT_ID = 0319 def unique_id():320 global _CURRENT_ID321 _CURRENT_ID += 1322 return _CURRENT_ID323 303 324 304 def _create_some_context(): … … 333 313 which one (and not a CUDA device, or no GPU). 334 314 """ 335 # Assume we do not get here if SAS_OPENCL is None or CUDA 315 # Assume we do not get here if SAS_OPENCL is None or CUDA. 336 316 sas_opencl = os.environ.get('SAS_OPENCL', 'opencl') 337 317 if sas_opencl.lower() != 'opencl': 338 # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context 318 # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context. 339 319 os.environ["PYOPENCL_CTX"] = sas_opencl 340 320 … … 344 324 except Exception as exc: 345 325 warnings.warn(str(exc)) 346 warnings.warn("pyopencl.create_some_context() failed") 347 warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") 326 warnings.warn("pyopencl.create_some_context() failed. The " 327 "environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might " 328 "not be set correctly") 348 329 349 330 return _get_default_context() 331 350 332 351 333 def _get_default_context(): … … 360 342 # is running may increase throughput. 361 343 # 362 # Mac book pro, base install:344 # MacBook Pro, base install: 363 345 # {'Apple': [Intel CPU, NVIDIA GPU]} 364 # Mac book pro, base install:346 # MacBook Pro, base install: 365 347 # {'Apple': [Intel CPU, Intel GPU]} 366 # 2 x nvidia 295 with Intel and NVIDIA opencl drivers installed348 # 2 x NVIDIA 295 with Intel and NVIDIA opencl drivers install: 367 349 # {'Intel': [CPU], 'NVIDIA': [GPU, GPU, GPU, GPU]} 368 350 gpu, cpu = None, None … … 387 369 else: 388 370 # System has cl.device_type.ACCELERATOR or cl.device_type.CUSTOM 389 # Intel Phi for example registers as an accelerator 371 # Intel Phi for example registers as an accelerator. 390 372 # Since the user installed a custom device on their system 391 373 # and went through the pain of sorting out OpenCL drivers for … … 394 376 gpu = device 395 377 396 # order the devices by gpu then by cpu; when searching for an available378 # Order the devices by gpu then by cpu; when searching for an available 397 379 # device by data type they will be checked in this order, which means 398 380 # that if the gpu supports double then the cpu will never be used (though … … 421 403 that the compiler is allowed to take shortcuts. 422 404 """ 405 info = None # type: ModelInfo 406 source = "" # type: str 407 dtype = None # type: np.dtype 408 fast = False # type: bool 409 _program = None # type: cl.Program 410 _kernels = None # type: Dict[str, cl.Kernel] 411 423 412 def __init__(self, source, model_info, dtype=generate.F32, fast=False): 424 413 # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None … … 427 416 self.dtype = dtype 428 417 self.fast = fast 429 self.timestamp = generate.ocl_timestamp(self.info)430 self._cache_key = unique_id()431 418 432 419 def __getstate__(self): … … 437 424 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 438 425 self.info, self.source, self.dtype, self.fast = state 426 self._program = self._kernels = None 439 427 440 428 def make_kernel(self, q_vectors): … … 442 430 return GpuKernel(self, q_vectors) 443 431 444 @property 445 def Iq(self): 446 return self._fetch_kernel('Iq') 447 448 def fetch_kernel(self, name): 432 def get_function(self, name): 449 433 # type: (str) -> cl.Kernel 450 434 """ … … 452 436 does not already exist. 453 437 """ 454 gpu = environment() 455 key = self._cache_key 456 if key not in gpu.cache: 457 program = gpu.compile_program( 458 self.info.name, 459 self.source['opencl'], 460 self.dtype, 461 self.fast, 462 self.timestamp) 463 variants = ['Iq', 'Iqxy', 'Imagnetic'] 464 names = [generate.kernel_name(self.info, k) for k in variants] 465 kernels = [getattr(program, k) for k in names] 466 data = dict((k, v) for k, v in zip(variants, kernels)) 467 # keep a handle to program so GC doesn't collect 468 data['program'] = program 469 gpu.cache[key] = data 470 else: 471 data = gpu.cache[key] 472 return data[name] 473 474 # TODO: check that we don't need a destructor for buffers which go out of scope 438 if self._program is None: 439 self._prepare_program() 440 return self._kernels[name] 441 442 def _prepare_program(self): 443 # type: (str) -> None 444 env = environment() 445 timestamp = generate.ocl_timestamp(self.info) 446 program = env.compile_program( 447 self.info.name, 448 self.source['opencl'], 449 self.dtype, 450 self.fast, 451 timestamp) 452 variants = ['Iq', 'Iqxy', 'Imagnetic'] 453 names = [generate.kernel_name(self.info, k) for k in variants] 454 functions = [getattr(program, k) for k in names] 455 self._kernels = {k: v for k, v in zip(variants, functions)} 456 # Keep a handle to program so GC doesn't collect. 457 self._program = program 458 459 460 # TODO: Check that we don't need a destructor for buffers which go out of scope. 475 461 class GpuInput(object): 476 462 """ … … 494 480 def __init__(self, q_vectors, dtype=generate.F32): 495 481 # type: (List[np.ndarray], np.dtype) -> None 496 # TODO: do we ever need double precision q?482 # TODO: Do we ever need double precision q? 497 483 self.nq = q_vectors[0].size 498 484 self.dtype = np.dtype(dtype) 499 485 self.is_2d = (len(q_vectors) == 2) 500 # TODO: stretch input based on get_warp()501 # not doing it now since warp depends on kernel, which is not known486 # TODO: Stretch input based on get_warp(). 487 # Not doing it now since warp depends on kernel, which is not known 502 488 # at this point, so instead using 32, which is good on the set of 503 489 # architectures tested so far. … … 512 498 self.q[:self.nq] = q_vectors[0] 513 499 self.global_size = [self.q.shape[0]] 514 self._cache_key = unique_id() 515 516 @property 517 def q_b(self): 518 """Lazy creation of q buffer so it can survive context reset""" 500 #print("creating inputs of size", self.global_size) 501 502 # Transfer input value to GPU. 519 503 env = environment() 520 key = self._cache_key 521 if key not in env.cache: 522 context = env.context[self.dtype] 523 #print("creating inputs of size", self.global_size) 524 buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 525 hostbuf=self.q) 526 env.cache[key] = buffer 527 return env.cache[key] 504 context = env.context[self.dtype] 505 self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 506 hostbuf=self.q) 528 507 529 508 def release(self): 530 509 # type: () -> None 531 510 """ 532 Free the buffer associated with the q value 533 """ 534 environment().free_buffer(id(self)) 511 Free the buffer associated with the q value. 512 """ 513 if self.q_b is not None: 514 self.q_b.release() 515 self.q_b = None 535 516 536 517 def __del__(self): … … 538 519 self.release() 539 520 521 540 522 class GpuKernel(Kernel): 541 523 """ … … 544 526 *model* is the GpuModel object to call 545 527 546 The following attributes are defined: 547 548 *info* is the module information 549 550 *dtype* is the kernel precision 551 552 *dim* is '1d' or '2d' 553 554 *result* is a vector to contain the results of the call 555 556 The resulting call method takes the *pars*, a list of values for 557 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 558 vectors for the polydisperse parameters. *cutoff* determines the 559 integration limits: any points with combined weight less than *cutoff* 560 will not be calculated. 528 The kernel is derived from :class:`Kernel`, providing the 529 :meth:`call_kernel` method to evaluate the kernel for a given set of 530 parameters. Because of the need to move the q values to the GPU before 531 evaluation, the kernel is instantiated for a particular set of q vectors, 532 and can be called many times without transfering q each time. 561 533 562 534 Call :meth:`release` when done with the kernel instance. 563 535 """ 536 #: SAS model information structure. 537 info = None # type: ModelInfo 538 #: Kernel precision. 539 dtype = None # type: np.dtype 540 #: Kernel dimensions (1d or 2d). 541 dim = "" # type: str 542 #: Calculation results, updated after each call to :meth:`_call_kernel`. 543 result = None # type: np.ndarray 544 564 545 def __init__(self, model, q_vectors): 565 # type: ( cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None546 # type: (GpuModel, List[np.ndarray]) -> None 566 547 dtype = model.dtype 567 548 self.q_input = GpuInput(q_vectors, dtype) 568 549 self._model = model 569 # F16 isn't sufficient, so don't support it 570 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 571 self._cache_key = unique_id() 572 573 # attributes accessed from the outside 550 551 # Attributes accessed from the outside. 574 552 self.dim = '2d' if self.q_input.is_2d else '1d' 575 553 self.info = model.info 576 self.dtype = model.dtype 577 578 # holding place for the returned value 554 self.dtype = dtype 555 556 # Converter to translate input to target type. 557 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 558 559 # Holding place for the returned value. 579 560 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 580 extra_q = 4 # total weight, form volume, shell volume and R_eff 581 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 582 583 @property 584 def _result_b(self): 585 """Lazy creation of result buffer so it can survive context reset""" 561 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 562 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 563 564 # Allocate result value on GPU. 586 565 env = environment() 587 key = self._cache_key 588 if key not in env.cache: 589 context = env.context[self.dtype] 590 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 591 buffer = cl.Buffer(context, mf.READ_WRITE, width) 592 env.cache[key] = buffer 593 return env.cache[key] 594 595 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 596 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 566 context = env.context[self.dtype] 567 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 568 self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 569 570 def _call_kernel(self, call_details, values, cutoff, magnetic, 571 effective_radius_type): 572 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 597 573 env = environment() 598 574 queue = env.queue[self._model.dtype] 599 575 context = queue.context 600 576 601 # Arrange data transfer to/from card 602 q_b = self.q_input.q_b 603 result_b = self._result_b 577 # Arrange data transfer to card. 604 578 details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 605 579 hostbuf=call_details.buffer) … … 607 581 hostbuf=values) 608 582 583 # Setup kernel function and arguments. 609 584 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 610 kernel = self._model. fetch_kernel(name)585 kernel = self._model.get_function(name) 611 586 kernel_args = [ 612 np.uint32(self.q_input.nq), None, None, 613 details_b, values_b, q_b, result_b, 614 self._as_dtype(cutoff), 615 np.uint32(effective_radius_type), 587 np.uint32(self.q_input.nq), # Number of inputs. 588 None, # Placeholder for pd_start. 589 None, # Placeholder for pd_stop. 590 details_b, # Problem definition. 591 values_b, # Parameter values. 592 self.q_input.q_b, # Q values. 593 self._result_b, # Result storage. 594 self._as_dtype(cutoff), # Probability cutoff. 595 np.uint32(effective_radius_type), # R_eff mode. 616 596 ] 597 598 # Call kernel and retrieve results. 617 599 #print("Calling OpenCL") 618 600 #call_details.show(values) 619 #Call kernel and retrieve results620 601 wait_for = None 621 602 last_nap = clock() … … 628 609 *kernel_args, wait_for=wait_for)] 629 610 if stop < call_details.num_eval: 630 # Allow other processes to run 611 # Allow other processes to run. 631 612 wait_for[0].wait() 632 613 current_time = clock() … … 634 615 time.sleep(0.001) 635 616 last_nap = current_time 636 cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for)617 cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for) 637 618 #print("result", self.result) 638 619 639 # Free buffers 640 for v in (details_b, values_b): 641 if v is not None: 642 v.release() 620 # Free buffers. 621 details_b.release() 622 values_b.release() 643 623 644 624 def release(self): … … 647 627 Release resources associated with the kernel. 648 628 """ 649 environment().free_buffer(id(self))650 629 self.q_input.release() 630 if self._result_b is not None: 631 self._result_b.release() 632 self._result_b = None 651 633 652 634 def __del__(self): -
sasmodels/kernelcuda.py
rf872fd1 rfa26e78 63 63 import time 64 64 import re 65 import atexit 65 66 66 67 import numpy as np # type: ignore 67 68 68 69 69 # Attempt to setup cuda. This may fail if the pycuda package is not70 # Attempt to setup CUDA. This may fail if the pycuda package is not 70 71 # installed or if it is installed but there are no devices available. 71 72 try: … … 107 108 MAX_LOOPS = 2048 108 109 110 109 111 def use_cuda(): 110 env = os.environ.get("SAS_OPENCL", "").lower() 111 return HAVE_CUDA and (env == "" or env.startswith("cuda")) 112 sas_opencl = os.environ.get("SAS_OPENCL", "CUDA").lower() 113 return HAVE_CUDA and sas_opencl.startswith("cuda") 114 112 115 113 116 ENV = None … … 121 124 ENV.release() 122 125 ENV = GpuEnvironment() if use_cuda() else None 126 123 127 124 128 def environment(): … … 138 142 return ENV 139 143 144 145 # PyTest is not freeing ENV, so make sure it gets freed. 146 atexit.register(lambda: ENV.release() if ENV is not None else None) 147 148 140 149 def has_type(dtype): 141 150 # type: (np.dtype) -> bool … … 143 152 Return true if device supports the requested precision. 144 153 """ 145 # Assume the nvidiacard supports 32-bit and 64-bit floats.146 # TODO: check if pycuda support F16154 # Assume the NVIDIA card supports 32-bit and 64-bit floats. 155 # TODO: Check if pycuda support F16. 147 156 return dtype in (generate.F32, generate.F64) 148 157 149 158 150 159 FUNCTION_PATTERN = re.compile(r"""^ 151 (?P<space>\s*) # initial space152 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # one or more qualifiers before function153 (?P<function>\s*\b\w+\b\s*[(]) # function name plus open parens160 (?P<space>\s*) # Initial space. 161 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # One or more qualifiers before function. 162 (?P<function>\s*\b\w+\b\s*[(]) # Function name plus open parens. 154 163 """, re.VERBOSE|re.MULTILINE) 155 164 … … 158 167 """, re.VERBOSE|re.MULTILINE) 159 168 169 160 170 def _add_device_tag(match): 161 171 # type: (None) -> str 162 # Note: should be re.Match, but that isn't a simple type172 # Note: Should be re.Match, but that isn't a simple type. 163 173 """ 164 174 replace qualifiers with __device__ qualifiers if needed … … 173 183 return "".join((space, "__device__ ", qualifiers, function)) 174 184 185 175 186 def mark_device_functions(source): 176 187 # type: (str) -> str … … 179 190 """ 180 191 return FUNCTION_PATTERN.sub(_add_device_tag, source) 192 181 193 182 194 def show_device_functions(source): … … 188 200 print(match.group('qualifiers').replace('\n',r'\n'), match.group('function'), '(') 189 201 return source 202 190 203 191 204 def compile_model(source, dtype, fast=False): … … 212 225 #options = ['--verbose', '-E'] 213 226 options = ['--use_fast_math'] if fast else None 214 program = SourceModule(source, no_extern_c=True, options=options) # include_dirs=[...]227 program = SourceModule(source, no_extern_c=True, options=options) #, include_dirs=[...]) 215 228 216 229 #print("done with "+program) … … 218 231 219 232 220 # for now, this returns one device in the context221 # TODO: create a context that contains all devices on all platforms233 # For now, this returns one device in the context. 234 # TODO: Create a context that contains all devices on all platforms. 222 235 class GpuEnvironment(object): 223 236 """ 224 GPU context , with possibly many devices, and one queue per device.237 GPU context for CUDA. 225 238 """ 226 239 context = None # type: cuda.Context 227 240 def __init__(self, devnum=None): 228 241 # type: (int) -> None 229 # Byte boundary for data alignment230 #self.data_boundary = max(d.min_data_type_align_size231 # for d in self.context.devices)232 self.compiled = {}233 242 env = os.environ.get("SAS_OPENCL", "").lower() 234 243 if devnum is None and env.startswith("cuda:"): 235 244 devnum = int(env[5:]) 245 236 246 # Set the global context to the particular device number if one is 237 247 # given, otherwise use the default context. Perhaps this will be set … … 242 252 self.context = make_default_context() 243 253 254 ## Byte boundary for data alignment. 255 #self.data_boundary = max(d.min_data_type_align_size 256 # for d in self.context.devices) 257 258 # Cache for compiled programs, and for items in context. 259 self.compiled = {} 260 244 261 def release(self): 245 262 if self.context is not None: … … 262 279 Compile the program for the device in the given context. 263 280 """ 264 # Note: PyOpenCL caches based on md5 hash of source, options and device 265 # so we don't really need to cache things for ourselves. I'll do so 266 # anyway just to save some data munging time. 281 # Note: PyCuda (probably) caches but I'll do so as well just to 282 # save some data munging time. 267 283 tag = generate.tag_source(source) 268 284 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 269 # Check timestamp on program 285 # Check timestamp on program. 270 286 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 271 287 if program_timestamp < timestamp: … … 277 293 return program 278 294 295 279 296 class GpuModel(KernelModel): 280 297 """ … … 292 309 that the compiler is allowed to take shortcuts. 293 310 """ 294 info = None # type: ModelInfo295 source = "" # type: str296 dtype = None # type: np.dtype297 fast = False # type: bool298 program = None# type: SourceModule299 _kernels = None # type: List[cuda.Function]311 info = None # type: ModelInfo 312 source = "" # type: str 313 dtype = None # type: np.dtype 314 fast = False # type: bool 315 _program = None # type: SourceModule 316 _kernels = None # type: Dict[str, cuda.Function] 300 317 301 318 def __init__(self, source, model_info, dtype=generate.F32, fast=False): … … 305 322 self.dtype = dtype 306 323 self.fast = fast 307 self.program = None # delay program creation308 self._kernels = None309 324 310 325 def __getstate__(self): … … 315 330 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 316 331 self.info, self.source, self.dtype, self.fast = state 317 self. program= None332 self._program = self._kernels = None 318 333 319 334 def make_kernel(self, q_vectors): 320 335 # type: (List[np.ndarray]) -> "GpuKernel" 321 if self.program is None: 322 compile_program = environment().compile_program 323 timestamp = generate.ocl_timestamp(self.info) 324 self.program = compile_program( 325 self.info.name, 326 self.source['opencl'], 327 self.dtype, 328 self.fast, 329 timestamp) 330 variants = ['Iq', 'Iqxy', 'Imagnetic'] 331 names = [generate.kernel_name(self.info, k) for k in variants] 332 kernels = [self.program.get_function(k) for k in names] 333 self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 334 is_2d = len(q_vectors) == 2 335 if is_2d: 336 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 337 else: 338 kernel = [self._kernels['Iq']]*2 339 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 340 341 def release(self): 342 # type: () -> None 343 """ 344 Free the resources associated with the model. 345 """ 346 if self.program is not None: 347 self.program = None 348 349 def __del__(self): 350 # type: () -> None 351 self.release() 352 353 # TODO: check that we don't need a destructor for buffers which go out of scope 336 return GpuKernel(self, q_vectors) 337 338 def get_function(self, name): 339 # type: (str) -> cuda.Function 340 """ 341 Fetch the kernel from the environment by name, compiling it if it 342 does not already exist. 343 """ 344 if self._program is None: 345 self._prepare_program() 346 return self._kernels[name] 347 348 def _prepare_program(self): 349 # type: (str) -> None 350 env = environment() 351 timestamp = generate.ocl_timestamp(self.info) 352 program = env.compile_program( 353 self.info.name, 354 self.source['opencl'], 355 self.dtype, 356 self.fast, 357 timestamp) 358 variants = ['Iq', 'Iqxy', 'Imagnetic'] 359 names = [generate.kernel_name(self.info, k) for k in variants] 360 functions = [program.get_function(k) for k in names] 361 self._kernels = {k: v for k, v in zip(variants, functions)} 362 # Keep a handle to program so GC doesn't collect. 363 self._program = program 364 365 366 # TODO: Check that we don't need a destructor for buffers which go out of scope. 354 367 class GpuInput(object): 355 368 """ … … 373 386 def __init__(self, q_vectors, dtype=generate.F32): 374 387 # type: (List[np.ndarray], np.dtype) -> None 375 # TODO: do we ever need double precision q?388 # TODO: Do we ever need double precision q? 376 389 self.nq = q_vectors[0].size 377 390 self.dtype = np.dtype(dtype) 378 391 self.is_2d = (len(q_vectors) == 2) 379 # TODO: stretch input based on get_warp()380 # not doing it now since warp depends on kernel, which is not known392 # TODO: Stretch input based on get_warp(). 393 # Not doing it now since warp depends on kernel, which is not known 381 394 # at this point, so instead using 32, which is good on the set of 382 395 # architectures tested so far. 383 396 if self.is_2d: 384 # Note: 16 rather than 15 because result is 1 longer than input. 385 width = ((self.nq+16)//16)*16 397 width = ((self.nq+15)//16)*16 386 398 self.q = np.empty((width, 2), dtype=dtype) 387 399 self.q[:self.nq, 0] = q_vectors[0] 388 400 self.q[:self.nq, 1] = q_vectors[1] 389 401 else: 390 # Note: 32 rather than 31 because result is 1 longer than input. 391 width = ((self.nq+32)//32)*32 402 width = ((self.nq+31)//32)*32 392 403 self.q = np.empty(width, dtype=dtype) 393 404 self.q[:self.nq] = q_vectors[0] 394 405 self.global_size = [self.q.shape[0]] 395 406 #print("creating inputs of size", self.global_size) 407 408 # Transfer input value to GPU. 396 409 self.q_b = cuda.to_device(self.q) 397 410 … … 399 412 # type: () -> None 400 413 """ 401 Free the memory.414 Free the buffer associated with the q value. 402 415 """ 403 416 if self.q_b is not None: … … 409 422 self.release() 410 423 424 411 425 class GpuKernel(Kernel): 412 426 """ 413 427 Callable SAS kernel. 414 428 415 *kernel* is the GpuKernel object to call 416 417 *model_info* is the module information 418 419 *q_vectors* is the q vectors at which the kernel should be evaluated 420 421 *dtype* is the kernel precision 422 423 The resulting call method takes the *pars*, a list of values for 424 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 425 vectors for the polydisperse parameters. *cutoff* determines the 426 integration limits: any points with combined weight less than *cutoff* 427 will not be calculated. 429 *model* is the GpuModel object to call 430 431 The kernel is derived from :class:`Kernel`, providing the 432 :meth:`call_kernel` method to evaluate the kernel for a given set of 433 parameters. Because of the need to move the q values to the GPU before 434 evaluation, the kernel is instantiated for a particular set of q vectors, 435 and can be called many times without transfering q each time. 428 436 429 437 Call :meth:`release` when done with the kernel instance. 430 438 """ 431 def __init__(self, kernel, dtype, model_info, q_vectors): 432 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 439 #: SAS model information structure. 440 info = None # type: ModelInfo 441 #: Kernel precision. 442 dtype = None # type: np.dtype 443 #: Kernel dimensions (1d or 2d). 444 dim = "" # type: str 445 #: Calculation results, updated after each call to :meth:`_call_kernel`. 446 result = None # type: np.ndarray 447 448 def __init__(self, model, q_vectors): 449 # type: (GpuModel, List[np.ndarray]) -> None 450 dtype = model.dtype 433 451 self.q_input = GpuInput(q_vectors, dtype) 434 self.kernel = kernel 435 # F16 isn't sufficient, so don't support it 452 self._model = model 453 454 # Attributes accessed from the outside. 455 self.dim = '2d' if self.q_input.is_2d else '1d' 456 self.info = model.info 457 self.dtype = dtype 458 459 # Converter to translate input to target type. 436 460 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 437 461 438 # attributes accessed from the outside 439 self.dim = '2d' if self.q_input.is_2d else '1d' 440 self.info = model_info 441 self.dtype = dtype 442 443 # holding place for the returned value 462 # Holding place for the returned value. 444 463 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 445 extra_q = 4 # total weight, form volume, shell volume and R_eff 446 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 447 448 # Inputs and outputs for each kernel call 449 # Note: res may be shorter than res_b if global_size != nq 464 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 465 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 466 467 # Allocate result value on GPU. 450 468 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 451 self.result_b = cuda.mem_alloc(width) 452 self._need_release = [self.result_b] 453 454 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 455 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 456 # Arrange data transfer to card 469 self._result_b = cuda.mem_alloc(width) 470 471 def _call_kernel(self, call_details, values, cutoff, magnetic, 472 effective_radius_type): 473 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 474 475 # Arrange data transfer to card. 457 476 details_b = cuda.to_device(call_details.buffer) 458 477 values_b = cuda.to_device(values) 459 478 460 kernel = self.kernel[1 if magnetic else 0] 461 args = [ 462 np.uint32(self.q_input.nq), None, None, 463 details_b, values_b, self.q_input.q_b, self.result_b, 464 self._as_dtype(cutoff), 465 np.uint32(effective_radius_type), 479 # Setup kernel function and arguments. 480 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 481 kernel = self._model.get_function(name) 482 kernel_args = [ 483 np.uint32(self.q_input.nq), # Number of inputs. 484 None, # Placeholder for pd_start. 485 None, # Placeholder for pd_stop. 486 details_b, # Problem definition. 487 values_b, # Parameter values. 488 self.q_input.q_b, # Q values. 489 self._result_b, # Result storage. 490 self._as_dtype(cutoff), # Probability cutoff. 491 np.uint32(effective_radius_type), # R_eff mode. 466 492 ] 467 493 grid = partition(self.q_input.nq) 468 #print("Calling OpenCL") 494 495 # Call kernel and retrieve results. 496 #print("Calling CUDA") 469 497 #call_details.show(values) 470 # Call kernel and retrieve results471 498 last_nap = time.clock() 472 499 step = 100000000//self.q_input.nq + 1 … … 475 502 stop = min(start + step, call_details.num_eval) 476 503 #print("queuing",start,stop) 477 args[1:3] = [np.int32(start), np.int32(stop)]478 kernel(* args, **grid)504 kernel_args[1:3] = [np.int32(start), np.int32(stop)] 505 kernel(*kernel_args, **grid) 479 506 if stop < call_details.num_eval: 480 507 sync() 481 # Allow other processes to run 508 # Allow other processes to run. 482 509 current_time = time.clock() 483 510 if current_time - last_nap > 0.5: … … 485 512 last_nap = current_time 486 513 sync() 487 cuda.memcpy_dtoh(self.result, self. result_b)514 cuda.memcpy_dtoh(self.result, self._result_b) 488 515 #print("result", self.result) 489 516 … … 496 523 Release resources associated with the kernel. 497 524 """ 498 for p in self._need_release: 499 p.free() 500 self._need_release = [] 525 self.q_input.release() 526 if self._result_b is not None: 527 self._result_b.free() 528 self._result_b = None 501 529 502 530 def __del__(self): … … 512 540 Note: Maybe context.synchronize() is sufficient. 513 541 """ 514 #return # The following works in C++; don't know what pycuda is doing 515 # Create an event with which to synchronize 542 # Create an event with which to synchronize. 516 543 done = cuda.Event() 517 544 … … 519 546 done.record() 520 547 521 # line added to not hog resources548 # Make sure we don't hog resource while waiting to sync. 522 549 while not done.query(): 523 550 time.sleep(0.01) … … 525 552 # Block until the GPU executes the kernel. 526 553 done.synchronize() 554 527 555 # Clean up the event; I don't think they can be reused. 528 556 del done -
sasmodels/kerneldll.py
re44432d r3199b17 100 100 # pylint: enable=unused-import 101 101 102 # Compiler output is a byte stream that needs to be decode in python 3 102 # Compiler output is a byte stream that needs to be decode in python 3. 103 103 decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 104 104 … … 115 115 COMPILER = "tinycc" 116 116 elif "VCINSTALLDIR" in os.environ: 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 118 # and we can use the MSVC compiler. Otherwise, if tinycc is available 119 # the use it. Otherwise, hope that mingw is available. 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 118 # environment and we can use the MSVC compiler. Otherwise, if 119 # tinycc is available then use it. Otherwise, hope that mingw 120 # is available. 120 121 COMPILER = "msvc" 121 122 else: … … 124 125 COMPILER = "unix" 125 126 126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86. 127 128 if COMPILER == "unix": 128 # Generic unix compile 129 # On mac users will need the X code command line tools installed129 # Generic unix compile. 130 # On Mac users will need the X code command line tools installed. 130 131 #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 131 132 CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 132 # add openmp support if not running on a mac133 # Add OpenMP support if not running on a Mac. 133 134 if sys.platform != "darwin": 134 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 135 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9). 135 136 # Shut it off for all unix until we can investigate. 136 137 #CC.append("-fopenmp") … … 144 145 # vcomp90.dll on the path. One may be found here: 145 146 # C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 146 # Copy this to the python directory and uncomment the OpenMP COMPILE 147 # TODO: remove intermediate OBJ file created in the directory148 # TODO: maybe don't use randomized name for the c file149 # TODO: maybe ask distutils to find MSVC147 # Copy this to the python directory and uncomment the OpenMP COMPILE. 148 # TODO: Remove intermediate OBJ file created in the directory. 149 # TODO: Maybe don't use randomized name for the c file. 150 # TODO: Maybe ask distutils to find MSVC. 150 151 CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 151 152 if "SAS_OPENMP" in os.environ: … … 172 173 ALLOW_SINGLE_PRECISION_DLLS = True 173 174 175 174 176 def compile(source, output): 175 177 # type: (str, str) -> None … … 183 185 logging.info(command_str) 184 186 try: 185 # need shell=True on windows to keep console box from popping up187 # Need shell=True on windows to keep console box from popping up. 186 188 shell = (os.name == 'nt') 187 189 subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) … … 192 194 raise RuntimeError("compile failed. File is in %r"%source) 193 195 196 194 197 def dll_name(model_info, dtype): 195 198 # type: (ModelInfo, np.dtype) -> str … … 202 205 basename += ARCH + ".so" 203 206 204 # Hack to find precompiled dlls 207 # Hack to find precompiled dlls. 205 208 path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 206 209 if os.path.exists(path): … … 242 245 raise ValueError("16 bit floats not supported") 243 246 if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 244 dtype = F64 # Force 64-bit dll 245 # Note: dtype may be F128 for long double precision 247 dtype = F64 # Force 64-bit dll. 248 # Note: dtype may be F128 for long double precision. 246 249 247 250 dll = dll_path(model_info, dtype) … … 254 257 need_recompile = dll_time < newest_source 255 258 if need_recompile: 256 # Make sure the DLL path exists 259 # Make sure the DLL path exists. 257 260 if not os.path.exists(SAS_DLL_PATH): 258 261 os.makedirs(SAS_DLL_PATH) … … 263 266 file_handle.write(source) 264 267 compile(source=filename, output=dll) 265 # comment the following to keep the generated c file266 # Note: if there is a syntax error then compile raises an error268 # Comment the following to keep the generated C file. 269 # Note: If there is a syntax error then compile raises an error 267 270 # and the source file will not be deleted. 268 271 os.unlink(filename) … … 303 306 self.dllpath = dllpath 304 307 self._dll = None # type: ct.CDLL 305 self._kernels = None # type: List[Callable, Callable]308 self._kernels = None # type: List[Callable, Callable] 306 309 self.dtype = np.dtype(dtype) 307 310 … … 338 341 # type: (List[np.ndarray]) -> DllKernel 339 342 q_input = PyInput(q_vectors, self.dtype) 340 # Note: pickle not supported for DllKernel343 # Note: DLL is lazy loaded. 341 344 if self._dll is None: 342 345 self._load_dll() … … 358 361 self._dll = None 359 362 363 360 364 class DllKernel(Kernel): 361 365 """ … … 379 383 def __init__(self, kernel, model_info, q_input): 380 384 # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 381 #,model_info,q_input) 385 dtype = q_input.dtype 386 self.q_input = q_input 382 387 self.kernel = kernel 388 389 # Attributes accessed from the outside. 390 self.dim = '2d' if q_input.is_2d else '1d' 383 391 self.info = model_info 384 self.q_input = q_input 385 self.dtype = q_input.dtype 386 self.dim = '2d' if q_input.is_2d else '1d' 387 # leave room for f1/f2 results in case we need to compute beta for 1d models 392 self.dtype = dtype 393 394 # Converter to translate input to target type. 395 self._as_dtype = (np.float32 if dtype == generate.F32 396 else np.float64 if dtype == generate.F64 397 else np.float128) 398 399 # Holding place for the returned value. 388 400 nout = 2 if self.info.have_Fq else 1 389 # +4 for total weight, shell volume, effective radius, form volume390 self.result = np.empty( q_input.nq*nout + 4, self.dtype)391 self.real = (np.float32 if self.q_input.dtype == generate.F32 392 else np.float64 if self.q_input.dtype == generate.F64393 e lse np.float128)394 395 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 396 # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray401 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 402 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 403 404 def _call_kernel(self, call_details, values, cutoff, magnetic, 405 effective_radius_type): 406 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 407 408 # Setup kernel function and arguments. 397 409 kernel = self.kernel[1 if magnetic else 0] 398 args = [399 self.q_input.nq, # nq400 None, # pd_start401 None, # pd_stop pd_stride[MAX_PD]402 call_details.buffer.ctypes.data, # problem403 values.ctypes.data, # pars404 self.q_input.q.ctypes.data, # q405 self.result.ctypes.data, # results406 self. real(cutoff), # cutoff407 effective_radius_type, # cutoff410 kernel_args = [ 411 self.q_input.nq, # Number of inputs. 412 None, # Placeholder for pd_start. 413 None, # Placeholder for pd_stop. 414 call_details.buffer.ctypes.data, # Problem definition. 415 values.ctypes.data, # Parameter values. 416 self.q_input.q.ctypes.data, # Q values. 417 self.result.ctypes.data, # Result storage. 418 self._as_dtype(cutoff), # Probability cutoff. 419 effective_radius_type, # R_eff mode. 408 420 ] 421 422 # Call kernel and retrieve results. 409 423 #print("Calling DLL") 410 424 #call_details.show(values) 411 425 step = 100 426 # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops? 412 427 for start in range(0, call_details.num_eval, step): 413 428 stop = min(start + step, call_details.num_eval) 414 args[1:3] = [start, stop]415 kernel(* args) # type: ignore429 kernel_args[1:3] = [start, stop] 430 kernel(*kernel_args) # type: ignore 416 431 417 432 def release(self): 418 433 # type: () -> None 419 434 """ 420 Release anyresources associated with the kernel.435 Release resources associated with the kernel. 421 436 """ 422 self.q_input.release() 437 # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 438 # Should we be doing the same for DLL? 439 #self.q_input.release() 440 pass 441 442 def __del__(self): 443 # type: () -> None 444 self.release() -
sasmodels/kernelpy.py
raa8c6e0 r3199b17 33 33 logger = logging.getLogger(__name__) 34 34 35 35 36 class PyModel(KernelModel): 36 37 """ … … 38 39 """ 39 40 def __init__(self, model_info): 40 # Make sure Iq is available and vectorized 41 # Make sure Iq is available and vectorized. 41 42 _create_default_functions(model_info) 42 43 self.info = model_info … … 53 54 """ 54 55 pass 56 55 57 56 58 class PyInput(object): … … 91 93 self.q = None 92 94 95 93 96 class PyKernel(Kernel): 94 97 """ … … 131 134 parameter_vector = np.empty(len(partable.call_parameters)-2, 'd') 132 135 133 # Create views into the array to hold the arguments 136 # Create views into the array to hold the arguments. 134 137 offset = 0 135 138 kernel_args, volume_args = [], [] … … 174 177 else (lambda mode: 1.0)) 175 178 176 177 178 179 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 179 180 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray … … 195 196 self.q_input.release() 196 197 self.q_input = None 198 197 199 198 200 def _loops(parameters, # type: np.ndarray … … 254 256 total = np.zeros(nq, 'd') 255 257 for loop_index in range(call_details.num_eval): 256 # update polydispersity parameter values258 # Update polydispersity parameter values. 257 259 if p0_index == p0_length: 258 260 pd_index = (loop_index//pd_stride)%pd_length … … 265 267 p0_index += 1 266 268 if weight > cutoff: 267 # Call the scattering function 269 # Call the scattering function. 268 270 # Assume that NaNs are only generated if the parameters are bad; 269 271 # exclude all q for that NaN. Even better would be to have an … … 273 275 continue 274 276 275 # update value and norm277 # Update value and norm. 276 278 total += weight * Iq 277 279 weight_norm += weight … … 293 295 any functions that are not already marked as vectorized. 294 296 """ 295 # Note: must call create_vector_Iq before create_vector_Iqxy297 # Note: Must call create_vector_Iq before create_vector_Iqxy. 296 298 _create_vector_Iq(model_info) 297 299 _create_vector_Iqxy(model_info) -
sasmodels/model_test.py
r5024a56 r00afc15 167 167 # test using cuda if desired and available 168 168 if 'cuda' in loaders and use_cuda(): 169 test_name = "%s-cuda" %model_name169 test_name = "%s-cuda" % model_info.id 170 170 test_method_name = "test_%s_cuda" % model_info.id 171 171 # Using dtype=None so that the models that are only -
sasmodels/models/rpa.c
r71b751d r19dc29e7 25 25 double S0ba,Pbb,S0bb,Pbc,S0bc,Pbd,S0bd; 26 26 double S0ca,S0cb,Pcc,S0cc,Pcd,S0cd; 27 double S0da,S0db,S0dc;27 //double S0da,S0db,S0dc; 28 28 double Pdd,S0dd; 29 29 double Kaa,Kbb,Kcc; 30 30 double Kba,Kca,Kcb; 31 double Kda,Kdb,Kdc,Kdd;31 //double Kda,Kdb,Kdc,Kdd; 32 32 double Zaa,Zab,Zac,Zba,Zbb,Zbc,Zca,Zcb,Zcc; 33 33 double DenT,T11,T12,T13,T21,T22,T23,T31,T32,T33; … … 36 36 double N11,N12,N13,N21,N22,N23,N31,N32,N33; 37 37 double M11,M12,M13,M21,M22,M23,M31,M32,M33; 38 double S11,S12,S13,S14,S21,S22,S23,S24; 39 double S31,S32,S33,S34,S41,S42,S43,S44; 38 double S11,S12,S22,S23,S13,S33; 39 //double S21,S31,S32,S44; 40 //double S14,S24,S34,S41,S42,S43; 40 41 double Lad,Lbd,Lcd,Nav,Intg; 41 42 … … 115 116 S0cd=(Phicd*vcd*Ncd)*Pcd; 116 117 117 S0da=S0ad;118 S0db=S0bd;119 S0dc=S0cd;118 //S0da=S0ad; 119 //S0db=S0bd; 120 //S0dc=S0cd; 120 121 Pdd=2.0*(exp(-Xd)-1.0+Xd)/(Xd*Xd); // free D chain 121 122 S0dd=N[3]*Phi[3]*v[3]*Pdd; … … 198 199 S0ca=S0ac; 199 200 S0cb=S0bc; 200 S0da=S0ad;201 S0db=S0bd;202 S0dc=S0cd;201 //S0da=S0ad; 202 //S0db=S0bd; 203 //S0dc=S0cd; 203 204 204 205 // self chi parameter is 0 ... of course … … 206 207 Kbb=0.0; 207 208 Kcc=0.0; 208 Kdd=0.0;209 //Kdd=0.0; 209 210 210 211 Kba=Kab; 211 212 Kca=Kac; 212 213 Kcb=Kbc; 213 Kda=Kad;214 Kdb=Kbd;215 Kdc=Kcd;214 //Kda=Kad; 215 //Kdb=Kbd; 216 //Kdc=Kcd; 216 217 217 218 Zaa=Kaa-Kad-Kad; … … 294 295 S12= Q12*S0aa + Q22*S0ab + Q32*S0ac; 295 296 S13= Q13*S0aa + Q23*S0ab + Q33*S0ac; 296 S14=-S11-S12-S13;297 S21= Q11*S0ba + Q21*S0bb + Q31*S0bc;298 297 S22= Q12*S0ba + Q22*S0bb + Q32*S0bc; 299 298 S23= Q13*S0ba + Q23*S0bb + Q33*S0bc; 300 S24=-S21-S22-S23;301 S31= Q11*S0ca + Q21*S0cb + Q31*S0cc;302 S32= Q12*S0ca + Q22*S0cb + Q32*S0cc;303 299 S33= Q13*S0ca + Q23*S0cb + Q33*S0cc; 304 S34=-S31-S32-S33; 305 S41=S14; 306 S42=S24; 307 S43=S34; 308 S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 300 //S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 301 //S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 302 //S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 303 //S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 304 //S14=-S11-S12-S13; 305 //S24=-S21-S22-S23; 306 //S34=-S31-S32-S33; 307 //S41=S14; 308 //S42=S24; 309 //S43=S34; 309 310 310 311 //calculate contrast where L[i] is the scattering length of i and D is the matrix -
sasmodels/resolution.py
re2592f0 r0d26e91 498 498 if q_min < 0: 499 499 q_min = q[0]*MINIMUM_ABSOLUTE_Q 500 n_low = log_delta_q * (log(q[0])-log(q_min))501 q_low = np.logspace(log10(q_min), log10(q[0]), int(np.ceil(n_low))+1)[:-1]500 n_low = int(np.ceil(log_delta_q * (log(q[0])-log(q_min)))) 501 q_low = np.logspace(log10(q_min), log10(q[0]), n_low+1)[:-1] 502 502 else: 503 503 q_low = [] 504 504 if q_max > q[-1]: 505 n_high = log_delta_q * (log(q_max)-log(q[-1]))506 q_high = np.logspace(log10(q[-1]), log10(q_max), int(np.ceil(n_high))+1)[1:]505 n_high = int(np.ceil(log_delta_q * (log(q_max)-log(q[-1])))) 506 q_high = np.logspace(log10(q[-1]), log10(q_max), n_high+1)[1:] 507 507 else: 508 508 q_high = []
Note: See TracChangeset
for help on using the changeset viewer.