Changes in / [15f5138:9150036] in sasmodels
- Location:
- sasmodels
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel.py
r3199b17 rcd28947 23 23 # pylint: enable=unused-import 24 24 25 26 25 class KernelModel(object): 27 26 info = None # type: ModelInfo … … 34 33 # type: () -> None 35 34 pass 36 37 35 38 36 class Kernel(object): -
sasmodels/kernelcl.py
r3199b17 rf872fd1 69 69 import numpy as np # type: ignore 70 70 71 # Attempt to setup OpenCL. This may fail if the pyopencl package is not71 # Attempt to setup opencl. This may fail if the pyopencl package is not 72 72 # installed or if it is installed but there are no devices available. 73 73 try: … … 75 75 from pyopencl import mem_flags as mf 76 76 from pyopencl.characterize import get_fast_inaccurate_build_options 77 # Ask OpenCL for the default context so that we know that one exists .77 # Ask OpenCL for the default context so that we know that one exists 78 78 cl.create_some_context(interactive=False) 79 79 HAVE_OPENCL = True … … 96 96 # pylint: enable=unused-import 97 97 98 99 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path). 98 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path) 100 99 def quote_path(v): 101 100 """ … … 108 107 return '"'+v+'"' if v and ' ' in v and not v[0] in "\"'-" else v 109 108 110 111 109 def fix_pyopencl_include(): 112 110 """ … … 115 113 import pyopencl as cl 116 114 if hasattr(cl, '_DEFAULT_INCLUDE_OPTIONS'): 117 cl._DEFAULT_INCLUDE_OPTIONS = [ 118 quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS 119 ] 120 115 cl._DEFAULT_INCLUDE_OPTIONS = [quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS] 121 116 122 117 if HAVE_OPENCL: … … 131 126 MAX_LOOPS = 2048 132 127 128 133 129 # Pragmas for enable OpenCL features. Be sure to protect them so that they 134 130 # still compile even if OpenCL is not present. … … 145 141 """ 146 142 147 148 143 def use_opencl(): 149 144 sas_opencl = os.environ.get("SAS_OPENCL", "OpenCL").lower() 150 145 return HAVE_OPENCL and sas_opencl != "none" and not sas_opencl.startswith("cuda") 151 146 152 153 147 ENV = None 154 148 def reset_environment(): … … 158 152 global ENV 159 153 ENV = GpuEnvironment() if use_opencl() else None 160 161 154 162 155 def environment(): … … 176 169 return ENV 177 170 178 179 171 def has_type(device, dtype): 180 172 # type: (cl.Device, np.dtype) -> bool … … 187 179 return "cl_khr_fp64" in device.extensions 188 180 else: 189 # Not supporting F16 type since it isn't accurate enough .181 # Not supporting F16 type since it isn't accurate enough 190 182 return False 191 192 183 193 184 def get_warp(kernel, queue): … … 199 190 cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, 200 191 queue.device) 201 202 192 203 193 def compile_model(context, source, dtype, fast=False): … … 221 211 source_list.insert(0, _F64_PRAGMA) 222 212 223 # Note: USE_SINCOS makes the Intel CPU slower under OpenCL.213 # Note: USE_SINCOS makes the intel cpu slower under opencl 224 214 if context.devices[0].type == cl.device_type.GPU: 225 215 source_list.insert(0, "#define USE_SINCOS\n") … … 228 218 source = "\n".join(source_list) 229 219 program = cl.Program(context, source).build(options=options) 230 231 220 #print("done with "+program) 232 221 return program 233 222 234 223 235 # For now, this returns one device in the context.236 # TODO: Create a context that contains all devices on all platforms.224 # for now, this returns one device in the context 225 # TODO: create a context that contains all devices on all platforms 237 226 class GpuEnvironment(object): 238 227 """ 239 GPU context for OpenCL, with possibly many devices and one queue per device. 228 GPU context, with possibly many devices, and one queue per device. 229 230 Because the environment can be reset during a live program (e.g., if the 231 user changes the active GPU device in the GUI), everything associated 232 with the device context must be cached in the environment and recreated 233 if the environment changes. The *cache* attribute is a simple dictionary 234 which holds keys and references to objects, such as compiled kernels and 235 allocated buffers. The running program should check in the cache for 236 long lived objects and create them if they are not there. The program 237 should not hold onto cached objects, but instead only keep them active 238 for the duration of a function call. When the environment is destroyed 239 then the *release* method for each active cache item is called before 240 the environment is freed. This means that each cl buffer should be 241 in its own cache entry. 240 242 """ 241 243 def __init__(self): 242 244 # type: () -> None 243 # Find gpu context.245 # find gpu context 244 246 context_list = _create_some_context() 245 247 … … 255 257 self.context[dtype] = None 256 258 257 # Build a queue for each context .259 # Build a queue for each context 258 260 self.queue = {} 259 261 context = self.context[F32] … … 265 267 self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 266 268 267 # # Byte boundary for data alignment.269 # Byte boundary for data alignment 268 270 #self.data_boundary = max(context.devices[0].min_data_type_align_size 269 271 # for context in self.context.values()) 270 272 271 # Cache for compiled programs, and for items in context .273 # Cache for compiled programs, and for items in context 272 274 self.compiled = {} 275 self.cache = {} 273 276 274 277 def has_type(self, dtype): … … 285 288 """ 286 289 # Note: PyOpenCL caches based on md5 hash of source, options and device 287 # but I'll do so as well just to save some data munging time. 290 # so we don't really need to cache things for ourselves. I'll do so 291 # anyway just to save some data munging time. 288 292 tag = generate.tag_source(source) 289 293 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 290 # Check timestamp on program .294 # Check timestamp on program 291 295 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 292 296 if program_timestamp < timestamp: … … 301 305 return program 302 306 307 def free_buffer(self, key): 308 if key in self.cache: 309 self.cache[key].release() 310 del self.cache[key] 311 312 def __del__(self): 313 for v in self.cache.values(): 314 release = getattr(v, 'release', lambda: None) 315 release() 316 self.cache = {} 317 318 _CURRENT_ID = 0 319 def unique_id(): 320 global _CURRENT_ID 321 _CURRENT_ID += 1 322 return _CURRENT_ID 303 323 304 324 def _create_some_context(): … … 313 333 which one (and not a CUDA device, or no GPU). 314 334 """ 315 # Assume we do not get here if SAS_OPENCL is None or CUDA .335 # Assume we do not get here if SAS_OPENCL is None or CUDA 316 336 sas_opencl = os.environ.get('SAS_OPENCL', 'opencl') 317 337 if sas_opencl.lower() != 'opencl': 318 # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context .338 # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context 319 339 os.environ["PYOPENCL_CTX"] = sas_opencl 320 340 … … 324 344 except Exception as exc: 325 345 warnings.warn(str(exc)) 326 warnings.warn("pyopencl.create_some_context() failed. The " 327 "environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might " 328 "not be set correctly") 346 warnings.warn("pyopencl.create_some_context() failed") 347 warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") 329 348 330 349 return _get_default_context() 331 332 350 333 351 def _get_default_context(): … … 342 360 # is running may increase throughput. 343 361 # 344 # Mac Book Pro, base install:362 # Macbook pro, base install: 345 363 # {'Apple': [Intel CPU, NVIDIA GPU]} 346 # Mac Book Pro, base install:364 # Macbook pro, base install: 347 365 # {'Apple': [Intel CPU, Intel GPU]} 348 # 2 x NVIDIA 295 with Intel and NVIDIA opencl drivers install:366 # 2 x nvidia 295 with Intel and NVIDIA opencl drivers installed 349 367 # {'Intel': [CPU], 'NVIDIA': [GPU, GPU, GPU, GPU]} 350 368 gpu, cpu = None, None … … 369 387 else: 370 388 # System has cl.device_type.ACCELERATOR or cl.device_type.CUSTOM 371 # Intel Phi for example registers as an accelerator .389 # Intel Phi for example registers as an accelerator 372 390 # Since the user installed a custom device on their system 373 391 # and went through the pain of sorting out OpenCL drivers for … … 376 394 gpu = device 377 395 378 # Order the devices by gpu then by cpu; when searching for an available396 # order the devices by gpu then by cpu; when searching for an available 379 397 # device by data type they will be checked in this order, which means 380 398 # that if the gpu supports double then the cpu will never be used (though … … 403 421 that the compiler is allowed to take shortcuts. 404 422 """ 405 info = None # type: ModelInfo406 source = "" # type: str407 dtype = None # type: np.dtype408 fast = False # type: bool409 _program = None # type: cl.Program410 _kernels = None # type: Dict[str, cl.Kernel]411 412 423 def __init__(self, source, model_info, dtype=generate.F32, fast=False): 413 424 # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None … … 416 427 self.dtype = dtype 417 428 self.fast = fast 429 self.timestamp = generate.ocl_timestamp(self.info) 430 self._cache_key = unique_id() 418 431 419 432 def __getstate__(self): … … 424 437 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 425 438 self.info, self.source, self.dtype, self.fast = state 426 self._program = self._kernels = None427 439 428 440 def make_kernel(self, q_vectors): … … 430 442 return GpuKernel(self, q_vectors) 431 443 432 def get_function(self, name): 444 @property 445 def Iq(self): 446 return self._fetch_kernel('Iq') 447 448 def fetch_kernel(self, name): 433 449 # type: (str) -> cl.Kernel 434 450 """ … … 436 452 does not already exist. 437 453 """ 438 if self._program is None: 439 self._prepare_program() 440 return self._kernels[name] 441 442 def _prepare_program(self): 443 # type: (str) -> None 444 env = environment() 445 timestamp = generate.ocl_timestamp(self.info) 446 program = env.compile_program( 447 self.info.name, 448 self.source['opencl'], 449 self.dtype, 450 self.fast, 451 timestamp) 452 variants = ['Iq', 'Iqxy', 'Imagnetic'] 453 names = [generate.kernel_name(self.info, k) for k in variants] 454 functions = [getattr(program, k) for k in names] 455 self._kernels = {k: v for k, v in zip(variants, functions)} 456 # Keep a handle to program so GC doesn't collect. 457 self._program = program 458 459 460 # TODO: Check that we don't need a destructor for buffers which go out of scope. 454 gpu = environment() 455 key = self._cache_key 456 if key not in gpu.cache: 457 program = gpu.compile_program( 458 self.info.name, 459 self.source['opencl'], 460 self.dtype, 461 self.fast, 462 self.timestamp) 463 variants = ['Iq', 'Iqxy', 'Imagnetic'] 464 names = [generate.kernel_name(self.info, k) for k in variants] 465 kernels = [getattr(program, k) for k in names] 466 data = dict((k, v) for k, v in zip(variants, kernels)) 467 # keep a handle to program so GC doesn't collect 468 data['program'] = program 469 gpu.cache[key] = data 470 else: 471 data = gpu.cache[key] 472 return data[name] 473 474 # TODO: check that we don't need a destructor for buffers which go out of scope 461 475 class GpuInput(object): 462 476 """ … … 480 494 def __init__(self, q_vectors, dtype=generate.F32): 481 495 # type: (List[np.ndarray], np.dtype) -> None 482 # TODO: Do we ever need double precision q?496 # TODO: do we ever need double precision q? 483 497 self.nq = q_vectors[0].size 484 498 self.dtype = np.dtype(dtype) 485 499 self.is_2d = (len(q_vectors) == 2) 486 # TODO: Stretch input based on get_warp().487 # Not doing it now since warp depends on kernel, which is not known500 # TODO: stretch input based on get_warp() 501 # not doing it now since warp depends on kernel, which is not known 488 502 # at this point, so instead using 32, which is good on the set of 489 503 # architectures tested so far. … … 498 512 self.q[:self.nq] = q_vectors[0] 499 513 self.global_size = [self.q.shape[0]] 500 #print("creating inputs of size", self.global_size) 501 502 # Transfer input value to GPU. 514 self._cache_key = unique_id() 515 516 @property 517 def q_b(self): 518 """Lazy creation of q buffer so it can survive context reset""" 503 519 env = environment() 504 context = env.context[self.dtype] 505 self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 506 hostbuf=self.q) 520 key = self._cache_key 521 if key not in env.cache: 522 context = env.context[self.dtype] 523 #print("creating inputs of size", self.global_size) 524 buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 525 hostbuf=self.q) 526 env.cache[key] = buffer 527 return env.cache[key] 507 528 508 529 def release(self): 509 530 # type: () -> None 510 531 """ 511 Free the buffer associated with the q value. 512 """ 513 if self.q_b is not None: 514 self.q_b.release() 515 self.q_b = None 532 Free the buffer associated with the q value 533 """ 534 environment().free_buffer(id(self)) 516 535 517 536 def __del__(self): … … 519 538 self.release() 520 539 521 522 540 class GpuKernel(Kernel): 523 541 """ … … 526 544 *model* is the GpuModel object to call 527 545 528 The kernel is derived from :class:`Kernel`, providing the 529 :meth:`call_kernel` method to evaluate the kernel for a given set of 530 parameters. Because of the need to move the q values to the GPU before 531 evaluation, the kernel is instantiated for a particular set of q vectors, 532 and can be called many times without transfering q each time. 546 The following attributes are defined: 547 548 *info* is the module information 549 550 *dtype* is the kernel precision 551 552 *dim* is '1d' or '2d' 553 554 *result* is a vector to contain the results of the call 555 556 The resulting call method takes the *pars*, a list of values for 557 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 558 vectors for the polydisperse parameters. *cutoff* determines the 559 integration limits: any points with combined weight less than *cutoff* 560 will not be calculated. 533 561 534 562 Call :meth:`release` when done with the kernel instance. 535 563 """ 536 #: SAS model information structure.537 info = None # type: ModelInfo538 #: Kernel precision.539 dtype = None # type: np.dtype540 #: Kernel dimensions (1d or 2d).541 dim = "" # type: str542 #: Calculation results, updated after each call to :meth:`_call_kernel`.543 result = None # type: np.ndarray544 545 564 def __init__(self, model, q_vectors): 546 # type: ( GpuModel, List[np.ndarray]) -> None565 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 547 566 dtype = model.dtype 548 567 self.q_input = GpuInput(q_vectors, dtype) 549 568 self._model = model 550 551 # Attributes accessed from the outside. 569 # F16 isn't sufficient, so don't support it 570 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 571 self._cache_key = unique_id() 572 573 # attributes accessed from the outside 552 574 self.dim = '2d' if self.q_input.is_2d else '1d' 553 575 self.info = model.info 554 self.dtype = dtype 555 556 # Converter to translate input to target type. 557 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 558 559 # Holding place for the returned value. 576 self.dtype = model.dtype 577 578 # holding place for the returned value 560 579 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 561 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 562 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 563 564 # Allocate result value on GPU. 580 extra_q = 4 # total weight, form volume, shell volume and R_eff 581 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 582 583 @property 584 def _result_b(self): 585 """Lazy creation of result buffer so it can survive context reset""" 565 586 env = environment() 566 context = env.context[self.dtype] 567 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 568 self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 569 570 def _call_kernel(self, call_details, values, cutoff, magnetic, 571 effective_radius_type): 572 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 587 key = self._cache_key 588 if key not in env.cache: 589 context = env.context[self.dtype] 590 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 591 buffer = cl.Buffer(context, mf.READ_WRITE, width) 592 env.cache[key] = buffer 593 return env.cache[key] 594 595 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 596 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 573 597 env = environment() 574 598 queue = env.queue[self._model.dtype] 575 599 context = queue.context 576 600 577 # Arrange data transfer to card. 601 # Arrange data transfer to/from card 602 q_b = self.q_input.q_b 603 result_b = self._result_b 578 604 details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 579 605 hostbuf=call_details.buffer) … … 581 607 hostbuf=values) 582 608 583 # Setup kernel function and arguments.584 609 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 585 kernel = self._model. get_function(name)610 kernel = self._model.fetch_kernel(name) 586 611 kernel_args = [ 587 np.uint32(self.q_input.nq), # Number of inputs. 588 None, # Placeholder for pd_start. 589 None, # Placeholder for pd_stop. 590 details_b, # Problem definition. 591 values_b, # Parameter values. 592 self.q_input.q_b, # Q values. 593 self._result_b, # Result storage. 594 self._as_dtype(cutoff), # Probability cutoff. 595 np.uint32(effective_radius_type), # R_eff mode. 612 np.uint32(self.q_input.nq), None, None, 613 details_b, values_b, q_b, result_b, 614 self._as_dtype(cutoff), 615 np.uint32(effective_radius_type), 596 616 ] 597 598 # Call kernel and retrieve results.599 617 #print("Calling OpenCL") 600 618 #call_details.show(values) 619 #Call kernel and retrieve results 601 620 wait_for = None 602 621 last_nap = clock() … … 609 628 *kernel_args, wait_for=wait_for)] 610 629 if stop < call_details.num_eval: 611 # Allow other processes to run .630 # Allow other processes to run 612 631 wait_for[0].wait() 613 632 current_time = clock() … … 615 634 time.sleep(0.001) 616 635 last_nap = current_time 617 cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for)636 cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 618 637 #print("result", self.result) 619 638 620 # Free buffers. 621 details_b.release() 622 values_b.release() 639 # Free buffers 640 for v in (details_b, values_b): 641 if v is not None: 642 v.release() 623 643 624 644 def release(self): … … 627 647 Release resources associated with the kernel. 628 648 """ 649 environment().free_buffer(id(self)) 629 650 self.q_input.release() 630 if self._result_b is not None:631 self._result_b.release()632 self._result_b = None633 651 634 652 def __del__(self): -
sasmodels/kernelcuda.py
rfa26e78 rf872fd1 63 63 import time 64 64 import re 65 import atexit66 65 67 66 import numpy as np # type: ignore 68 67 69 68 70 # Attempt to setup CUDA. This may fail if the pycuda package is not69 # Attempt to setup cuda. This may fail if the pycuda package is not 71 70 # installed or if it is installed but there are no devices available. 72 71 try: … … 108 107 MAX_LOOPS = 2048 109 108 110 111 109 def use_cuda(): 112 sas_opencl = os.environ.get("SAS_OPENCL", "CUDA").lower() 113 return HAVE_CUDA and sas_opencl.startswith("cuda") 114 110 env = os.environ.get("SAS_OPENCL", "").lower() 111 return HAVE_CUDA and (env == "" or env.startswith("cuda")) 115 112 116 113 ENV = None … … 124 121 ENV.release() 125 122 ENV = GpuEnvironment() if use_cuda() else None 126 127 123 128 124 def environment(): … … 142 138 return ENV 143 139 144 145 # PyTest is not freeing ENV, so make sure it gets freed.146 atexit.register(lambda: ENV.release() if ENV is not None else None)147 148 149 140 def has_type(dtype): 150 141 # type: (np.dtype) -> bool … … 152 143 Return true if device supports the requested precision. 153 144 """ 154 # Assume the NVIDIAcard supports 32-bit and 64-bit floats.155 # TODO: Check if pycuda support F16.145 # Assume the nvidia card supports 32-bit and 64-bit floats. 146 # TODO: check if pycuda support F16 156 147 return dtype in (generate.F32, generate.F64) 157 148 158 149 159 150 FUNCTION_PATTERN = re.compile(r"""^ 160 (?P<space>\s*) # Initial space.161 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # One or more qualifiers before function.162 (?P<function>\s*\b\w+\b\s*[(]) # Function name plus open parens.151 (?P<space>\s*) # initial space 152 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # one or more qualifiers before function 153 (?P<function>\s*\b\w+\b\s*[(]) # function name plus open parens 163 154 """, re.VERBOSE|re.MULTILINE) 164 155 … … 167 158 """, re.VERBOSE|re.MULTILINE) 168 159 169 170 160 def _add_device_tag(match): 171 161 # type: (None) -> str 172 # Note: Should be re.Match, but that isn't a simple type.162 # Note: should be re.Match, but that isn't a simple type 173 163 """ 174 164 replace qualifiers with __device__ qualifiers if needed … … 183 173 return "".join((space, "__device__ ", qualifiers, function)) 184 174 185 186 175 def mark_device_functions(source): 187 176 # type: (str) -> str … … 190 179 """ 191 180 return FUNCTION_PATTERN.sub(_add_device_tag, source) 192 193 181 194 182 def show_device_functions(source): … … 200 188 print(match.group('qualifiers').replace('\n',r'\n'), match.group('function'), '(') 201 189 return source 202 203 190 204 191 def compile_model(source, dtype, fast=False): … … 225 212 #options = ['--verbose', '-E'] 226 213 options = ['--use_fast_math'] if fast else None 227 program = SourceModule(source, no_extern_c=True, options=options) # , include_dirs=[...])214 program = SourceModule(source, no_extern_c=True, options=options) # include_dirs=[...] 228 215 229 216 #print("done with "+program) … … 231 218 232 219 233 # For now, this returns one device in the context.234 # TODO: Create a context that contains all devices on all platforms.220 # for now, this returns one device in the context 221 # TODO: create a context that contains all devices on all platforms 235 222 class GpuEnvironment(object): 236 223 """ 237 GPU context for CUDA.224 GPU context, with possibly many devices, and one queue per device. 238 225 """ 239 226 context = None # type: cuda.Context 240 227 def __init__(self, devnum=None): 241 228 # type: (int) -> None 229 # Byte boundary for data alignment 230 #self.data_boundary = max(d.min_data_type_align_size 231 # for d in self.context.devices) 232 self.compiled = {} 242 233 env = os.environ.get("SAS_OPENCL", "").lower() 243 234 if devnum is None and env.startswith("cuda:"): 244 235 devnum = int(env[5:]) 245 246 236 # Set the global context to the particular device number if one is 247 237 # given, otherwise use the default context. Perhaps this will be set … … 252 242 self.context = make_default_context() 253 243 254 ## Byte boundary for data alignment.255 #self.data_boundary = max(d.min_data_type_align_size256 # for d in self.context.devices)257 258 # Cache for compiled programs, and for items in context.259 self.compiled = {}260 261 244 def release(self): 262 245 if self.context is not None: … … 279 262 Compile the program for the device in the given context. 280 263 """ 281 # Note: PyCuda (probably) caches but I'll do so as well just to 282 # save some data munging time. 264 # Note: PyOpenCL caches based on md5 hash of source, options and device 265 # so we don't really need to cache things for ourselves. I'll do so 266 # anyway just to save some data munging time. 283 267 tag = generate.tag_source(source) 284 268 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 285 # Check timestamp on program .269 # Check timestamp on program 286 270 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 287 271 if program_timestamp < timestamp: … … 293 277 return program 294 278 295 296 279 class GpuModel(KernelModel): 297 280 """ … … 309 292 that the compiler is allowed to take shortcuts. 310 293 """ 311 info = None 312 source = "" 313 dtype = None 314 fast = False 315 _program = None# type: SourceModule316 _kernels = None # type: Dict[str,cuda.Function]294 info = None # type: ModelInfo 295 source = "" # type: str 296 dtype = None # type: np.dtype 297 fast = False # type: bool 298 program = None # type: SourceModule 299 _kernels = None # type: List[cuda.Function] 317 300 318 301 def __init__(self, source, model_info, dtype=generate.F32, fast=False): … … 322 305 self.dtype = dtype 323 306 self.fast = fast 307 self.program = None # delay program creation 308 self._kernels = None 324 309 325 310 def __getstate__(self): … … 330 315 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 331 316 self.info, self.source, self.dtype, self.fast = state 332 self. _program = self._kernels= None317 self.program = None 333 318 334 319 def make_kernel(self, q_vectors): 335 320 # type: (List[np.ndarray]) -> "GpuKernel" 336 return GpuKernel(self, q_vectors) 337 338 def get_function(self, name): 339 # type: (str) -> cuda.Function 340 """ 341 Fetch the kernel from the environment by name, compiling it if it 342 does not already exist. 343 """ 344 if self._program is None: 345 self._prepare_program() 346 return self._kernels[name] 347 348 def _prepare_program(self): 349 # type: (str) -> None 350 env = environment() 351 timestamp = generate.ocl_timestamp(self.info) 352 program = env.compile_program( 353 self.info.name, 354 self.source['opencl'], 355 self.dtype, 356 self.fast, 357 timestamp) 358 variants = ['Iq', 'Iqxy', 'Imagnetic'] 359 names = [generate.kernel_name(self.info, k) for k in variants] 360 functions = [program.get_function(k) for k in names] 361 self._kernels = {k: v for k, v in zip(variants, functions)} 362 # Keep a handle to program so GC doesn't collect. 363 self._program = program 364 365 366 # TODO: Check that we don't need a destructor for buffers which go out of scope. 321 if self.program is None: 322 compile_program = environment().compile_program 323 timestamp = generate.ocl_timestamp(self.info) 324 self.program = compile_program( 325 self.info.name, 326 self.source['opencl'], 327 self.dtype, 328 self.fast, 329 timestamp) 330 variants = ['Iq', 'Iqxy', 'Imagnetic'] 331 names = [generate.kernel_name(self.info, k) for k in variants] 332 kernels = [self.program.get_function(k) for k in names] 333 self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 334 is_2d = len(q_vectors) == 2 335 if is_2d: 336 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 337 else: 338 kernel = [self._kernels['Iq']]*2 339 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 340 341 def release(self): 342 # type: () -> None 343 """ 344 Free the resources associated with the model. 345 """ 346 if self.program is not None: 347 self.program = None 348 349 def __del__(self): 350 # type: () -> None 351 self.release() 352 353 # TODO: check that we don't need a destructor for buffers which go out of scope 367 354 class GpuInput(object): 368 355 """ … … 386 373 def __init__(self, q_vectors, dtype=generate.F32): 387 374 # type: (List[np.ndarray], np.dtype) -> None 388 # TODO: Do we ever need double precision q?375 # TODO: do we ever need double precision q? 389 376 self.nq = q_vectors[0].size 390 377 self.dtype = np.dtype(dtype) 391 378 self.is_2d = (len(q_vectors) == 2) 392 # TODO: Stretch input based on get_warp().393 # Not doing it now since warp depends on kernel, which is not known379 # TODO: stretch input based on get_warp() 380 # not doing it now since warp depends on kernel, which is not known 394 381 # at this point, so instead using 32, which is good on the set of 395 382 # architectures tested so far. 396 383 if self.is_2d: 397 width = ((self.nq+15)//16)*16 384 # Note: 16 rather than 15 because result is 1 longer than input. 385 width = ((self.nq+16)//16)*16 398 386 self.q = np.empty((width, 2), dtype=dtype) 399 387 self.q[:self.nq, 0] = q_vectors[0] 400 388 self.q[:self.nq, 1] = q_vectors[1] 401 389 else: 402 width = ((self.nq+31)//32)*32 390 # Note: 32 rather than 31 because result is 1 longer than input. 391 width = ((self.nq+32)//32)*32 403 392 self.q = np.empty(width, dtype=dtype) 404 393 self.q[:self.nq] = q_vectors[0] 405 394 self.global_size = [self.q.shape[0]] 406 395 #print("creating inputs of size", self.global_size) 407 408 # Transfer input value to GPU.409 396 self.q_b = cuda.to_device(self.q) 410 397 … … 412 399 # type: () -> None 413 400 """ 414 Free the buffer associated with the q value.401 Free the memory. 415 402 """ 416 403 if self.q_b is not None: … … 422 409 self.release() 423 410 424 425 411 class GpuKernel(Kernel): 426 412 """ 427 413 Callable SAS kernel. 428 414 429 *model* is the GpuModel object to call 430 431 The kernel is derived from :class:`Kernel`, providing the 432 :meth:`call_kernel` method to evaluate the kernel for a given set of 433 parameters. Because of the need to move the q values to the GPU before 434 evaluation, the kernel is instantiated for a particular set of q vectors, 435 and can be called many times without transfering q each time. 415 *kernel* is the GpuKernel object to call 416 417 *model_info* is the module information 418 419 *q_vectors* is the q vectors at which the kernel should be evaluated 420 421 *dtype* is the kernel precision 422 423 The resulting call method takes the *pars*, a list of values for 424 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 425 vectors for the polydisperse parameters. *cutoff* determines the 426 integration limits: any points with combined weight less than *cutoff* 427 will not be calculated. 436 428 437 429 Call :meth:`release` when done with the kernel instance. 438 430 """ 439 #: SAS model information structure. 440 info = None # type: ModelInfo 441 #: Kernel precision. 442 dtype = None # type: np.dtype 443 #: Kernel dimensions (1d or 2d). 444 dim = "" # type: str 445 #: Calculation results, updated after each call to :meth:`_call_kernel`. 446 result = None # type: np.ndarray 447 448 def __init__(self, model, q_vectors): 449 # type: (GpuModel, List[np.ndarray]) -> None 450 dtype = model.dtype 431 def __init__(self, kernel, dtype, model_info, q_vectors): 432 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 451 433 self.q_input = GpuInput(q_vectors, dtype) 452 self._model = model 453 454 # Attributes accessed from the outside. 434 self.kernel = kernel 435 # F16 isn't sufficient, so don't support it 436 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 437 438 # attributes accessed from the outside 455 439 self.dim = '2d' if self.q_input.is_2d else '1d' 456 self.info = model .info440 self.info = model_info 457 441 self.dtype = dtype 458 442 459 # Converter to translate input to target type. 460 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 461 462 # Holding place for the returned value. 443 # holding place for the returned value 463 444 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 464 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 465 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 466 467 # Allocate result value on GPU. 445 extra_q = 4 # total weight, form volume, shell volume and R_eff 446 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 447 448 # Inputs and outputs for each kernel call 449 # Note: res may be shorter than res_b if global_size != nq 468 450 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 469 self._result_b = cuda.mem_alloc(width) 470 471 def _call_kernel(self, call_details, values, cutoff, magnetic, 472 effective_radius_type): 473 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 474 475 # Arrange data transfer to card. 451 self.result_b = cuda.mem_alloc(width) 452 self._need_release = [self.result_b] 453 454 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 455 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 456 # Arrange data transfer to card 476 457 details_b = cuda.to_device(call_details.buffer) 477 458 values_b = cuda.to_device(values) 478 459 479 # Setup kernel function and arguments. 480 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 481 kernel = self._model.get_function(name) 482 kernel_args = [ 483 np.uint32(self.q_input.nq), # Number of inputs. 484 None, # Placeholder for pd_start. 485 None, # Placeholder for pd_stop. 486 details_b, # Problem definition. 487 values_b, # Parameter values. 488 self.q_input.q_b, # Q values. 489 self._result_b, # Result storage. 490 self._as_dtype(cutoff), # Probability cutoff. 491 np.uint32(effective_radius_type), # R_eff mode. 460 kernel = self.kernel[1 if magnetic else 0] 461 args = [ 462 np.uint32(self.q_input.nq), None, None, 463 details_b, values_b, self.q_input.q_b, self.result_b, 464 self._as_dtype(cutoff), 465 np.uint32(effective_radius_type), 492 466 ] 493 467 grid = partition(self.q_input.nq) 494 495 # Call kernel and retrieve results. 496 #print("Calling CUDA") 468 #print("Calling OpenCL") 497 469 #call_details.show(values) 470 # Call kernel and retrieve results 498 471 last_nap = time.clock() 499 472 step = 100000000//self.q_input.nq + 1 … … 502 475 stop = min(start + step, call_details.num_eval) 503 476 #print("queuing",start,stop) 504 kernel_args[1:3] = [np.int32(start), np.int32(stop)]505 kernel(* kernel_args, **grid)477 args[1:3] = [np.int32(start), np.int32(stop)] 478 kernel(*args, **grid) 506 479 if stop < call_details.num_eval: 507 480 sync() 508 # Allow other processes to run .481 # Allow other processes to run 509 482 current_time = time.clock() 510 483 if current_time - last_nap > 0.5: … … 512 485 last_nap = current_time 513 486 sync() 514 cuda.memcpy_dtoh(self.result, self. _result_b)487 cuda.memcpy_dtoh(self.result, self.result_b) 515 488 #print("result", self.result) 516 489 … … 523 496 Release resources associated with the kernel. 524 497 """ 525 self.q_input.release() 526 if self._result_b is not None: 527 self._result_b.free() 528 self._result_b = None 498 for p in self._need_release: 499 p.free() 500 self._need_release = [] 529 501 530 502 def __del__(self): … … 540 512 Note: Maybe context.synchronize() is sufficient. 541 513 """ 542 # Create an event with which to synchronize. 514 #return # The following works in C++; don't know what pycuda is doing 515 # Create an event with which to synchronize 543 516 done = cuda.Event() 544 517 … … 546 519 done.record() 547 520 548 # Make sure we don't hog resource while waiting to sync.521 #line added to not hog resources 549 522 while not done.query(): 550 523 time.sleep(0.01) … … 552 525 # Block until the GPU executes the kernel. 553 526 done.synchronize() 554 555 527 # Clean up the event; I don't think they can be reused. 556 528 del done -
sasmodels/kerneldll.py
r3199b17 re44432d 100 100 # pylint: enable=unused-import 101 101 102 # Compiler output is a byte stream that needs to be decode in python 3 .102 # Compiler output is a byte stream that needs to be decode in python 3 103 103 decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 104 104 … … 115 115 COMPILER = "tinycc" 116 116 elif "VCINSTALLDIR" in os.environ: 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 118 # environment and we can use the MSVC compiler. Otherwise, if 119 # tinycc is available then use it. Otherwise, hope that mingw 120 # is available. 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 118 # and we can use the MSVC compiler. Otherwise, if tinycc is available 119 # the use it. Otherwise, hope that mingw is available. 121 120 COMPILER = "msvc" 122 121 else: … … 125 124 COMPILER = "unix" 126 125 127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 .126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 128 127 if COMPILER == "unix": 129 # Generic unix compile .130 # On Mac users will need the X code command line tools installed.128 # Generic unix compile 129 # On mac users will need the X code command line tools installed 131 130 #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 132 131 CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 133 # Add OpenMP support if not running on a Mac.132 # add openmp support if not running on a mac 134 133 if sys.platform != "darwin": 135 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) .134 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 136 135 # Shut it off for all unix until we can investigate. 137 136 #CC.append("-fopenmp") … … 145 144 # vcomp90.dll on the path. One may be found here: 146 145 # C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 147 # Copy this to the python directory and uncomment the OpenMP COMPILE .148 # TODO: Remove intermediate OBJ file created in the directory.149 # TODO: Maybe don't use randomized name for the c file.150 # TODO: Maybe ask distutils to find MSVC.146 # Copy this to the python directory and uncomment the OpenMP COMPILE 147 # TODO: remove intermediate OBJ file created in the directory 148 # TODO: maybe don't use randomized name for the c file 149 # TODO: maybe ask distutils to find MSVC 151 150 CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 152 151 if "SAS_OPENMP" in os.environ: … … 173 172 ALLOW_SINGLE_PRECISION_DLLS = True 174 173 175 176 174 def compile(source, output): 177 175 # type: (str, str) -> None … … 185 183 logging.info(command_str) 186 184 try: 187 # Need shell=True on windows to keep console box from popping up.185 # need shell=True on windows to keep console box from popping up 188 186 shell = (os.name == 'nt') 189 187 subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) … … 194 192 raise RuntimeError("compile failed. File is in %r"%source) 195 193 196 197 194 def dll_name(model_info, dtype): 198 195 # type: (ModelInfo, np.dtype) -> str … … 205 202 basename += ARCH + ".so" 206 203 207 # Hack to find precompiled dlls .204 # Hack to find precompiled dlls 208 205 path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 209 206 if os.path.exists(path): … … 245 242 raise ValueError("16 bit floats not supported") 246 243 if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 247 dtype = F64 # Force 64-bit dll .248 # Note: dtype may be F128 for long double precision .244 dtype = F64 # Force 64-bit dll 245 # Note: dtype may be F128 for long double precision 249 246 250 247 dll = dll_path(model_info, dtype) … … 257 254 need_recompile = dll_time < newest_source 258 255 if need_recompile: 259 # Make sure the DLL path exists .256 # Make sure the DLL path exists 260 257 if not os.path.exists(SAS_DLL_PATH): 261 258 os.makedirs(SAS_DLL_PATH) … … 266 263 file_handle.write(source) 267 264 compile(source=filename, output=dll) 268 # Comment the following to keep the generated C file.269 # Note: If there is a syntax error then compile raises an error265 # comment the following to keep the generated c file 266 # Note: if there is a syntax error then compile raises an error 270 267 # and the source file will not be deleted. 271 268 os.unlink(filename) … … 306 303 self.dllpath = dllpath 307 304 self._dll = None # type: ct.CDLL 308 self._kernels = None 305 self._kernels = None # type: List[Callable, Callable] 309 306 self.dtype = np.dtype(dtype) 310 307 … … 341 338 # type: (List[np.ndarray]) -> DllKernel 342 339 q_input = PyInput(q_vectors, self.dtype) 343 # Note: DLL is lazy loaded.340 # Note: pickle not supported for DllKernel 344 341 if self._dll is None: 345 342 self._load_dll() … … 361 358 self._dll = None 362 359 363 364 360 class DllKernel(Kernel): 365 361 """ … … 383 379 def __init__(self, kernel, model_info, q_input): 384 380 # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 385 dtype = q_input.dtype 381 #,model_info,q_input) 382 self.kernel = kernel 383 self.info = model_info 386 384 self.q_input = q_input 387 self.kernel = kernel 388 389 # Attributes accessed from the outside. 385 self.dtype = q_input.dtype 390 386 self.dim = '2d' if q_input.is_2d else '1d' 391 self.info = model_info 392 self.dtype = dtype 393 394 # Converter to translate input to target type. 395 self._as_dtype = (np.float32 if dtype == generate.F32 396 else np.float64 if dtype == generate.F64 397 else np.float128) 398 399 # Holding place for the returned value. 387 # leave room for f1/f2 results in case we need to compute beta for 1d models 400 388 nout = 2 if self.info.have_Fq else 1 401 extra_q = 4 # Total weight, form volume, shell volume and R_eff.402 self.result = np.empty( self.q_input.nq*nout + extra_q,dtype)403 404 def _call_kernel(self, call_details, values, cutoff, magnetic,405 e ffective_radius_type):406 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 407 408 # Setup kernel function and arguments.389 # +4 for total weight, shell volume, effective radius, form volume 390 self.result = np.empty(q_input.nq*nout + 4, self.dtype) 391 self.real = (np.float32 if self.q_input.dtype == generate.F32 392 else np.float64 if self.q_input.dtype == generate.F64 393 else np.float128) 394 395 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 396 # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 409 397 kernel = self.kernel[1 if magnetic else 0] 410 kernel_args = [411 self.q_input.nq, # Number of inputs.412 None, # Placeholder for pd_start.413 None, # Placeholder for pd_stop.414 call_details.buffer.ctypes.data, # Problem definition.415 values.ctypes.data, # Parameter values.416 self.q_input.q.ctypes.data, # Q values.417 self.result.ctypes.data, # Result storage.418 self. _as_dtype(cutoff), # Probability cutoff.419 effective_radius_type, # R_eff mode.398 args = [ 399 self.q_input.nq, # nq 400 None, # pd_start 401 None, # pd_stop pd_stride[MAX_PD] 402 call_details.buffer.ctypes.data, # problem 403 values.ctypes.data, # pars 404 self.q_input.q.ctypes.data, # q 405 self.result.ctypes.data, # results 406 self.real(cutoff), # cutoff 407 effective_radius_type, # cutoff 420 408 ] 421 422 # Call kernel and retrieve results.423 409 #print("Calling DLL") 424 410 #call_details.show(values) 425 411 step = 100 426 # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops?427 412 for start in range(0, call_details.num_eval, step): 428 413 stop = min(start + step, call_details.num_eval) 429 kernel_args[1:3] = [start, stop]430 kernel(* kernel_args) # type: ignore414 args[1:3] = [start, stop] 415 kernel(*args) # type: ignore 431 416 432 417 def release(self): 433 418 # type: () -> None 434 419 """ 435 Release resources associated with the kernel.420 Release any resources associated with the kernel. 436 421 """ 437 # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 438 # Should we be doing the same for DLL? 439 #self.q_input.release() 440 pass 441 442 def __del__(self): 443 # type: () -> None 444 self.release() 422 self.q_input.release() -
sasmodels/kernelpy.py
r3199b17 re44432d 33 33 logger = logging.getLogger(__name__) 34 34 35 36 35 class PyModel(KernelModel): 37 36 """ … … 39 38 """ 40 39 def __init__(self, model_info): 41 # Make sure Iq is available and vectorized .40 # Make sure Iq is available and vectorized 42 41 _create_default_functions(model_info) 43 42 self.info = model_info … … 54 53 """ 55 54 pass 56 57 55 58 56 class PyInput(object): … … 93 91 self.q = None 94 92 95 96 93 class PyKernel(Kernel): 97 94 """ … … 134 131 parameter_vector = np.empty(len(partable.call_parameters)-2, 'd') 135 132 136 # Create views into the array to hold the arguments .133 # Create views into the array to hold the arguments 137 134 offset = 0 138 135 kernel_args, volume_args = [], [] … … 177 174 else (lambda mode: 1.0)) 178 175 176 177 179 178 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 180 179 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray … … 196 195 self.q_input.release() 197 196 self.q_input = None 198 199 197 200 198 def _loops(parameters, # type: np.ndarray … … 256 254 total = np.zeros(nq, 'd') 257 255 for loop_index in range(call_details.num_eval): 258 # Update polydispersity parameter values.256 # update polydispersity parameter values 259 257 if p0_index == p0_length: 260 258 pd_index = (loop_index//pd_stride)%pd_length … … 267 265 p0_index += 1 268 266 if weight > cutoff: 269 # Call the scattering function .267 # Call the scattering function 270 268 # Assume that NaNs are only generated if the parameters are bad; 271 269 # exclude all q for that NaN. Even better would be to have an … … 275 273 continue 276 274 277 # Update value and norm.275 # update value and norm 278 276 total += weight * Iq 279 277 weight_norm += weight … … 295 293 any functions that are not already marked as vectorized. 296 294 """ 297 # Note: Must call create_vector_Iq before create_vector_Iqxy.295 # Note: must call create_vector_Iq before create_vector_Iqxy 298 296 _create_vector_Iq(model_info) 299 297 _create_vector_Iqxy(model_info) -
sasmodels/model_test.py
r00afc15 r5024a56 167 167 # test using cuda if desired and available 168 168 if 'cuda' in loaders and use_cuda(): 169 test_name = "%s-cuda" % model_info.id169 test_name = "%s-cuda"%model_name 170 170 test_method_name = "test_%s_cuda" % model_info.id 171 171 # Using dtype=None so that the models that are only -
sasmodels/models/rpa.c
r19dc29e7 r71b751d 25 25 double S0ba,Pbb,S0bb,Pbc,S0bc,Pbd,S0bd; 26 26 double S0ca,S0cb,Pcc,S0cc,Pcd,S0cd; 27 //double S0da,S0db,S0dc;27 double S0da,S0db,S0dc; 28 28 double Pdd,S0dd; 29 29 double Kaa,Kbb,Kcc; 30 30 double Kba,Kca,Kcb; 31 //double Kda,Kdb,Kdc,Kdd;31 double Kda,Kdb,Kdc,Kdd; 32 32 double Zaa,Zab,Zac,Zba,Zbb,Zbc,Zca,Zcb,Zcc; 33 33 double DenT,T11,T12,T13,T21,T22,T23,T31,T32,T33; … … 36 36 double N11,N12,N13,N21,N22,N23,N31,N32,N33; 37 37 double M11,M12,M13,M21,M22,M23,M31,M32,M33; 38 double S11,S12,S22,S23,S13,S33; 39 //double S21,S31,S32,S44; 40 //double S14,S24,S34,S41,S42,S43; 38 double S11,S12,S13,S14,S21,S22,S23,S24; 39 double S31,S32,S33,S34,S41,S42,S43,S44; 41 40 double Lad,Lbd,Lcd,Nav,Intg; 42 41 … … 116 115 S0cd=(Phicd*vcd*Ncd)*Pcd; 117 116 118 //S0da=S0ad;119 //S0db=S0bd;120 //S0dc=S0cd;117 S0da=S0ad; 118 S0db=S0bd; 119 S0dc=S0cd; 121 120 Pdd=2.0*(exp(-Xd)-1.0+Xd)/(Xd*Xd); // free D chain 122 121 S0dd=N[3]*Phi[3]*v[3]*Pdd; … … 199 198 S0ca=S0ac; 200 199 S0cb=S0bc; 201 //S0da=S0ad;202 //S0db=S0bd;203 //S0dc=S0cd;200 S0da=S0ad; 201 S0db=S0bd; 202 S0dc=S0cd; 204 203 205 204 // self chi parameter is 0 ... of course … … 207 206 Kbb=0.0; 208 207 Kcc=0.0; 209 //Kdd=0.0;208 Kdd=0.0; 210 209 211 210 Kba=Kab; 212 211 Kca=Kac; 213 212 Kcb=Kbc; 214 //Kda=Kad;215 //Kdb=Kbd;216 //Kdc=Kcd;213 Kda=Kad; 214 Kdb=Kbd; 215 Kdc=Kcd; 217 216 218 217 Zaa=Kaa-Kad-Kad; … … 295 294 S12= Q12*S0aa + Q22*S0ab + Q32*S0ac; 296 295 S13= Q13*S0aa + Q23*S0ab + Q33*S0ac; 296 S14=-S11-S12-S13; 297 S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 297 298 S22= Q12*S0ba + Q22*S0bb + Q32*S0bc; 298 299 S23= Q13*S0ba + Q23*S0bb + Q33*S0bc; 300 S24=-S21-S22-S23; 301 S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 302 S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 299 303 S33= Q13*S0ca + Q23*S0cb + Q33*S0cc; 300 //S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 301 //S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 302 //S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 303 //S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 304 //S14=-S11-S12-S13; 305 //S24=-S21-S22-S23; 306 //S34=-S31-S32-S33; 307 //S41=S14; 308 //S42=S24; 309 //S43=S34; 304 S34=-S31-S32-S33; 305 S41=S14; 306 S42=S24; 307 S43=S34; 308 S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 310 309 311 310 //calculate contrast where L[i] is the scattering length of i and D is the matrix -
sasmodels/resolution.py
rda3638f re2592f0 498 498 if q_min < 0: 499 499 q_min = q[0]*MINIMUM_ABSOLUTE_Q 500 n_low = int(np.ceil(log_delta_q * (log(q[0])-log(q_min))))501 q_low = np.logspace(log10(q_min), log10(q[0]), n_low+1)[:-1]500 n_low = log_delta_q * (log(q[0])-log(q_min)) 501 q_low = np.logspace(log10(q_min), log10(q[0]), int(np.ceil(n_low))+1)[:-1] 502 502 else: 503 503 q_low = [] 504 504 if q_max > q[-1]: 505 n_high = int(np.ceil(log_delta_q * (log(q_max)-log(q[-1]))))506 q_high = np.logspace(log10(q[-1]), log10(q_max), n_high+1)[1:]505 n_high = log_delta_q * (log(q_max)-log(q[-1])) 506 q_high = np.logspace(log10(q[-1]), log10(q_max), int(np.ceil(n_high))+1)[1:] 507 507 else: 508 508 q_high = []
Note: See TracChangeset
for help on using the changeset viewer.