Changes in / [36a2418:e5bbe64] in sasmodels
- Location:
- sasmodels
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel.py
r3199b17 rcd28947 23 23 # pylint: enable=unused-import 24 24 25 26 25 class KernelModel(object): 27 26 info = None # type: ModelInfo … … 34 33 # type: () -> None 35 34 pass 36 37 35 38 36 class Kernel(object): -
sasmodels/kernelcl.py
r3199b17 rf872fd1 61 61 62 62 63 # Attempt to setup OpenCL. This may fail if the pyopencl package is not63 # Attempt to setup opencl. This may fail if the pyopencl package is not 64 64 # installed or if it is installed but there are no devices available. 65 65 try: … … 67 67 from pyopencl import mem_flags as mf 68 68 from pyopencl.characterize import get_fast_inaccurate_build_options 69 # Ask OpenCL for the default context so that we know that one exists .69 # Ask OpenCL for the default context so that we know that one exists 70 70 cl.create_some_context(interactive=False) 71 71 HAVE_OPENCL = True … … 88 88 # pylint: enable=unused-import 89 89 90 91 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path). 90 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path) 92 91 def quote_path(v): 93 92 """ … … 100 99 return '"'+v+'"' if v and ' ' in v and not v[0] in "\"'-" else v 101 100 102 103 101 def fix_pyopencl_include(): 104 102 """ … … 107 105 import pyopencl as cl 108 106 if hasattr(cl, '_DEFAULT_INCLUDE_OPTIONS'): 109 cl._DEFAULT_INCLUDE_OPTIONS = [ 110 quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS 111 ] 112 107 cl._DEFAULT_INCLUDE_OPTIONS = [quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS] 113 108 114 109 if HAVE_OPENCL: … … 123 118 MAX_LOOPS = 2048 124 119 120 125 121 # Pragmas for enable OpenCL features. Be sure to protect them so that they 126 122 # still compile even if OpenCL is not present. … … 137 133 """ 138 134 139 140 135 def use_opencl(): 141 136 sas_opencl = os.environ.get("SAS_OPENCL", "OpenCL").lower() 142 137 return HAVE_OPENCL and sas_opencl != "none" and not sas_opencl.startswith("cuda") 143 138 144 145 139 ENV = None 146 140 def reset_environment(): … … 150 144 global ENV 151 145 ENV = GpuEnvironment() if use_opencl() else None 152 153 146 154 147 def environment(): … … 168 161 return ENV 169 162 170 171 163 def has_type(device, dtype): 172 164 # type: (cl.Device, np.dtype) -> bool … … 179 171 return "cl_khr_fp64" in device.extensions 180 172 else: 181 # Not supporting F16 type since it isn't accurate enough .173 # Not supporting F16 type since it isn't accurate enough 182 174 return False 183 184 175 185 176 def get_warp(kernel, queue): … … 191 182 cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, 192 183 queue.device) 193 194 184 195 185 def compile_model(context, source, dtype, fast=False): … … 213 203 source_list.insert(0, _F64_PRAGMA) 214 204 215 # Note: USE_SINCOS makes the Intel CPU slower under OpenCL.205 # Note: USE_SINCOS makes the intel cpu slower under opencl 216 206 if context.devices[0].type == cl.device_type.GPU: 217 207 source_list.insert(0, "#define USE_SINCOS\n") … … 220 210 source = "\n".join(source_list) 221 211 program = cl.Program(context, source).build(options=options) 222 223 212 #print("done with "+program) 224 213 return program 225 214 226 215 227 # For now, this returns one device in the context.228 # TODO: Create a context that contains all devices on all platforms.216 # for now, this returns one device in the context 217 # TODO: create a context that contains all devices on all platforms 229 218 class GpuEnvironment(object): 230 219 """ 231 GPU context for OpenCL, with possibly many devices and one queue per device. 220 GPU context, with possibly many devices, and one queue per device. 221 222 Because the environment can be reset during a live program (e.g., if the 223 user changes the active GPU device in the GUI), everything associated 224 with the device context must be cached in the environment and recreated 225 if the environment changes. The *cache* attribute is a simple dictionary 226 which holds keys and references to objects, such as compiled kernels and 227 allocated buffers. The running program should check in the cache for 228 long lived objects and create them if they are not there. The program 229 should not hold onto cached objects, but instead only keep them active 230 for the duration of a function call. When the environment is destroyed 231 then the *release* method for each active cache item is called before 232 the environment is freed. This means that each cl buffer should be 233 in its own cache entry. 232 234 """ 233 235 def __init__(self): 234 236 # type: () -> None 235 # Find gpu context.237 # find gpu context 236 238 context_list = _create_some_context() 237 239 … … 247 249 self.context[dtype] = None 248 250 249 # Build a queue for each context .251 # Build a queue for each context 250 252 self.queue = {} 251 253 context = self.context[F32] … … 257 259 self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 258 260 259 # # Byte boundary for data alignment.261 # Byte boundary for data alignment 260 262 #self.data_boundary = max(context.devices[0].min_data_type_align_size 261 263 # for context in self.context.values()) 262 264 263 # Cache for compiled programs, and for items in context .265 # Cache for compiled programs, and for items in context 264 266 self.compiled = {} 267 self.cache = {} 265 268 266 269 def has_type(self, dtype): … … 277 280 """ 278 281 # Note: PyOpenCL caches based on md5 hash of source, options and device 279 # but I'll do so as well just to save some data munging time. 282 # so we don't really need to cache things for ourselves. I'll do so 283 # anyway just to save some data munging time. 280 284 tag = generate.tag_source(source) 281 285 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 282 # Check timestamp on program .286 # Check timestamp on program 283 287 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 284 288 if program_timestamp < timestamp: … … 293 297 return program 294 298 299 def free_buffer(self, key): 300 if key in self.cache: 301 self.cache[key].release() 302 del self.cache[key] 303 304 def __del__(self): 305 for v in self.cache.values(): 306 release = getattr(v, 'release', lambda: None) 307 release() 308 self.cache = {} 309 310 _CURRENT_ID = 0 311 def unique_id(): 312 global _CURRENT_ID 313 _CURRENT_ID += 1 314 return _CURRENT_ID 295 315 296 316 def _create_some_context(): … … 305 325 which one (and not a CUDA device, or no GPU). 306 326 """ 307 # Assume we do not get here if SAS_OPENCL is None or CUDA .327 # Assume we do not get here if SAS_OPENCL is None or CUDA 308 328 sas_opencl = os.environ.get('SAS_OPENCL', 'opencl') 309 329 if sas_opencl.lower() != 'opencl': 310 # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context .330 # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context 311 331 os.environ["PYOPENCL_CTX"] = sas_opencl 312 332 … … 316 336 except Exception as exc: 317 337 warnings.warn(str(exc)) 318 warnings.warn("pyopencl.create_some_context() failed. The " 319 "environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might " 320 "not be set correctly") 338 warnings.warn("pyopencl.create_some_context() failed") 339 warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") 321 340 322 341 return _get_default_context() 323 324 342 325 343 def _get_default_context(): … … 334 352 # is running may increase throughput. 335 353 # 336 # Mac Book Pro, base install:354 # Macbook pro, base install: 337 355 # {'Apple': [Intel CPU, NVIDIA GPU]} 338 # Mac Book Pro, base install:356 # Macbook pro, base install: 339 357 # {'Apple': [Intel CPU, Intel GPU]} 340 # 2 x NVIDIA 295 with Intel and NVIDIA opencl drivers install:358 # 2 x nvidia 295 with Intel and NVIDIA opencl drivers installed 341 359 # {'Intel': [CPU], 'NVIDIA': [GPU, GPU, GPU, GPU]} 342 360 gpu, cpu = None, None … … 361 379 else: 362 380 # System has cl.device_type.ACCELERATOR or cl.device_type.CUSTOM 363 # Intel Phi for example registers as an accelerator .381 # Intel Phi for example registers as an accelerator 364 382 # Since the user installed a custom device on their system 365 383 # and went through the pain of sorting out OpenCL drivers for … … 368 386 gpu = device 369 387 370 # Order the devices by gpu then by cpu; when searching for an available388 # order the devices by gpu then by cpu; when searching for an available 371 389 # device by data type they will be checked in this order, which means 372 390 # that if the gpu supports double then the cpu will never be used (though … … 395 413 that the compiler is allowed to take shortcuts. 396 414 """ 397 info = None # type: ModelInfo398 source = "" # type: str399 dtype = None # type: np.dtype400 fast = False # type: bool401 _program = None # type: cl.Program402 _kernels = None # type: Dict[str, cl.Kernel]403 404 415 def __init__(self, source, model_info, dtype=generate.F32, fast=False): 405 416 # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None … … 408 419 self.dtype = dtype 409 420 self.fast = fast 421 self.timestamp = generate.ocl_timestamp(self.info) 422 self._cache_key = unique_id() 410 423 411 424 def __getstate__(self): … … 416 429 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 417 430 self.info, self.source, self.dtype, self.fast = state 418 self._program = self._kernels = None419 431 420 432 def make_kernel(self, q_vectors): … … 422 434 return GpuKernel(self, q_vectors) 423 435 424 def get_function(self, name): 436 @property 437 def Iq(self): 438 return self._fetch_kernel('Iq') 439 440 def fetch_kernel(self, name): 425 441 # type: (str) -> cl.Kernel 426 442 """ … … 428 444 does not already exist. 429 445 """ 430 if self._program is None: 431 self._prepare_program() 432 return self._kernels[name] 433 434 def _prepare_program(self): 435 # type: (str) -> None 436 env = environment() 437 timestamp = generate.ocl_timestamp(self.info) 438 program = env.compile_program( 439 self.info.name, 440 self.source['opencl'], 441 self.dtype, 442 self.fast, 443 timestamp) 444 variants = ['Iq', 'Iqxy', 'Imagnetic'] 445 names = [generate.kernel_name(self.info, k) for k in variants] 446 functions = [getattr(program, k) for k in names] 447 self._kernels = {k: v for k, v in zip(variants, functions)} 448 # Keep a handle to program so GC doesn't collect. 449 self._program = program 450 451 452 # TODO: Check that we don't need a destructor for buffers which go out of scope. 446 gpu = environment() 447 key = self._cache_key 448 if key not in gpu.cache: 449 program = gpu.compile_program( 450 self.info.name, 451 self.source['opencl'], 452 self.dtype, 453 self.fast, 454 self.timestamp) 455 variants = ['Iq', 'Iqxy', 'Imagnetic'] 456 names = [generate.kernel_name(self.info, k) for k in variants] 457 kernels = [getattr(program, k) for k in names] 458 data = dict((k, v) for k, v in zip(variants, kernels)) 459 # keep a handle to program so GC doesn't collect 460 data['program'] = program 461 gpu.cache[key] = data 462 else: 463 data = gpu.cache[key] 464 return data[name] 465 466 # TODO: check that we don't need a destructor for buffers which go out of scope 453 467 class GpuInput(object): 454 468 """ … … 472 486 def __init__(self, q_vectors, dtype=generate.F32): 473 487 # type: (List[np.ndarray], np.dtype) -> None 474 # TODO: Do we ever need double precision q?488 # TODO: do we ever need double precision q? 475 489 self.nq = q_vectors[0].size 476 490 self.dtype = np.dtype(dtype) 477 491 self.is_2d = (len(q_vectors) == 2) 478 # TODO: Stretch input based on get_warp().479 # Not doing it now since warp depends on kernel, which is not known492 # TODO: stretch input based on get_warp() 493 # not doing it now since warp depends on kernel, which is not known 480 494 # at this point, so instead using 32, which is good on the set of 481 495 # architectures tested so far. … … 490 504 self.q[:self.nq] = q_vectors[0] 491 505 self.global_size = [self.q.shape[0]] 492 #print("creating inputs of size", self.global_size) 493 494 # Transfer input value to GPU. 506 self._cache_key = unique_id() 507 508 @property 509 def q_b(self): 510 """Lazy creation of q buffer so it can survive context reset""" 495 511 env = environment() 496 context = env.context[self.dtype] 497 self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 498 hostbuf=self.q) 512 key = self._cache_key 513 if key not in env.cache: 514 context = env.context[self.dtype] 515 #print("creating inputs of size", self.global_size) 516 buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 517 hostbuf=self.q) 518 env.cache[key] = buffer 519 return env.cache[key] 499 520 500 521 def release(self): 501 522 # type: () -> None 502 523 """ 503 Free the buffer associated with the q value. 504 """ 505 if self.q_b is not None: 506 self.q_b.release() 507 self.q_b = None 524 Free the buffer associated with the q value 525 """ 526 environment().free_buffer(id(self)) 508 527 509 528 def __del__(self): … … 511 530 self.release() 512 531 513 514 532 class GpuKernel(Kernel): 515 533 """ … … 518 536 *model* is the GpuModel object to call 519 537 520 The kernel is derived from :class:`Kernel`, providing the 521 :meth:`call_kernel` method to evaluate the kernel for a given set of 522 parameters. Because of the need to move the q values to the GPU before 523 evaluation, the kernel is instantiated for a particular set of q vectors, 524 and can be called many times without transfering q each time. 538 The following attributes are defined: 539 540 *info* is the module information 541 542 *dtype* is the kernel precision 543 544 *dim* is '1d' or '2d' 545 546 *result* is a vector to contain the results of the call 547 548 The resulting call method takes the *pars*, a list of values for 549 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 550 vectors for the polydisperse parameters. *cutoff* determines the 551 integration limits: any points with combined weight less than *cutoff* 552 will not be calculated. 525 553 526 554 Call :meth:`release` when done with the kernel instance. 527 555 """ 528 #: SAS model information structure.529 info = None # type: ModelInfo530 #: Kernel precision.531 dtype = None # type: np.dtype532 #: Kernel dimensions (1d or 2d).533 dim = "" # type: str534 #: Calculation results, updated after each call to :meth:`_call_kernel`.535 result = None # type: np.ndarray536 537 556 def __init__(self, model, q_vectors): 538 # type: ( GpuModel, List[np.ndarray]) -> None557 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 539 558 dtype = model.dtype 540 559 self.q_input = GpuInput(q_vectors, dtype) 541 560 self._model = model 542 543 # Attributes accessed from the outside. 561 # F16 isn't sufficient, so don't support it 562 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 563 self._cache_key = unique_id() 564 565 # attributes accessed from the outside 544 566 self.dim = '2d' if self.q_input.is_2d else '1d' 545 567 self.info = model.info 546 self.dtype = dtype 547 548 # Converter to translate input to target type. 549 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 550 551 # Holding place for the returned value. 568 self.dtype = model.dtype 569 570 # holding place for the returned value 552 571 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 553 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 554 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 555 556 # Allocate result value on GPU. 572 extra_q = 4 # total weight, form volume, shell volume and R_eff 573 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 574 575 @property 576 def _result_b(self): 577 """Lazy creation of result buffer so it can survive context reset""" 557 578 env = environment() 558 context = env.context[self.dtype] 559 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 560 self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 561 562 def _call_kernel(self, call_details, values, cutoff, magnetic, 563 effective_radius_type): 564 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 579 key = self._cache_key 580 if key not in env.cache: 581 context = env.context[self.dtype] 582 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 583 buffer = cl.Buffer(context, mf.READ_WRITE, width) 584 env.cache[key] = buffer 585 return env.cache[key] 586 587 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 588 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 565 589 env = environment() 566 590 queue = env.queue[self._model.dtype] 567 591 context = queue.context 568 592 569 # Arrange data transfer to card. 593 # Arrange data transfer to/from card 594 q_b = self.q_input.q_b 595 result_b = self._result_b 570 596 details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 571 597 hostbuf=call_details.buffer) … … 573 599 hostbuf=values) 574 600 575 # Setup kernel function and arguments.576 601 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 577 kernel = self._model. get_function(name)602 kernel = self._model.fetch_kernel(name) 578 603 kernel_args = [ 579 np.uint32(self.q_input.nq), # Number of inputs. 580 None, # Placeholder for pd_start. 581 None, # Placeholder for pd_stop. 582 details_b, # Problem definition. 583 values_b, # Parameter values. 584 self.q_input.q_b, # Q values. 585 self._result_b, # Result storage. 586 self._as_dtype(cutoff), # Probability cutoff. 587 np.uint32(effective_radius_type), # R_eff mode. 604 np.uint32(self.q_input.nq), None, None, 605 details_b, values_b, q_b, result_b, 606 self._as_dtype(cutoff), 607 np.uint32(effective_radius_type), 588 608 ] 589 590 # Call kernel and retrieve results.591 609 #print("Calling OpenCL") 592 610 #call_details.show(values) 611 #Call kernel and retrieve results 593 612 wait_for = None 594 613 last_nap = time.clock() … … 601 620 *kernel_args, wait_for=wait_for)] 602 621 if stop < call_details.num_eval: 603 # Allow other processes to run .622 # Allow other processes to run 604 623 wait_for[0].wait() 605 624 current_time = time.clock() … … 607 626 time.sleep(0.001) 608 627 last_nap = current_time 609 cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for)628 cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 610 629 #print("result", self.result) 611 630 612 # Free buffers. 613 details_b.release() 614 values_b.release() 631 # Free buffers 632 for v in (details_b, values_b): 633 if v is not None: 634 v.release() 615 635 616 636 def release(self): … … 619 639 Release resources associated with the kernel. 620 640 """ 641 environment().free_buffer(id(self)) 621 642 self.q_input.release() 622 if self._result_b is not None:623 self._result_b.release()624 self._result_b = None625 643 626 644 def __del__(self): -
sasmodels/kernelcuda.py
rfa26e78 rf872fd1 63 63 import time 64 64 import re 65 import atexit66 65 67 66 import numpy as np # type: ignore 68 67 69 68 70 # Attempt to setup CUDA. This may fail if the pycuda package is not69 # Attempt to setup cuda. This may fail if the pycuda package is not 71 70 # installed or if it is installed but there are no devices available. 72 71 try: … … 108 107 MAX_LOOPS = 2048 109 108 110 111 109 def use_cuda(): 112 sas_opencl = os.environ.get("SAS_OPENCL", "CUDA").lower() 113 return HAVE_CUDA and sas_opencl.startswith("cuda") 114 110 env = os.environ.get("SAS_OPENCL", "").lower() 111 return HAVE_CUDA and (env == "" or env.startswith("cuda")) 115 112 116 113 ENV = None … … 124 121 ENV.release() 125 122 ENV = GpuEnvironment() if use_cuda() else None 126 127 123 128 124 def environment(): … … 142 138 return ENV 143 139 144 145 # PyTest is not freeing ENV, so make sure it gets freed.146 atexit.register(lambda: ENV.release() if ENV is not None else None)147 148 149 140 def has_type(dtype): 150 141 # type: (np.dtype) -> bool … … 152 143 Return true if device supports the requested precision. 153 144 """ 154 # Assume the NVIDIAcard supports 32-bit and 64-bit floats.155 # TODO: Check if pycuda support F16.145 # Assume the nvidia card supports 32-bit and 64-bit floats. 146 # TODO: check if pycuda support F16 156 147 return dtype in (generate.F32, generate.F64) 157 148 158 149 159 150 FUNCTION_PATTERN = re.compile(r"""^ 160 (?P<space>\s*) # Initial space.161 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # One or more qualifiers before function.162 (?P<function>\s*\b\w+\b\s*[(]) # Function name plus open parens.151 (?P<space>\s*) # initial space 152 (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # one or more qualifiers before function 153 (?P<function>\s*\b\w+\b\s*[(]) # function name plus open parens 163 154 """, re.VERBOSE|re.MULTILINE) 164 155 … … 167 158 """, re.VERBOSE|re.MULTILINE) 168 159 169 170 160 def _add_device_tag(match): 171 161 # type: (None) -> str 172 # Note: Should be re.Match, but that isn't a simple type.162 # Note: should be re.Match, but that isn't a simple type 173 163 """ 174 164 replace qualifiers with __device__ qualifiers if needed … … 183 173 return "".join((space, "__device__ ", qualifiers, function)) 184 174 185 186 175 def mark_device_functions(source): 187 176 # type: (str) -> str … … 190 179 """ 191 180 return FUNCTION_PATTERN.sub(_add_device_tag, source) 192 193 181 194 182 def show_device_functions(source): … … 200 188 print(match.group('qualifiers').replace('\n',r'\n'), match.group('function'), '(') 201 189 return source 202 203 190 204 191 def compile_model(source, dtype, fast=False): … … 225 212 #options = ['--verbose', '-E'] 226 213 options = ['--use_fast_math'] if fast else None 227 program = SourceModule(source, no_extern_c=True, options=options) # , include_dirs=[...])214 program = SourceModule(source, no_extern_c=True, options=options) # include_dirs=[...] 228 215 229 216 #print("done with "+program) … … 231 218 232 219 233 # For now, this returns one device in the context.234 # TODO: Create a context that contains all devices on all platforms.220 # for now, this returns one device in the context 221 # TODO: create a context that contains all devices on all platforms 235 222 class GpuEnvironment(object): 236 223 """ 237 GPU context for CUDA.224 GPU context, with possibly many devices, and one queue per device. 238 225 """ 239 226 context = None # type: cuda.Context 240 227 def __init__(self, devnum=None): 241 228 # type: (int) -> None 229 # Byte boundary for data alignment 230 #self.data_boundary = max(d.min_data_type_align_size 231 # for d in self.context.devices) 232 self.compiled = {} 242 233 env = os.environ.get("SAS_OPENCL", "").lower() 243 234 if devnum is None and env.startswith("cuda:"): 244 235 devnum = int(env[5:]) 245 246 236 # Set the global context to the particular device number if one is 247 237 # given, otherwise use the default context. Perhaps this will be set … … 252 242 self.context = make_default_context() 253 243 254 ## Byte boundary for data alignment.255 #self.data_boundary = max(d.min_data_type_align_size256 # for d in self.context.devices)257 258 # Cache for compiled programs, and for items in context.259 self.compiled = {}260 261 244 def release(self): 262 245 if self.context is not None: … … 279 262 Compile the program for the device in the given context. 280 263 """ 281 # Note: PyCuda (probably) caches but I'll do so as well just to 282 # save some data munging time. 264 # Note: PyOpenCL caches based on md5 hash of source, options and device 265 # so we don't really need to cache things for ourselves. I'll do so 266 # anyway just to save some data munging time. 283 267 tag = generate.tag_source(source) 284 268 key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 285 # Check timestamp on program .269 # Check timestamp on program 286 270 program, program_timestamp = self.compiled.get(key, (None, np.inf)) 287 271 if program_timestamp < timestamp: … … 293 277 return program 294 278 295 296 279 class GpuModel(KernelModel): 297 280 """ … … 309 292 that the compiler is allowed to take shortcuts. 310 293 """ 311 info = None 312 source = "" 313 dtype = None 314 fast = False 315 _program = None# type: SourceModule316 _kernels = None # type: Dict[str,cuda.Function]294 info = None # type: ModelInfo 295 source = "" # type: str 296 dtype = None # type: np.dtype 297 fast = False # type: bool 298 program = None # type: SourceModule 299 _kernels = None # type: List[cuda.Function] 317 300 318 301 def __init__(self, source, model_info, dtype=generate.F32, fast=False): … … 322 305 self.dtype = dtype 323 306 self.fast = fast 307 self.program = None # delay program creation 308 self._kernels = None 324 309 325 310 def __getstate__(self): … … 330 315 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 331 316 self.info, self.source, self.dtype, self.fast = state 332 self. _program = self._kernels= None317 self.program = None 333 318 334 319 def make_kernel(self, q_vectors): 335 320 # type: (List[np.ndarray]) -> "GpuKernel" 336 return GpuKernel(self, q_vectors) 337 338 def get_function(self, name): 339 # type: (str) -> cuda.Function 340 """ 341 Fetch the kernel from the environment by name, compiling it if it 342 does not already exist. 343 """ 344 if self._program is None: 345 self._prepare_program() 346 return self._kernels[name] 347 348 def _prepare_program(self): 349 # type: (str) -> None 350 env = environment() 351 timestamp = generate.ocl_timestamp(self.info) 352 program = env.compile_program( 353 self.info.name, 354 self.source['opencl'], 355 self.dtype, 356 self.fast, 357 timestamp) 358 variants = ['Iq', 'Iqxy', 'Imagnetic'] 359 names = [generate.kernel_name(self.info, k) for k in variants] 360 functions = [program.get_function(k) for k in names] 361 self._kernels = {k: v for k, v in zip(variants, functions)} 362 # Keep a handle to program so GC doesn't collect. 363 self._program = program 364 365 366 # TODO: Check that we don't need a destructor for buffers which go out of scope. 321 if self.program is None: 322 compile_program = environment().compile_program 323 timestamp = generate.ocl_timestamp(self.info) 324 self.program = compile_program( 325 self.info.name, 326 self.source['opencl'], 327 self.dtype, 328 self.fast, 329 timestamp) 330 variants = ['Iq', 'Iqxy', 'Imagnetic'] 331 names = [generate.kernel_name(self.info, k) for k in variants] 332 kernels = [self.program.get_function(k) for k in names] 333 self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 334 is_2d = len(q_vectors) == 2 335 if is_2d: 336 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 337 else: 338 kernel = [self._kernels['Iq']]*2 339 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 340 341 def release(self): 342 # type: () -> None 343 """ 344 Free the resources associated with the model. 345 """ 346 if self.program is not None: 347 self.program = None 348 349 def __del__(self): 350 # type: () -> None 351 self.release() 352 353 # TODO: check that we don't need a destructor for buffers which go out of scope 367 354 class GpuInput(object): 368 355 """ … … 386 373 def __init__(self, q_vectors, dtype=generate.F32): 387 374 # type: (List[np.ndarray], np.dtype) -> None 388 # TODO: Do we ever need double precision q?375 # TODO: do we ever need double precision q? 389 376 self.nq = q_vectors[0].size 390 377 self.dtype = np.dtype(dtype) 391 378 self.is_2d = (len(q_vectors) == 2) 392 # TODO: Stretch input based on get_warp().393 # Not doing it now since warp depends on kernel, which is not known379 # TODO: stretch input based on get_warp() 380 # not doing it now since warp depends on kernel, which is not known 394 381 # at this point, so instead using 32, which is good on the set of 395 382 # architectures tested so far. 396 383 if self.is_2d: 397 width = ((self.nq+15)//16)*16 384 # Note: 16 rather than 15 because result is 1 longer than input. 385 width = ((self.nq+16)//16)*16 398 386 self.q = np.empty((width, 2), dtype=dtype) 399 387 self.q[:self.nq, 0] = q_vectors[0] 400 388 self.q[:self.nq, 1] = q_vectors[1] 401 389 else: 402 width = ((self.nq+31)//32)*32 390 # Note: 32 rather than 31 because result is 1 longer than input. 391 width = ((self.nq+32)//32)*32 403 392 self.q = np.empty(width, dtype=dtype) 404 393 self.q[:self.nq] = q_vectors[0] 405 394 self.global_size = [self.q.shape[0]] 406 395 #print("creating inputs of size", self.global_size) 407 408 # Transfer input value to GPU.409 396 self.q_b = cuda.to_device(self.q) 410 397 … … 412 399 # type: () -> None 413 400 """ 414 Free the buffer associated with the q value.401 Free the memory. 415 402 """ 416 403 if self.q_b is not None: … … 422 409 self.release() 423 410 424 425 411 class GpuKernel(Kernel): 426 412 """ 427 413 Callable SAS kernel. 428 414 429 *model* is the GpuModel object to call 430 431 The kernel is derived from :class:`Kernel`, providing the 432 :meth:`call_kernel` method to evaluate the kernel for a given set of 433 parameters. Because of the need to move the q values to the GPU before 434 evaluation, the kernel is instantiated for a particular set of q vectors, 435 and can be called many times without transfering q each time. 415 *kernel* is the GpuKernel object to call 416 417 *model_info* is the module information 418 419 *q_vectors* is the q vectors at which the kernel should be evaluated 420 421 *dtype* is the kernel precision 422 423 The resulting call method takes the *pars*, a list of values for 424 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 425 vectors for the polydisperse parameters. *cutoff* determines the 426 integration limits: any points with combined weight less than *cutoff* 427 will not be calculated. 436 428 437 429 Call :meth:`release` when done with the kernel instance. 438 430 """ 439 #: SAS model information structure. 440 info = None # type: ModelInfo 441 #: Kernel precision. 442 dtype = None # type: np.dtype 443 #: Kernel dimensions (1d or 2d). 444 dim = "" # type: str 445 #: Calculation results, updated after each call to :meth:`_call_kernel`. 446 result = None # type: np.ndarray 447 448 def __init__(self, model, q_vectors): 449 # type: (GpuModel, List[np.ndarray]) -> None 450 dtype = model.dtype 431 def __init__(self, kernel, dtype, model_info, q_vectors): 432 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 451 433 self.q_input = GpuInput(q_vectors, dtype) 452 self._model = model 453 454 # Attributes accessed from the outside. 434 self.kernel = kernel 435 # F16 isn't sufficient, so don't support it 436 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 437 438 # attributes accessed from the outside 455 439 self.dim = '2d' if self.q_input.is_2d else '1d' 456 self.info = model .info440 self.info = model_info 457 441 self.dtype = dtype 458 442 459 # Converter to translate input to target type. 460 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 461 462 # Holding place for the returned value. 443 # holding place for the returned value 463 444 nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 464 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 465 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 466 467 # Allocate result value on GPU. 445 extra_q = 4 # total weight, form volume, shell volume and R_eff 446 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 447 448 # Inputs and outputs for each kernel call 449 # Note: res may be shorter than res_b if global_size != nq 468 450 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 469 self._result_b = cuda.mem_alloc(width) 470 471 def _call_kernel(self, call_details, values, cutoff, magnetic, 472 effective_radius_type): 473 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 474 475 # Arrange data transfer to card. 451 self.result_b = cuda.mem_alloc(width) 452 self._need_release = [self.result_b] 453 454 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 455 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 456 # Arrange data transfer to card 476 457 details_b = cuda.to_device(call_details.buffer) 477 458 values_b = cuda.to_device(values) 478 459 479 # Setup kernel function and arguments. 480 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 481 kernel = self._model.get_function(name) 482 kernel_args = [ 483 np.uint32(self.q_input.nq), # Number of inputs. 484 None, # Placeholder for pd_start. 485 None, # Placeholder for pd_stop. 486 details_b, # Problem definition. 487 values_b, # Parameter values. 488 self.q_input.q_b, # Q values. 489 self._result_b, # Result storage. 490 self._as_dtype(cutoff), # Probability cutoff. 491 np.uint32(effective_radius_type), # R_eff mode. 460 kernel = self.kernel[1 if magnetic else 0] 461 args = [ 462 np.uint32(self.q_input.nq), None, None, 463 details_b, values_b, self.q_input.q_b, self.result_b, 464 self._as_dtype(cutoff), 465 np.uint32(effective_radius_type), 492 466 ] 493 467 grid = partition(self.q_input.nq) 494 495 # Call kernel and retrieve results. 496 #print("Calling CUDA") 468 #print("Calling OpenCL") 497 469 #call_details.show(values) 470 # Call kernel and retrieve results 498 471 last_nap = time.clock() 499 472 step = 100000000//self.q_input.nq + 1 … … 502 475 stop = min(start + step, call_details.num_eval) 503 476 #print("queuing",start,stop) 504 kernel_args[1:3] = [np.int32(start), np.int32(stop)]505 kernel(* kernel_args, **grid)477 args[1:3] = [np.int32(start), np.int32(stop)] 478 kernel(*args, **grid) 506 479 if stop < call_details.num_eval: 507 480 sync() 508 # Allow other processes to run .481 # Allow other processes to run 509 482 current_time = time.clock() 510 483 if current_time - last_nap > 0.5: … … 512 485 last_nap = current_time 513 486 sync() 514 cuda.memcpy_dtoh(self.result, self. _result_b)487 cuda.memcpy_dtoh(self.result, self.result_b) 515 488 #print("result", self.result) 516 489 … … 523 496 Release resources associated with the kernel. 524 497 """ 525 self.q_input.release() 526 if self._result_b is not None: 527 self._result_b.free() 528 self._result_b = None 498 for p in self._need_release: 499 p.free() 500 self._need_release = [] 529 501 530 502 def __del__(self): … … 540 512 Note: Maybe context.synchronize() is sufficient. 541 513 """ 542 # Create an event with which to synchronize. 514 #return # The following works in C++; don't know what pycuda is doing 515 # Create an event with which to synchronize 543 516 done = cuda.Event() 544 517 … … 546 519 done.record() 547 520 548 # Make sure we don't hog resource while waiting to sync.521 #line added to not hog resources 549 522 while not done.query(): 550 523 time.sleep(0.01) … … 552 525 # Block until the GPU executes the kernel. 553 526 done.synchronize() 554 555 527 # Clean up the event; I don't think they can be reused. 556 528 del done -
sasmodels/kerneldll.py
r3199b17 re44432d 100 100 # pylint: enable=unused-import 101 101 102 # Compiler output is a byte stream that needs to be decode in python 3 .102 # Compiler output is a byte stream that needs to be decode in python 3 103 103 decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 104 104 … … 115 115 COMPILER = "tinycc" 116 116 elif "VCINSTALLDIR" in os.environ: 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 118 # environment and we can use the MSVC compiler. Otherwise, if 119 # tinycc is available then use it. Otherwise, hope that mingw 120 # is available. 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 118 # and we can use the MSVC compiler. Otherwise, if tinycc is available 119 # the use it. Otherwise, hope that mingw is available. 121 120 COMPILER = "msvc" 122 121 else: … … 125 124 COMPILER = "unix" 126 125 127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 .126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 128 127 if COMPILER == "unix": 129 # Generic unix compile .130 # On Mac users will need the X code command line tools installed.128 # Generic unix compile 129 # On mac users will need the X code command line tools installed 131 130 #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 132 131 CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 133 # Add OpenMP support if not running on a Mac.132 # add openmp support if not running on a mac 134 133 if sys.platform != "darwin": 135 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) .134 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 136 135 # Shut it off for all unix until we can investigate. 137 136 #CC.append("-fopenmp") … … 145 144 # vcomp90.dll on the path. One may be found here: 146 145 # C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 147 # Copy this to the python directory and uncomment the OpenMP COMPILE .148 # TODO: Remove intermediate OBJ file created in the directory.149 # TODO: Maybe don't use randomized name for the c file.150 # TODO: Maybe ask distutils to find MSVC.146 # Copy this to the python directory and uncomment the OpenMP COMPILE 147 # TODO: remove intermediate OBJ file created in the directory 148 # TODO: maybe don't use randomized name for the c file 149 # TODO: maybe ask distutils to find MSVC 151 150 CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 152 151 if "SAS_OPENMP" in os.environ: … … 173 172 ALLOW_SINGLE_PRECISION_DLLS = True 174 173 175 176 174 def compile(source, output): 177 175 # type: (str, str) -> None … … 185 183 logging.info(command_str) 186 184 try: 187 # Need shell=True on windows to keep console box from popping up.185 # need shell=True on windows to keep console box from popping up 188 186 shell = (os.name == 'nt') 189 187 subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) … … 194 192 raise RuntimeError("compile failed. File is in %r"%source) 195 193 196 197 194 def dll_name(model_info, dtype): 198 195 # type: (ModelInfo, np.dtype) -> str … … 205 202 basename += ARCH + ".so" 206 203 207 # Hack to find precompiled dlls .204 # Hack to find precompiled dlls 208 205 path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 209 206 if os.path.exists(path): … … 245 242 raise ValueError("16 bit floats not supported") 246 243 if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 247 dtype = F64 # Force 64-bit dll .248 # Note: dtype may be F128 for long double precision .244 dtype = F64 # Force 64-bit dll 245 # Note: dtype may be F128 for long double precision 249 246 250 247 dll = dll_path(model_info, dtype) … … 257 254 need_recompile = dll_time < newest_source 258 255 if need_recompile: 259 # Make sure the DLL path exists .256 # Make sure the DLL path exists 260 257 if not os.path.exists(SAS_DLL_PATH): 261 258 os.makedirs(SAS_DLL_PATH) … … 266 263 file_handle.write(source) 267 264 compile(source=filename, output=dll) 268 # Comment the following to keep the generated C file.269 # Note: If there is a syntax error then compile raises an error265 # comment the following to keep the generated c file 266 # Note: if there is a syntax error then compile raises an error 270 267 # and the source file will not be deleted. 271 268 os.unlink(filename) … … 306 303 self.dllpath = dllpath 307 304 self._dll = None # type: ct.CDLL 308 self._kernels = None 305 self._kernels = None # type: List[Callable, Callable] 309 306 self.dtype = np.dtype(dtype) 310 307 … … 341 338 # type: (List[np.ndarray]) -> DllKernel 342 339 q_input = PyInput(q_vectors, self.dtype) 343 # Note: DLL is lazy loaded.340 # Note: pickle not supported for DllKernel 344 341 if self._dll is None: 345 342 self._load_dll() … … 361 358 self._dll = None 362 359 363 364 360 class DllKernel(Kernel): 365 361 """ … … 383 379 def __init__(self, kernel, model_info, q_input): 384 380 # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 385 dtype = q_input.dtype 381 #,model_info,q_input) 382 self.kernel = kernel 383 self.info = model_info 386 384 self.q_input = q_input 387 self.kernel = kernel 388 389 # Attributes accessed from the outside. 385 self.dtype = q_input.dtype 390 386 self.dim = '2d' if q_input.is_2d else '1d' 391 self.info = model_info 392 self.dtype = dtype 393 394 # Converter to translate input to target type. 395 self._as_dtype = (np.float32 if dtype == generate.F32 396 else np.float64 if dtype == generate.F64 397 else np.float128) 398 399 # Holding place for the returned value. 387 # leave room for f1/f2 results in case we need to compute beta for 1d models 400 388 nout = 2 if self.info.have_Fq else 1 401 extra_q = 4 # Total weight, form volume, shell volume and R_eff.402 self.result = np.empty( self.q_input.nq*nout + extra_q,dtype)403 404 def _call_kernel(self, call_details, values, cutoff, magnetic,405 e ffective_radius_type):406 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 407 408 # Setup kernel function and arguments.389 # +4 for total weight, shell volume, effective radius, form volume 390 self.result = np.empty(q_input.nq*nout + 4, self.dtype) 391 self.real = (np.float32 if self.q_input.dtype == generate.F32 392 else np.float64 if self.q_input.dtype == generate.F64 393 else np.float128) 394 395 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 396 # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 409 397 kernel = self.kernel[1 if magnetic else 0] 410 kernel_args = [411 self.q_input.nq, # Number of inputs.412 None, # Placeholder for pd_start.413 None, # Placeholder for pd_stop.414 call_details.buffer.ctypes.data, # Problem definition.415 values.ctypes.data, # Parameter values.416 self.q_input.q.ctypes.data, # Q values.417 self.result.ctypes.data, # Result storage.418 self. _as_dtype(cutoff), # Probability cutoff.419 effective_radius_type, # R_eff mode.398 args = [ 399 self.q_input.nq, # nq 400 None, # pd_start 401 None, # pd_stop pd_stride[MAX_PD] 402 call_details.buffer.ctypes.data, # problem 403 values.ctypes.data, # pars 404 self.q_input.q.ctypes.data, # q 405 self.result.ctypes.data, # results 406 self.real(cutoff), # cutoff 407 effective_radius_type, # cutoff 420 408 ] 421 422 # Call kernel and retrieve results.423 409 #print("Calling DLL") 424 410 #call_details.show(values) 425 411 step = 100 426 # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops?427 412 for start in range(0, call_details.num_eval, step): 428 413 stop = min(start + step, call_details.num_eval) 429 kernel_args[1:3] = [start, stop]430 kernel(* kernel_args) # type: ignore414 args[1:3] = [start, stop] 415 kernel(*args) # type: ignore 431 416 432 417 def release(self): 433 418 # type: () -> None 434 419 """ 435 Release resources associated with the kernel.420 Release any resources associated with the kernel. 436 421 """ 437 # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 438 # Should we be doing the same for DLL? 439 #self.q_input.release() 440 pass 441 442 def __del__(self): 443 # type: () -> None 444 self.release() 422 self.q_input.release() -
sasmodels/kernelpy.py
r3199b17 re44432d 33 33 logger = logging.getLogger(__name__) 34 34 35 36 35 class PyModel(KernelModel): 37 36 """ … … 39 38 """ 40 39 def __init__(self, model_info): 41 # Make sure Iq is available and vectorized .40 # Make sure Iq is available and vectorized 42 41 _create_default_functions(model_info) 43 42 self.info = model_info … … 54 53 """ 55 54 pass 56 57 55 58 56 class PyInput(object): … … 93 91 self.q = None 94 92 95 96 93 class PyKernel(Kernel): 97 94 """ … … 134 131 parameter_vector = np.empty(len(partable.call_parameters)-2, 'd') 135 132 136 # Create views into the array to hold the arguments .133 # Create views into the array to hold the arguments 137 134 offset = 0 138 135 kernel_args, volume_args = [], [] … … 177 174 else (lambda mode: 1.0)) 178 175 176 177 179 178 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 180 179 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray … … 196 195 self.q_input.release() 197 196 self.q_input = None 198 199 197 200 198 def _loops(parameters, # type: np.ndarray … … 256 254 total = np.zeros(nq, 'd') 257 255 for loop_index in range(call_details.num_eval): 258 # Update polydispersity parameter values.256 # update polydispersity parameter values 259 257 if p0_index == p0_length: 260 258 pd_index = (loop_index//pd_stride)%pd_length … … 267 265 p0_index += 1 268 266 if weight > cutoff: 269 # Call the scattering function .267 # Call the scattering function 270 268 # Assume that NaNs are only generated if the parameters are bad; 271 269 # exclude all q for that NaN. Even better would be to have an … … 275 273 continue 276 274 277 # Update value and norm.275 # update value and norm 278 276 total += weight * Iq 279 277 weight_norm += weight … … 295 293 any functions that are not already marked as vectorized. 296 294 """ 297 # Note: Must call create_vector_Iq before create_vector_Iqxy.295 # Note: must call create_vector_Iq before create_vector_Iqxy 298 296 _create_vector_Iq(model_info) 299 297 _create_vector_Iqxy(model_info) -
sasmodels/model_test.py
r00afc15 r5024a56 167 167 # test using cuda if desired and available 168 168 if 'cuda' in loaders and use_cuda(): 169 test_name = "%s-cuda" % model_info.id169 test_name = "%s-cuda"%model_name 170 170 test_method_name = "test_%s_cuda" % model_info.id 171 171 # Using dtype=None so that the models that are only -
sasmodels/models/rpa.c
r19dc29e7 r71b751d 25 25 double S0ba,Pbb,S0bb,Pbc,S0bc,Pbd,S0bd; 26 26 double S0ca,S0cb,Pcc,S0cc,Pcd,S0cd; 27 //double S0da,S0db,S0dc;27 double S0da,S0db,S0dc; 28 28 double Pdd,S0dd; 29 29 double Kaa,Kbb,Kcc; 30 30 double Kba,Kca,Kcb; 31 //double Kda,Kdb,Kdc,Kdd;31 double Kda,Kdb,Kdc,Kdd; 32 32 double Zaa,Zab,Zac,Zba,Zbb,Zbc,Zca,Zcb,Zcc; 33 33 double DenT,T11,T12,T13,T21,T22,T23,T31,T32,T33; … … 36 36 double N11,N12,N13,N21,N22,N23,N31,N32,N33; 37 37 double M11,M12,M13,M21,M22,M23,M31,M32,M33; 38 double S11,S12,S22,S23,S13,S33; 39 //double S21,S31,S32,S44; 40 //double S14,S24,S34,S41,S42,S43; 38 double S11,S12,S13,S14,S21,S22,S23,S24; 39 double S31,S32,S33,S34,S41,S42,S43,S44; 41 40 double Lad,Lbd,Lcd,Nav,Intg; 42 41 … … 116 115 S0cd=(Phicd*vcd*Ncd)*Pcd; 117 116 118 //S0da=S0ad;119 //S0db=S0bd;120 //S0dc=S0cd;117 S0da=S0ad; 118 S0db=S0bd; 119 S0dc=S0cd; 121 120 Pdd=2.0*(exp(-Xd)-1.0+Xd)/(Xd*Xd); // free D chain 122 121 S0dd=N[3]*Phi[3]*v[3]*Pdd; … … 199 198 S0ca=S0ac; 200 199 S0cb=S0bc; 201 //S0da=S0ad;202 //S0db=S0bd;203 //S0dc=S0cd;200 S0da=S0ad; 201 S0db=S0bd; 202 S0dc=S0cd; 204 203 205 204 // self chi parameter is 0 ... of course … … 207 206 Kbb=0.0; 208 207 Kcc=0.0; 209 //Kdd=0.0;208 Kdd=0.0; 210 209 211 210 Kba=Kab; 212 211 Kca=Kac; 213 212 Kcb=Kbc; 214 //Kda=Kad;215 //Kdb=Kbd;216 //Kdc=Kcd;213 Kda=Kad; 214 Kdb=Kbd; 215 Kdc=Kcd; 217 216 218 217 Zaa=Kaa-Kad-Kad; … … 295 294 S12= Q12*S0aa + Q22*S0ab + Q32*S0ac; 296 295 S13= Q13*S0aa + Q23*S0ab + Q33*S0ac; 296 S14=-S11-S12-S13; 297 S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 297 298 S22= Q12*S0ba + Q22*S0bb + Q32*S0bc; 298 299 S23= Q13*S0ba + Q23*S0bb + Q33*S0bc; 300 S24=-S21-S22-S23; 301 S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 302 S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 299 303 S33= Q13*S0ca + Q23*S0cb + Q33*S0cc; 300 //S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 301 //S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 302 //S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 303 //S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 304 //S14=-S11-S12-S13; 305 //S24=-S21-S22-S23; 306 //S34=-S31-S32-S33; 307 //S41=S14; 308 //S42=S24; 309 //S43=S34; 304 S34=-S31-S32-S33; 305 S41=S14; 306 S42=S24; 307 S43=S34; 308 S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 310 309 311 310 //calculate contrast where L[i] is the scattering length of i and D is the matrix
Note: See TracChangeset
for help on using the changeset viewer.