# Changeset 2a12d8d8 in sasmodels

Ignore:
Timestamp:
Oct 25, 2018 1:01:45 PM (16 months ago)
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
c11d09f, 599993b9
Parents:
95f62aa (diff), df87acf (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into ticket-1015-gpu-mem-error

Files:
3 edited

### Legend:

Unmodified
 rbefe905
===========   ================================================================
M0:sld        $D_M M_0$
mtheta:sld    $\theta_M$
mphi:sld      $\phi_M$
up:angle      $\theta_\mathrm{up}$
up:frac_i     $u_i$ = (spin up)/(spin up + spin down) *before* the sample
up:frac_f     $u_f$ = (spin up)/(spin up + spin down) *after* the sample
sld_M0        $D_M M_0$
sld_mtheta    $\theta_M$
sld_mphi      $\phi_M$
up_frac_i     $u_i$ = (spin up)/(spin up + spin down) *before* the sample
up_frac_f     $u_f$ = (spin up)/(spin up + spin down) *after* the sample
up_angle      $\theta_\mathrm{up}$
===========   ================================================================

.. note::
    The values of the 'up:frac_i' and 'up:frac_f' must be in the range 0 to 1.
    The values of the 'up_frac_i' and 'up_frac_f' must be in the range 0 to 1.

*Document History*
 r475ff58 where $x=q/q_0$, $q_0$ is the peak position, $I_{max}$ is the intensity at $q_0$ (parameterised as the $scale$ parameter), and $B$ is a flat background. The spinodal wavelength is given by $2\pi/q_0$. background. The spinodal wavelength, $\Lambda$, is given by $2\pi/q_0$. The definition of $I_{max}$ in the literature varies. Hashimoto *et al* (1991) define it as .. math:: I_{max} = \Lambda^3\Delta\rho^2 whereas Meier & Strobl (1987) give .. math:: I_{max} = V_z\Delta\rho^2 where $V_z$ is the volume per monomer unit. The exponent $\gamma$ is equal to $d+1$ for off-critical concentration H. Furukawa. Dynamics-scaling theory for phase-separating unmixing mixtures: Growth rates of droplets and scaling properties of autocorrelation functions. Physica A 123,497 (1984). Growth rates of droplets and scaling properties of autocorrelation functions. Physica A 123, 497 (1984). H. Meier & G. Strobl. Small-Angle X-ray Scattering Study of Spinodal Decomposition in Polystyrene/Poly(styrene-co-bromostyrene) Blends. Macromolecules 20, 649-654 (1987). T. Hashimoto, M. Takenaka & H. Jinnai. Scattering Studies of Self-Assembling Processes of Polymer Blends in Spinodal Decomposition. J. Appl. Cryst. 24, 457-466 (1991). Revision History * **Author:**  Dirk Honecker **Date:** Oct 7, 2016 * **Revised:** Steve King    **Date:** Sep 7, 2018 * **Revised:** Steve King    **Date:** Oct 25, 2018 """
 rd86f0fc from . import generate from .generate import F32, F64 from .kernel import KernelModel, Kernel Return true if device supports the requested precision. """ if dtype == generate.F32: if dtype == F32: return True elif dtype == generate.F64: """ GPU context, with possibly many devices, and one queue per device. Because the environment can be reset during a live program (e.g., if the user changes the active GPU device in the GUI), everything associated with the device context must be cached in the environment and recreated if the environment changes.  The *cache* attribute is a simple dictionary which holds keys and references to objects, such as compiled kernels and allocated buffers.  The running program should check in the cache for long lived objects and create them if they are not there.  The program should not hold onto cached objects, but instead only keep them active for the duration of a function call.  When the environment is destroyed then the *release* method for each active cache item is called before the environment is freed.  This means that each cl buffer should be in its own cache entry. """ def __init__(self): # type: () -> None # find gpu context #self.context = cl.create_some_context() self.context = None if 'SAS_OPENCL' in os.environ: #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"] if 'PYOPENCL_CTX' in os.environ: self._create_some_context() if not self.context: self.context = _get_default_context() context_list = _create_some_context() # Find a context for F32 and for F64 (maybe the same one). # F16 isn't good enough. 
self.context = {} for dtype in (F32, F64): for context in context_list: if has_type(context.devices[0], dtype): self.context[dtype] = context break else: self.context[dtype] = None # Build a queue for each context self.queue = {} context = self.context[F32] self.queue[F32] = cl.CommandQueue(context, context.devices[0]) if self.context[F64] == self.context[F32]: self.queue[F64] = self.queue[F32] else: context = self.context[F64] self.queue[F64] = cl.CommandQueue(context, context.devices[0]) # Byte boundary for data alignment #self.data_boundary = max(d.min_data_type_align_size #                         for d in self.context.devices) self.queues = [cl.CommandQueue(context, context.devices[0]) for context in self.context] #self.data_boundary = max(context.devices[0].min_data_type_align_size #                         for context in self.context.values()) # Cache for compiled programs, and for items in context self.compiled = {} self.cache = {} def has_type(self, dtype): Return True if all devices support a given type. """ return any(has_type(d, dtype) for context in self.context for d in context.devices) def get_queue(self, dtype): # type: (np.dtype) -> cl.CommandQueue """ Return a command queue for the kernels of type dtype. """ for context, queue in zip(self.context, self.queues): if all(has_type(d, dtype) for d in context.devices): return queue def get_context(self, dtype): # type: (np.dtype) -> cl.Context """ Return a OpenCL context for the kernels of type dtype. """ for context in self.context: if all(has_type(d, dtype) for d in context.devices): return context def _create_some_context(self): # type: () -> cl.Context """ Protected call to cl.create_some_context without interactivity.  Use this if SAS_OPENCL is set in the environment.  Sets the *context* attribute. 
""" try: self.context = [cl.create_some_context(interactive=False)] except Exception as exc: warnings.warn(str(exc)) warnings.warn("pyopencl.create_some_context() failed") warnings.warn("the environment variable 'SAS_OPENCL' might not be set correctly") return self.context.get(dtype, None) is not None def compile_program(self, name, source, dtype, fast, timestamp): del self.compiled[key] if key not in self.compiled: context = self.get_context(dtype) context = self.context[dtype] logging.info("building %s for OpenCL %s", key, context.devices[0].name.strip()) program = compile_model(self.get_context(dtype), program = compile_model(self.context[dtype], str(source), dtype, fast) self.compiled[key] = (program, timestamp) return program def free_buffer(self, key): if key in self.cache: self.cache[key].release() del self.cache[key] def __del__(self): for v in self.cache.values(): release = getattr(v, 'release', lambda: None) release() self.cache = {} _CURRENT_ID = 0 def unique_id(): global _CURRENT_ID _CURRENT_ID += 1 return _CURRENT_ID def _create_some_context(): # type: () -> cl.Context """ Protected call to cl.create_some_context without interactivity. 
Uses SAS_OPENCL or PYOPENCL_CTX if they are set in the environment, otherwise scans for the most appropriate device using :func:_get_default_context """ if 'SAS_OPENCL' in os.environ: #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"] if 'PYOPENCL_CTX' in os.environ: try: return [cl.create_some_context(interactive=False)] except Exception as exc: warnings.warn(str(exc)) warnings.warn("pyopencl.create_some_context() failed") warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") return _get_default_context() def _get_default_context(): self.dtype = dtype self.fast = fast self.program = None # delay program creation self._kernels = None self.timestamp = generate.ocl_timestamp(self.info) self._cache_key = unique_id() def __getstate__(self): # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None self.info, self.source, self.dtype, self.fast = state self.program = None def make_kernel(self, q_vectors): # type: (List[np.ndarray]) -> "GpuKernel" if self.program is None: compile_program = environment().compile_program timestamp = generate.ocl_timestamp(self.info) self.program = compile_program( return GpuKernel(self, q_vectors) @property def Iq(self): return self._fetch_kernel('Iq') def fetch_kernel(self, name): # type: (str) -> cl.Kernel """ Fetch the kernel from the environment by name, compiling it if it does not already exist. 
""" gpu = environment() key = self._cache_key if key not in gpu.cache: program = gpu.compile_program( self.info.name, self.source['opencl'], self.dtype, self.fast, timestamp) self.timestamp) variants = ['Iq', 'Iqxy', 'Imagnetic'] names = [generate.kernel_name(self.info, k) for k in variants] kernels = [getattr(self.program, k) for k in names] self._kernels = dict((k, v) for k, v in zip(variants, kernels)) is_2d = len(q_vectors) == 2 if is_2d: kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] kernels = [getattr(program, k) for k in names] data = dict((k, v) for k, v in zip(variants, kernels)) # keep a handle to program so GC doesn't collect data['program'] = program gpu.cache[key] = data else: kernel = [self._kernels['Iq']]*2 return GpuKernel(kernel, self.dtype, self.info, q_vectors) def release(self): # type: () -> None """ Free the resources associated with the model. """ if self.program is not None: self.program = None def __del__(self): # type: () -> None self.release() data = gpu.cache[key] return data[name] # TODO: check that we don't need a destructor for buffers which go out of scope # type: (List[np.ndarray], np.dtype) -> None # TODO: do we ever need double precision q? env = environment() self.nq = q_vectors[0].size self.dtype = np.dtype(dtype) self.q[:self.nq] = q_vectors[0] self.global_size = [self.q.shape[0]] context = env.get_context(self.dtype) #print("creating inputs of size", self.global_size) self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.q) self._cache_key = unique_id() @property def q_b(self): """Lazy creation of q buffer so it can survive context reset""" env = environment() key = self._cache_key if key not in env.cache: context = env.context[self.dtype] #print("creating inputs of size", self.global_size) buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.q) env.cache[key] = buffer return env.cache[key] def release(self): # type: () -> None """ Free the memory. 
""" if self.q_b is not None: self.q_b.release() self.q_b = None Free the buffer associated with the q value """ environment().free_buffer(id(self)) def __del__(self): Callable SAS kernel. *kernel* is the GpuKernel object to call *model_info* is the module information *q_vectors* is the q vectors at which the kernel should be evaluated *model* is the GpuModel object to call The following attributes are defined: *info* is the module information *dtype* is the kernel precision *dim* is '1d' or '2d' *result* is a vector to contain the results of the call The resulting call method takes the *pars*, a list of values for Call :meth:release when done with the kernel instance. """ def __init__(self, kernel, dtype, model_info, q_vectors): def __init__(self, model, q_vectors): # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None q_input = GpuInput(q_vectors, dtype) self.kernel = kernel self.info = model_info self.dtype = dtype self.dim = '2d' if q_input.is_2d else '1d' # plus three for the normalization values self.result = np.empty(q_input.nq+1, dtype) # Inputs and outputs for each kernel call # Note: res may be shorter than res_b if global_size != nq dtype = model.dtype self.q_input = GpuInput(q_vectors, dtype) self._model = model self._as_dtype = (np.float32 if dtype == generate.F32 else np.float64 if dtype == generate.F64 else np.float16 if dtype == generate.F16 else np.float32)  # will never get here, so use np.float32 self._cache_key = unique_id() # attributes accessed from the outside self.dim = '2d' if self.q_input.is_2d else '1d' self.info = model.info self.dtype = model.dtype # holding place for the returned value # plus one for the normalization values self.result = np.empty(self.q_input.nq+1, dtype) @property def _result_b(self): """Lazy creation of result buffer so it can survive context reset""" env = environment() self.queue = env.get_queue(dtype) self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE, q_input.global_size[0] * dtype.itemsize) 
self.q_input = q_input # allocated by GpuInput above self._need_release = [self.result_b, self.q_input] self.real = (np.float32 if dtype == generate.F32 else np.float64 if dtype == generate.F64 else np.float16 if dtype == generate.F16 else np.float32)  # will never get here, so use np.float32 key = self._cache_key if key not in env.cache: context = env.context[self.dtype] #print("creating inputs of size", self.global_size) buffer = cl.Buffer(context, mf.READ_WRITE, self.q_input.global_size[0] * self.dtype.itemsize) env.cache[key] = buffer return env.cache[key] def __call__(self, call_details, values, cutoff, magnetic): # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray context = self.queue.context # Arrange data transfer to card env = environment() queue = env.queue[self._model.dtype] context = queue.context # Arrange data transfer to/from card q_b = self.q_input.q_b result_b = self._result_b details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=call_details.buffer) hostbuf=values) kernel = self.kernel[1 if magnetic else 0] args = [ name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' kernel = self._model.fetch_kernel(name) kernel_args = [ np.uint32(self.q_input.nq), None, None, details_b, values_b, self.q_input.q_b, self.result_b, self.real(cutoff), details_b, values_b, q_b, result_b, self._as_dtype(cutoff), ] #print("Calling OpenCL") stop = min(start + step, call_details.num_eval) #print("queuing",start,stop) args[1:3] = [np.int32(start), np.int32(stop)] wait_for = [kernel(self.queue, self.q_input.global_size, None, *args, wait_for=wait_for)] kernel_args[1:3] = [np.int32(start), np.int32(stop)] wait_for = [kernel(queue, self.q_input.global_size, None, *kernel_args, wait_for=wait_for)] if stop < call_details.num_eval: # Allow other processes to run time.sleep(0.05) last_nap = current_time cl.enqueue_copy(self.queue, self.result, self.result_b) cl.enqueue_copy(queue, self.result, result_b, 
wait_for=wait_for) #print("result", self.result) Release resources associated with the kernel. """ for v in self._need_release: v.release() self._need_release = [] environment().free_buffer(id(self)) self.q_input.release() def __del__(self):