Changes in [225bf94:3b6567f] in sasmodels


Files: 20 edited

  • .travis.yml

    rf3767c2 r5c36bf1  
    99  - os: linux 
    1010    env: 
    11     - PY=3.7 
     11    - PY=3.6 
    1212  - os: osx 
    1313    language: generic 
     
    1717    language: generic 
    1818    env: 
    19     - PY=3.7 
     19    - PY=3.5 
    2020branches: 
    2121  only: 
  • doc/guide/plugin.rst

    r94bfa42 r81751c2  
    272272structure factor to account for interactions between particles.  See 
    273273`Form_Factors`_ for more details. 
    274  
    275 **model_info = ...** lets you define a model directly, for example, by 
    276 loading and modifying existing models.  This is done implicitly by 
    277 :func:`sasmodels.core.load_model_info`, which can create a mixture model 
    278 from a pair of existing models.  For example:: 
    279  
    280     from sasmodels.core import load_model_info 
    281     model_info = load_model_info('sphere+cylinder') 
    282  
    283 See :class:`sasmodels.modelinfo.ModelInfo` for details about the model 
    284 attributes that are defined. 
    285274 
    286275Model Parameters 
     
    905894             - \frac{\sin(x)}{x}\left(\frac{1}{x} - \frac{3!}{x^3} + \frac{5!}{x^5} - \frac{7!}{x^7}\right) 
    906895 
    907         For small arguments, 
     896        For small arguments , 
    908897 
    909898        .. math:: 
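The plugin.rst passage removed above describes defining a model directly from existing models via load_model_info. A minimal sketch of that usage, assuming sasmodels is importable (the 'sphere+cylinder' mixture is taken from the removed text):

    from sasmodels.core import load_model_info

    # "sphere+cylinder" asks load_model_info to build a mixture model from
    # two existing models; the returned ModelInfo carries the combined
    # parameter table.
    model_info = load_model_info('sphere+cylinder')
    print(model_info.name)
    print([p.name for p in model_info.parameters.kernel_parameters])
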
  • example/multiscatfit.py

    r2c4a190 r49d1f8b8  
    1515 
    1616    # Show the model without fitting 
    17     PYTHONPATH=..:../../bumps:../../sasview/src python multiscatfit.py 
     17    PYTHONPATH=..:../explore:../../bumps:../../sasview/src python multiscatfit.py 
    1818 
    1919    # Run the fit 
    20     PYTHONPATH=..:../../bumps:../../sasview/src ../../bumps/run.py \ 
     20    PYTHONPATH=..:../explore:../../bumps:../../sasview/src ../../bumps/run.py \ 
    2121    multiscatfit.py --store=/tmp/t1 
    2222 
     
    5555    ) 
    5656 
    57 # Tie the model to the data 
    58 M = Experiment(data=data, model=model) 
    59  
    60 # Stack mulitple scattering on top of the existing resolution function. 
    61 M.resolution = MultipleScattering(resolution=M.resolution, probability=0.) 
    62  
    6357# SET THE FITTING PARAMETERS 
    6458model.radius_polar.range(15, 3000) 
     
    7165model.scale.range(0, 0.1) 
    7266 
    73 # The multiple scattering probability parameter is in the resolution function 
    74 # instead of the scattering function, so access it through M.resolution 
    75 M.scattering_probability.range(0.0, 0.9) 
     67# Mulitple scattering probability parameter 
     68# HACK: the probability is stuffed in as an extra parameter to the experiment. 
     69probability = Parameter(name="probability", value=0.0) 
     70probability.range(0.0, 0.9) 
    7671 
    77 # Let bumps know that we are fitting this experiment 
     72M = Experiment(data=data, model=model, extra_pars={'probability': probability}) 
     73 
     74# Stack mulitple scattering on top of the existing resolution function. 
     75# Because resolution functions in sasview don't have fitting parameters, 
     76# we instead allow the multiple scattering calculator to take a function 
     77# instead of a probability.  This function returns the current value of 
     78# the parameter. ** THIS IS TEMPORARY ** when multiple scattering is 
     79# properly integrated into sasmodels and sasview, its fittable parameter 
     80# will be treated like the model parameters. 
     81M.resolution = MultipleScattering(resolution=M.resolution, 
     82                                  probability=lambda: probability.value, 
     83                                  ) 
     84M._kernel_inputs = M.resolution.q_calc 
    7885problem = FitProblem(M) 
    7986 
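A minimal sketch of the temporary hook in the new multiscatfit.py above: the multiple-scattering probability lives in a bumps Parameter passed to the Experiment through extra_pars, and the resolution wrapper reads its current value through a zero-argument callable (bumps must be on the path; MultipleScattering accepting a callable is taken from the comment in the diff):

    from bumps.names import Parameter

    probability = Parameter(name="probability", value=0.0)
    probability.range(0.0, 0.9)

    # The resolution wrapper is given a callable instead of a number, so each
    # evaluation re-reads whatever value the optimizer last assigned.
    get_probability = lambda: probability.value

    print(get_probability())        # 0.0 before fitting
    probability.value = 0.25        # what the fit engine does on each step
    print(get_probability())        # 0.25
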
  • sasmodels/__init__.py

    r37f38ff ra1ec908  
    1414defining new models. 
    1515""" 
    16 __version__ = "0.99" 
     16__version__ = "0.98" 
    1717 
    1818def data_files(): 
  • sasmodels/bumps_model.py

    r2c4a190 r49d1f8b8  
    3535    # when bumps is not on the path. 
    3636    from bumps.names import Parameter # type: ignore 
    37     from bumps.parameter import Reference # type: ignore 
    3837except ImportError: 
    3938    pass 
     
    140139    def __init__(self, data, model, cutoff=1e-5, name=None, extra_pars=None): 
    141140        # type: (Data, Model, float) -> None 
    142         # Allow resolution function to define fittable parameters.  We do this 
    143         # by creating reference parameters within the resolution object rather 
    144         # than modifying the object itself to use bumps parameters.  We need 
    145         # to reset the parameters each time the object has changed.  These 
    146         # additional parameters need to be returned from the fitting engine. 
    147         # To make them available to the user, they are added as top-level 
    148         # attributes to the experiment object.  The only change to the 
    149         # resolution function is that it needs an optional 'fittable' attribute 
    150         # which maps the internal name to the user visible name for the 
    151         # for the parameter. 
    152         self._resolution = None 
    153         self._resolution_pars = {} 
    154141        # remember inputs so we can inspect from outside 
    155142        self.name = data.filename if name is None else name 
     
    158145        self._interpret_data(data, model.sasmodel) 
    159146        self._cache = {} 
    160         # CRUFT: no longer need extra parameters 
    161         # Multiple scattering probability is now retrieved directly from the 
    162         # multiple scattering resolution function. 
    163147        self.extra_pars = extra_pars 
    164148 
     
    178162        return len(self.Iq) 
    179163 
    180     @property 
    181     def resolution(self): 
    182         return self._resolution 
    183  
    184     @resolution.setter 
    185     def resolution(self, value): 
    186         self._resolution = value 
    187  
    188         # Remove old resolution fitting parameters from experiment 
    189         for name in self._resolution_pars: 
    190             delattr(self, name) 
    191  
    192         # Create new resolution fitting parameters 
    193         res_pars = getattr(self._resolution, 'fittable', {}) 
    194         self._resolution_pars = { 
    195             name: Reference(self._resolution, refname, name=name) 
    196             for refname, name in res_pars.items() 
    197         } 
    198  
    199         # Add new resolution fitting parameters as experiment attributes 
    200         for name, ref in self._resolution_pars.items(): 
    201             setattr(self, name, ref) 
    202  
    203164    def parameters(self): 
    204165        # type: () -> Dict[str, Parameter] 
     
    207168        """ 
    208169        pars = self.model.parameters() 
    209         if self.extra_pars is not None: 
     170        if self.extra_pars: 
    210171            pars.update(self.extra_pars) 
    211         pars.update(self._resolution_pars) 
    212172        return pars 
    213173 
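The code removed from bumps_model.py above exposed resolution attributes as fittable parameters through bumps Reference objects. A hedged sketch of that mechanism (FakeResolution is a stand-in; it assumes Reference reads and writes the named attribute of the wrapped object, which is how the removed code uses it):

    from bumps.parameter import Reference

    class FakeResolution(object):
        # 'fittable' maps the internal attribute name to the user-visible
        # parameter name, as described in the removed comment block.
        fittable = {'probability': 'scattering_probability'}
        probability = 0.0

    res = FakeResolution()
    resolution_pars = {
        name: Reference(res, refname, name=name)
        for refname, name in res.fittable.items()
    }
    # Fitting 'scattering_probability' writes through to res.probability.
    resolution_pars['scattering_probability'].value = 0.3
    print(res.probability)          # 0.3
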
  • sasmodels/convert.py

    r21c93c3 r610ef23  
    105105                    translation[newid+str(k)] = oldid+str(k) 
    106106    # Remove control parameter from the result 
    107     control_pars = [p.id for p in model_info.parameters.kernel_parameters 
    108                     if p.is_control] 
    109     if control_pars: 
    110         control_id = control_pars[0] 
    111         translation[control_id] = "CONTROL" 
     107    if model_info.control: 
     108        translation[model_info.control] = "CONTROL" 
    112109    return translation 
    113110 
  • sasmodels/core.py

    rd92182f rb0de252  
    6464        * all: all models 
    6565        * py: python models only 
    66         * c: c models only 
    67         * single: c models which support single precision 
    68         * double: c models which require double precision 
    69         * opencl: c models which run in opencl 
    70         * dll: c models which do not run in opencl 
    71         * 1d: models without orientation 
    72         * 2d: models with orientation 
    73         * magnetic: models supporting magnetic sld 
    74         * nommagnetic: models without magnetic parameter 
     66        * c: compiled models only 
     67        * single: models which support single precision 
     68        * double: models which require double precision 
     69        * opencl: controls if OpenCL is supperessed 
     70        * 1d: models which are 1D only, or 2D using abs(q) 
     71        * 2d: models which can be 2D 
     72        * magnetic: models with an sld 
     73        * nommagnetic: models without an sld 
    7574 
    7675    For multiple conditions, combine with plus.  For example, *c+single+2d* 
     
    9695    info = load_model_info(name) 
    9796    pars = info.parameters.kernel_parameters 
    98     # TODO: may be adding Fq to the list at some point 
    99     is_pure_py = callable(info.Iq) 
    100     if kind == "py": 
    101         return is_pure_py 
    102     elif kind == "c": 
    103         return not is_pure_py 
    104     elif kind == "double": 
    105         return not info.single and not is_pure_py 
    106     elif kind == "single": 
    107         return info.single and not is_pure_py 
    108     elif kind == "opencl": 
    109         return info.opencl 
    110     elif kind == "dll": 
    111         return not info.opencl and not is_pure_py 
    112     elif kind == "2d": 
    113         return any(p.type == 'orientation' for p in pars) 
    114     elif kind == "1d": 
    115         return all(p.type != 'orientation' for p in pars) 
    116     elif kind == "magnetic": 
    117         return any(p.type == 'sld' for p in pars) 
    118     elif kind == "nonmagnetic": 
    119         return not any(p.type == 'sld' for p in pars) 
     97    if kind == "py" and callable(info.Iq): 
     98        return True 
     99    elif kind == "c" and not callable(info.Iq): 
     100        return True 
     101    elif kind == "double" and not info.single: 
     102        return True 
     103    elif kind == "single" and info.single: 
     104        return True 
     105    elif kind == "opencl" and info.opencl: 
     106        return True 
     107    elif kind == "2d" and any(p.type == 'orientation' for p in pars): 
     108        return True 
     109    elif kind == "1d" and all(p.type != 'orientation' for p in pars): 
     110        return True 
     111    elif kind == "magnetic" and any(p.type == 'sld' for p in pars): 
     112        return True 
     113    elif kind == "nonmagnetic" and any(p.type != 'sld' for p in pars): 
     114        return True 
    120115    return False 
    121116 
     
    322317    return numpy_dtype, fast, platform 
    323318 
     319def list_models_main(): 
     320    # type: () -> None 
     321    """ 
     322    Run list_models as a main program.  See :func:`list_models` for the 
     323    kinds of models that can be requested on the command line. 
     324    """ 
     325    import sys 
     326    kind = sys.argv[1] if len(sys.argv) > 1 else "all" 
     327    print("\n".join(list_models(kind))) 
     328 
    324329def test_composite_order(): 
    325330    def test_models(fst, snd): 
     
    381386    assert target == actual, "%s != %s"%(target, actual) 
    382387 
    383 def list_models_main(): 
    384     # type: () -> None 
    385     """ 
    386     Run list_models as a main program.  See :func:`list_models` for the 
    387     kinds of models that can be requested on the command line. 
    388     """ 
    389     import sys 
    390     kind = sys.argv[1] if len(sys.argv) > 1 else "all" 
    391     try: 
    392         models = list_models(kind) 
    393     except Exception as exc: 
    394         print(list_models.__doc__) 
    395         return 1 
    396  
    397     print("\n".join(list_models(kind))) 
    398  
    399388if __name__ == "__main__": 
    400389    list_models_main() 
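A short usage sketch for the model-kind filter documented above (run with sasmodels importable; filters combine with '+' as the docstring notes):

    from sasmodels.core import list_models

    print(list_models("py")[:5])            # pure-python models
    print(list_models("c+single+2d")[:5])   # compiled, single precision, 2D

    # list_models_main() exposes the same filter on the command line:
    #     python -m sasmodels.core c+single+2d
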
  • sasmodels/direct_model.py

    r2c4a190 r5024a56  
    224224            else: 
    225225                Iq, dIq = None, None 
     226            #self._theory = np.zeros_like(q) 
     227            q_vectors = [res.q_calc] 
    226228        elif self.data_type == 'Iqxy': 
    227229            #if not model.info.parameters.has_2d: 
     
    240242            res = resolution2d.Pinhole2D(data=data, index=index, 
    241243                                         nsigma=3.0, accuracy=accuracy) 
     244            #self._theory = np.zeros_like(self.Iq) 
     245            q_vectors = res.q_calc 
    242246        elif self.data_type == 'Iq': 
    243247            index = (data.x >= data.qmin) & (data.x <= data.qmax) 
     
    264268            else: 
    265269                res = resolution.Perfect1D(data.x[index]) 
     270 
     271            #self._theory = np.zeros_like(self.Iq) 
     272            q_vectors = [res.q_calc] 
    266273        elif self.data_type == 'Iq-oriented': 
    267274            index = (data.x >= data.qmin) & (data.x <= data.qmax) 
     
    279286                                      qx_width=data.dxw[index], 
    280287                                      qy_width=data.dxl[index]) 
     288            q_vectors = res.q_calc 
    281289        else: 
    282290            raise ValueError("Unknown data type") # never gets here 
     
    284292        # Remember function inputs so we can delay loading the function and 
    285293        # so we can save/restore state 
     294        self._kernel_inputs = q_vectors 
    286295        self._kernel = None 
    287296        self.Iq, self.dIq, self.index = Iq, dIq, index 
     
    320329        # type: (ParameterSet, float) -> np.ndarray 
    321330        if self._kernel is None: 
    322             # TODO: change interfaces so that resolution returns kernel inputs 
    323             # Maybe have resolution always return a tuple, or maybe have 
    324             # make_kernel accept either an ndarray or a pair of ndarrays. 
    325             kernel_inputs = self.resolution.q_calc 
    326             if isinstance(kernel_inputs, np.ndarray): 
    327                 kernel_inputs = (kernel_inputs,) 
    328             self._kernel = self._model.make_kernel(kernel_inputs) 
     331            self._kernel = self._model.make_kernel(self._kernel_inputs) 
    329332 
    330333        # Need to pull background out of resolution for multiple scattering 
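The block removed from _call above normalized resolution.q_calc before handing it to make_kernel; the new code instead captures the q vectors up front as _kernel_inputs. A minimal sketch of the normalization being replaced:

    import numpy as np

    def kernel_inputs(q_calc):
        # 1D resolutions return a single ndarray, 2D resolutions a pair of
        # arrays; make_kernel always wants a sequence of q vectors.
        return (q_calc,) if isinstance(q_calc, np.ndarray) else q_calc

    print(len(kernel_inputs(np.linspace(0.001, 0.5, 100))))    # 1 -> 1D
    print(len(kernel_inputs((np.zeros(10), np.zeros(10)))))    # 2 -> 2D
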
  • sasmodels/kernel.py

    r3199b17 rcd28947  
    2323# pylint: enable=unused-import 
    2424 
    25  
    2625class KernelModel(object): 
    2726    info = None  # type: ModelInfo 
     
    3433        # type: () -> None 
    3534        pass 
    36  
    3735 
    3836class Kernel(object): 
  • sasmodels/kernelcl.py

    r3199b17 rf872fd1  
    6969import numpy as np  # type: ignore 
    7070 
    71 # Attempt to setup OpenCL. This may fail if the pyopencl package is not 
     71# Attempt to setup opencl. This may fail if the pyopencl package is not 
    7272# installed or if it is installed but there are no devices available. 
    7373try: 
     
    7575    from pyopencl import mem_flags as mf 
    7676    from pyopencl.characterize import get_fast_inaccurate_build_options 
    77     # Ask OpenCL for the default context so that we know that one exists. 
     77    # Ask OpenCL for the default context so that we know that one exists 
    7878    cl.create_some_context(interactive=False) 
    7979    HAVE_OPENCL = True 
     
    9696# pylint: enable=unused-import 
    9797 
    98  
    99 # CRUFT: pyopencl < 2017.1 (as of June 2016 needs quotes around include path). 
     98# CRUFT: pyopencl < 2017.1  (as of June 2016 needs quotes around include path) 
    10099def quote_path(v): 
    101100    """ 
     
    108107    return '"'+v+'"' if v and ' ' in v and not v[0] in "\"'-" else v 
    109108 
    110  
    111109def fix_pyopencl_include(): 
    112110    """ 
     
    115113    import pyopencl as cl 
    116114    if hasattr(cl, '_DEFAULT_INCLUDE_OPTIONS'): 
    117         cl._DEFAULT_INCLUDE_OPTIONS = [ 
    118             quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS 
    119             ] 
    120  
     115        cl._DEFAULT_INCLUDE_OPTIONS = [quote_path(v) for v in cl._DEFAULT_INCLUDE_OPTIONS] 
    121116 
    122117if HAVE_OPENCL: 
     
    131126MAX_LOOPS = 2048 
    132127 
     128 
    133129# Pragmas for enable OpenCL features.  Be sure to protect them so that they 
    134130# still compile even if OpenCL is not present. 
     
    145141""" 
    146142 
    147  
    148143def use_opencl(): 
    149144    sas_opencl = os.environ.get("SAS_OPENCL", "OpenCL").lower() 
    150145    return HAVE_OPENCL and sas_opencl != "none" and not sas_opencl.startswith("cuda") 
    151146 
    152  
    153147ENV = None 
    154148def reset_environment(): 
     
    158152    global ENV 
    159153    ENV = GpuEnvironment() if use_opencl() else None 
    160  
    161154 
    162155def environment(): 
     
    176169    return ENV 
    177170 
    178  
    179171def has_type(device, dtype): 
    180172    # type: (cl.Device, np.dtype) -> bool 
     
    187179        return "cl_khr_fp64" in device.extensions 
    188180    else: 
    189         # Not supporting F16 type since it isn't accurate enough. 
     181        # Not supporting F16 type since it isn't accurate enough 
    190182        return False 
    191  
    192183 
    193184def get_warp(kernel, queue): 
     
    199190        cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, 
    200191        queue.device) 
    201  
    202192 
    203193def compile_model(context, source, dtype, fast=False): 
     
    221211        source_list.insert(0, _F64_PRAGMA) 
    222212 
    223     # Note: USE_SINCOS makes the Intel CPU slower under OpenCL. 
     213    # Note: USE_SINCOS makes the intel cpu slower under opencl 
    224214    if context.devices[0].type == cl.device_type.GPU: 
    225215        source_list.insert(0, "#define USE_SINCOS\n") 
     
    228218    source = "\n".join(source_list) 
    229219    program = cl.Program(context, source).build(options=options) 
    230  
    231220    #print("done with "+program) 
    232221    return program 
    233222 
    234223 
    235 # For now, this returns one device in the context. 
    236 # TODO: Create a context that contains all devices on all platforms. 
     224# for now, this returns one device in the context 
     225# TODO: create a context that contains all devices on all platforms 
    237226class GpuEnvironment(object): 
    238227    """ 
    239     GPU context for OpenCL, with possibly many devices and one queue per device. 
     228    GPU context, with possibly many devices, and one queue per device. 
     229 
     230    Because the environment can be reset during a live program (e.g., if the 
     231    user changes the active GPU device in the GUI), everything associated 
     232    with the device context must be cached in the environment and recreated 
     233    if the environment changes.  The *cache* attribute is a simple dictionary 
     234    which holds keys and references to objects, such as compiled kernels and 
     235    allocated buffers.  The running program should check in the cache for 
     236    long lived objects and create them if they are not there.  The program 
     237    should not hold onto cached objects, but instead only keep them active 
     238    for the duration of a function call.  When the environment is destroyed 
     239    then the *release* method for each active cache item is called before 
     240    the environment is freed.  This means that each cl buffer should be 
     241    in its own cache entry. 
    240242    """ 
    241243    def __init__(self): 
    242244        # type: () -> None 
    243         # Find gpu context. 
     245        # find gpu context 
    244246        context_list = _create_some_context() 
    245247 
     
    255257                self.context[dtype] = None 
    256258 
    257         # Build a queue for each context. 
     259        # Build a queue for each context 
    258260        self.queue = {} 
    259261        context = self.context[F32] 
     
    265267            self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 
    266268 
    267         ## Byte boundary for data alignment. 
     269        # Byte boundary for data alignment 
    268270        #self.data_boundary = max(context.devices[0].min_data_type_align_size 
    269271        #                         for context in self.context.values()) 
    270272 
    271         # Cache for compiled programs, and for items in context. 
     273        # Cache for compiled programs, and for items in context 
    272274        self.compiled = {} 
     275        self.cache = {} 
    273276 
    274277    def has_type(self, dtype): 
     
    285288        """ 
    286289        # Note: PyOpenCL caches based on md5 hash of source, options and device 
    287         # but I'll do so as well just to save some data munging time. 
     290        # so we don't really need to cache things for ourselves.  I'll do so 
     291        # anyway just to save some data munging time. 
    288292        tag = generate.tag_source(source) 
    289293        key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 
    290         # Check timestamp on program. 
     294        # Check timestamp on program 
    291295        program, program_timestamp = self.compiled.get(key, (None, np.inf)) 
    292296        if program_timestamp < timestamp: 
     
    301305        return program 
    302306 
     307    def free_buffer(self, key): 
     308        if key in self.cache: 
     309            self.cache[key].release() 
     310            del self.cache[key] 
     311 
     312    def __del__(self): 
     313        for v in self.cache.values(): 
     314            release = getattr(v, 'release', lambda: None) 
     315            release() 
     316        self.cache = {} 
     317 
     318_CURRENT_ID = 0 
     319def unique_id(): 
     320    global _CURRENT_ID 
     321    _CURRENT_ID += 1 
     322    return _CURRENT_ID 
    303323 
    304324def _create_some_context(): 
     
    313333    which one (and not a CUDA device, or no GPU). 
    314334    """ 
    315     # Assume we do not get here if SAS_OPENCL is None or CUDA. 
     335    # Assume we do not get here if SAS_OPENCL is None or CUDA 
    316336    sas_opencl = os.environ.get('SAS_OPENCL', 'opencl') 
    317337    if sas_opencl.lower() != 'opencl': 
    318         # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context. 
     338        # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context 
    319339        os.environ["PYOPENCL_CTX"] = sas_opencl 
    320340 
     
    324344        except Exception as exc: 
    325345            warnings.warn(str(exc)) 
    326             warnings.warn("pyopencl.create_some_context() failed.  The " 
    327                 "environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might " 
    328                 "not be set correctly") 
     346            warnings.warn("pyopencl.create_some_context() failed") 
     347            warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") 
    329348 
    330349    return _get_default_context() 
    331  
    332350 
    333351def _get_default_context(): 
     
    342360    # is running may increase throughput. 
    343361    # 
    344     # MacBook Pro, base install: 
     362    # Macbook pro, base install: 
    345363    #     {'Apple': [Intel CPU, NVIDIA GPU]} 
    346     # MacBook Pro, base install: 
     364    # Macbook pro, base install: 
    347365    #     {'Apple': [Intel CPU, Intel GPU]} 
    348     # 2 x NVIDIA 295 with Intel and NVIDIA opencl drivers install: 
     366    # 2 x nvidia 295 with Intel and NVIDIA opencl drivers installed 
    349367    #     {'Intel': [CPU], 'NVIDIA': [GPU, GPU, GPU, GPU]} 
    350368    gpu, cpu = None, None 
     
    369387            else: 
    370388                # System has cl.device_type.ACCELERATOR or cl.device_type.CUSTOM 
    371                 # Intel Phi for example registers as an accelerator. 
     389                # Intel Phi for example registers as an accelerator 
    372390                # Since the user installed a custom device on their system 
    373391                # and went through the pain of sorting out OpenCL drivers for 
     
    376394                gpu = device 
    377395 
    378     # Order the devices by gpu then by cpu; when searching for an available 
     396    # order the devices by gpu then by cpu; when searching for an available 
    379397    # device by data type they will be checked in this order, which means 
    380398    # that if the gpu supports double then the cpu will never be used (though 
     
    403421    that the compiler is allowed to take shortcuts. 
    404422    """ 
    405     info = None  # type: ModelInfo 
    406     source = ""  # type: str 
    407     dtype = None  # type: np.dtype 
    408     fast = False  # type: bool 
    409     _program = None  # type: cl.Program 
    410     _kernels = None  # type: Dict[str, cl.Kernel] 
    411  
    412423    def __init__(self, source, model_info, dtype=generate.F32, fast=False): 
    413424        # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None 
     
    416427        self.dtype = dtype 
    417428        self.fast = fast 
     429        self.timestamp = generate.ocl_timestamp(self.info) 
     430        self._cache_key = unique_id() 
    418431 
    419432    def __getstate__(self): 
     
    424437        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    425438        self.info, self.source, self.dtype, self.fast = state 
    426         self._program = self._kernels = None 
    427439 
    428440    def make_kernel(self, q_vectors): 
     
    430442        return GpuKernel(self, q_vectors) 
    431443 
    432     def get_function(self, name): 
     444    @property 
     445    def Iq(self): 
     446        return self._fetch_kernel('Iq') 
     447 
     448    def fetch_kernel(self, name): 
    433449        # type: (str) -> cl.Kernel 
    434450        """ 
     
    436452        does not already exist. 
    437453        """ 
    438         if self._program is None: 
    439             self._prepare_program() 
    440         return self._kernels[name] 
    441  
    442     def _prepare_program(self): 
    443         # type: (str) -> None 
    444         env = environment() 
    445         timestamp = generate.ocl_timestamp(self.info) 
    446         program = env.compile_program( 
    447             self.info.name, 
    448             self.source['opencl'], 
    449             self.dtype, 
    450             self.fast, 
    451             timestamp) 
    452         variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    453         names = [generate.kernel_name(self.info, k) for k in variants] 
    454         functions = [getattr(program, k) for k in names] 
    455         self._kernels = {k: v for k, v in zip(variants, functions)} 
    456         # Keep a handle to program so GC doesn't collect. 
    457         self._program = program 
    458  
    459  
    460 # TODO: Check that we don't need a destructor for buffers which go out of scope. 
     454        gpu = environment() 
     455        key = self._cache_key 
     456        if key not in gpu.cache: 
     457            program = gpu.compile_program( 
     458                self.info.name, 
     459                self.source['opencl'], 
     460                self.dtype, 
     461                self.fast, 
     462                self.timestamp) 
     463            variants = ['Iq', 'Iqxy', 'Imagnetic'] 
     464            names = [generate.kernel_name(self.info, k) for k in variants] 
     465            kernels = [getattr(program, k) for k in names] 
     466            data = dict((k, v) for k, v in zip(variants, kernels)) 
     467            # keep a handle to program so GC doesn't collect 
     468            data['program'] = program 
     469            gpu.cache[key] = data 
     470        else: 
     471            data = gpu.cache[key] 
     472        return data[name] 
     473 
     474# TODO: check that we don't need a destructor for buffers which go out of scope 
    461475class GpuInput(object): 
    462476    """ 
     
    480494    def __init__(self, q_vectors, dtype=generate.F32): 
    481495        # type: (List[np.ndarray], np.dtype) -> None 
    482         # TODO: Do we ever need double precision q? 
     496        # TODO: do we ever need double precision q? 
    483497        self.nq = q_vectors[0].size 
    484498        self.dtype = np.dtype(dtype) 
    485499        self.is_2d = (len(q_vectors) == 2) 
    486         # TODO: Stretch input based on get_warp(). 
    487         # Not doing it now since warp depends on kernel, which is not known 
     500        # TODO: stretch input based on get_warp() 
     501        # not doing it now since warp depends on kernel, which is not known 
    488502        # at this point, so instead using 32, which is good on the set of 
    489503        # architectures tested so far. 
     
    498512            self.q[:self.nq] = q_vectors[0] 
    499513        self.global_size = [self.q.shape[0]] 
    500         #print("creating inputs of size", self.global_size) 
    501  
    502         # Transfer input value to GPU. 
     514        self._cache_key = unique_id() 
     515 
     516    @property 
     517    def q_b(self): 
     518        """Lazy creation of q buffer so it can survive context reset""" 
    503519        env = environment() 
    504         context = env.context[self.dtype] 
    505         self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    506                              hostbuf=self.q) 
     520        key = self._cache_key 
     521        if key not in env.cache: 
     522            context = env.context[self.dtype] 
     523            #print("creating inputs of size", self.global_size) 
     524            buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
     525                               hostbuf=self.q) 
     526            env.cache[key] = buffer 
     527        return env.cache[key] 
    507528 
    508529    def release(self): 
    509530        # type: () -> None 
    510531        """ 
    511         Free the buffer associated with the q value. 
    512         """ 
    513         if self.q_b is not None: 
    514             self.q_b.release() 
    515             self.q_b = None 
     532        Free the buffer associated with the q value 
     533        """ 
     534        environment().free_buffer(id(self)) 
    516535 
    517536    def __del__(self): 
     
    519538        self.release() 
    520539 
    521  
    522540class GpuKernel(Kernel): 
    523541    """ 
     
    526544    *model* is the GpuModel object to call 
    527545 
    528     The kernel is derived from :class:`Kernel`, providing the 
    529     :meth:`call_kernel` method to evaluate the kernel for a given set of 
    530     parameters.  Because of the need to move the q values to the GPU before 
    531     evaluation, the kernel is instantiated for a particular set of q vectors, 
    532     and can be called many times without transfering q each time. 
     546    The following attributes are defined: 
     547 
     548    *info* is the module information 
     549 
     550    *dtype* is the kernel precision 
     551 
     552    *dim* is '1d' or '2d' 
     553 
     554    *result* is a vector to contain the results of the call 
     555 
     556    The resulting call method takes the *pars*, a list of values for 
     557    the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 
     558    vectors for the polydisperse parameters.  *cutoff* determines the 
     559    integration limits: any points with combined weight less than *cutoff* 
     560    will not be calculated. 
    533561 
    534562    Call :meth:`release` when done with the kernel instance. 
    535563    """ 
    536     #: SAS model information structure. 
    537     info = None  # type: ModelInfo 
    538     #: Kernel precision. 
    539     dtype = None  # type: np.dtype 
    540     #: Kernel dimensions (1d or 2d). 
    541     dim = ""  # type: str 
    542     #: Calculation results, updated after each call to :meth:`_call_kernel`. 
    543     result = None  # type: np.ndarray 
    544  
    545564    def __init__(self, model, q_vectors): 
    546         # type: (GpuModel, List[np.ndarray]) -> None 
     565        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
    547566        dtype = model.dtype 
    548567        self.q_input = GpuInput(q_vectors, dtype) 
    549568        self._model = model 
    550  
    551         # Attributes accessed from the outside. 
     569        # F16 isn't sufficient, so don't support it 
     570        self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
     571        self._cache_key = unique_id() 
     572 
     573        # attributes accessed from the outside 
    552574        self.dim = '2d' if self.q_input.is_2d else '1d' 
    553575        self.info = model.info 
    554         self.dtype = dtype 
    555  
    556         # Converter to translate input to target type. 
    557         self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
    558  
    559         # Holding place for the returned value. 
     576        self.dtype = model.dtype 
     577 
     578        # holding place for the returned value 
    560579        nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 
    561         extra_q = 4  # Total weight, form volume, shell volume and R_eff. 
    562         self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 
    563  
    564         # Allocate result value on GPU. 
     580        extra_q = 4  # total weight, form volume, shell volume and R_eff 
     581        self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 
     582 
     583    @property 
     584    def _result_b(self): 
     585        """Lazy creation of result buffer so it can survive context reset""" 
    565586        env = environment() 
    566         context = env.context[self.dtype] 
    567         width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
    568         self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 
    569  
    570     def _call_kernel(self, call_details, values, cutoff, magnetic, 
    571                      effective_radius_type): 
    572         # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 
     587        key = self._cache_key 
     588        if key not in env.cache: 
     589            context = env.context[self.dtype] 
     590            width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
     591            buffer = cl.Buffer(context, mf.READ_WRITE, width) 
     592            env.cache[key] = buffer 
     593        return env.cache[key] 
     594 
     595    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     596        # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 
    573597        env = environment() 
    574598        queue = env.queue[self._model.dtype] 
    575599        context = queue.context 
    576600 
    577         # Arrange data transfer to card. 
     601        # Arrange data transfer to/from card 
     602        q_b = self.q_input.q_b 
     603        result_b = self._result_b 
    578604        details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    579605                              hostbuf=call_details.buffer) 
     
    581607                             hostbuf=values) 
    582608 
    583         # Setup kernel function and arguments. 
    584609        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
    585         kernel = self._model.get_function(name) 
     610        kernel = self._model.fetch_kernel(name) 
    586611        kernel_args = [ 
    587             np.uint32(self.q_input.nq),  # Number of inputs. 
    588             None,  # Placeholder for pd_start. 
    589             None,  # Placeholder for pd_stop. 
    590             details_b,  # Problem definition. 
    591             values_b,  # Parameter values. 
    592             self.q_input.q_b,  # Q values. 
    593             self._result_b,   # Result storage. 
    594             self._as_dtype(cutoff),  # Probability cutoff. 
    595             np.uint32(effective_radius_type),  # R_eff mode. 
     612            np.uint32(self.q_input.nq), None, None, 
     613            details_b, values_b, q_b, result_b, 
     614            self._as_dtype(cutoff), 
     615            np.uint32(effective_radius_type), 
    596616        ] 
    597  
    598         # Call kernel and retrieve results. 
    599617        #print("Calling OpenCL") 
    600618        #call_details.show(values) 
     619        #Call kernel and retrieve results 
    601620        wait_for = None 
    602621        last_nap = clock() 
     
    609628                               *kernel_args, wait_for=wait_for)] 
    610629            if stop < call_details.num_eval: 
    611                 # Allow other processes to run. 
     630                # Allow other processes to run 
    612631                wait_for[0].wait() 
    613632                current_time = clock() 
     
    615634                    time.sleep(0.001) 
    616635                    last_nap = current_time 
    617         cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for) 
     636        cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 
    618637        #print("result", self.result) 
    619638 
    620         # Free buffers. 
    621         details_b.release() 
    622         values_b.release() 
     639        # Free buffers 
     640        for v in (details_b, values_b): 
     641            if v is not None: 
     642                v.release() 
    623643 
    624644    def release(self): 
     
    627647        Release resources associated with the kernel. 
    628648        """ 
     649        environment().free_buffer(id(self)) 
    629650        self.q_input.release() 
    630         if self._result_b is not None: 
    631             self._result_b.release() 
    632             self._result_b = None 
    633651 
    634652    def __del__(self): 
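A hedged sketch of the caching scheme added to kernelcl.py above: GPU-side objects are stored in the environment cache under a unique id and created lazily, so they can be rebuilt transparently after the OpenCL context is reset (FakeBuffer stands in for a cl.Buffer; Environment and Input are simplified stand-ins for GpuEnvironment and GpuInput):

    _CURRENT_ID = 0
    def unique_id():
        global _CURRENT_ID
        _CURRENT_ID += 1
        return _CURRENT_ID

    class FakeBuffer(object):
        def release(self):
            print("buffer released")

    class Environment(object):
        def __init__(self):
            self.cache = {}
        def free_buffer(self, key):
            if key in self.cache:
                self.cache[key].release()
                del self.cache[key]

    ENV = Environment()

    class Input(object):
        def __init__(self):
            self._cache_key = unique_id()
        @property
        def buffer(self):
            # Lazy creation: if the cache was cleared by a context reset,
            # the buffer is simply recreated on the next access.
            if self._cache_key not in ENV.cache:
                ENV.cache[self._cache_key] = FakeBuffer()
            return ENV.cache[self._cache_key]

    q = Input()
    q.buffer                  # created on first access
    ENV.cache.clear()         # simulate a context reset
    q.buffer                  # recreated transparently
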
  • sasmodels/kernelcuda.py

    rfa26e78 rf872fd1  
    6363import time 
    6464import re 
    65 import atexit 
    6665 
    6766import numpy as np  # type: ignore 
    6867 
    6968 
    70 # Attempt to setup CUDA. This may fail if the pycuda package is not 
     69# Attempt to setup cuda. This may fail if the pycuda package is not 
    7170# installed or if it is installed but there are no devices available. 
    7271try: 
     
    108107MAX_LOOPS = 2048 
    109108 
    110  
    111109def use_cuda(): 
    112     sas_opencl = os.environ.get("SAS_OPENCL", "CUDA").lower() 
    113     return HAVE_CUDA and sas_opencl.startswith("cuda") 
    114  
     110    env = os.environ.get("SAS_OPENCL", "").lower() 
     111    return HAVE_CUDA and (env == "" or env.startswith("cuda")) 
    115112 
    116113ENV = None 
     
    124121        ENV.release() 
    125122    ENV = GpuEnvironment() if use_cuda() else None 
    126  
    127123 
    128124def environment(): 
     
    142138    return ENV 
    143139 
    144  
    145 # PyTest is not freeing ENV, so make sure it gets freed. 
    146 atexit.register(lambda: ENV.release() if ENV is not None else None) 
    147  
    148  
    149140def has_type(dtype): 
    150141    # type: (np.dtype) -> bool 
     
    152143    Return true if device supports the requested precision. 
    153144    """ 
    154     # Assume the NVIDIA card supports 32-bit and 64-bit floats. 
    155     # TODO: Check if pycuda support F16. 
     145    # Assume the nvidia card supports 32-bit and 64-bit floats. 
     146    # TODO: check if pycuda support F16 
    156147    return dtype in (generate.F32, generate.F64) 
    157148 
    158149 
    159150FUNCTION_PATTERN = re.compile(r"""^ 
    160   (?P<space>\s*)                       # Initial space. 
    161   (?P<qualifiers>^(?:\s*\b\w+\b\s*)+)  # One or more qualifiers before function. 
    162   (?P<function>\s*\b\w+\b\s*[(])       # Function name plus open parens. 
     151  (?P<space>\s*)                   # initial space 
     152  (?P<qualifiers>^(?:\s*\b\w+\b\s*)+) # one or more qualifiers before function 
     153  (?P<function>\s*\b\w+\b\s*[(])      # function name plus open parens 
    163154  """, re.VERBOSE|re.MULTILINE) 
    164155 
     
    167158  """, re.VERBOSE|re.MULTILINE) 
    168159 
    169  
    170160def _add_device_tag(match): 
    171161    # type: (None) -> str 
    172     # Note: Should be re.Match, but that isn't a simple type. 
     162    # Note: should be re.Match, but that isn't a simple type 
    173163    """ 
    174164    replace qualifiers with __device__ qualifiers if needed 
     
    183173        return "".join((space, "__device__ ", qualifiers, function)) 
    184174 
    185  
    186175def mark_device_functions(source): 
    187176    # type: (str) -> str 
     
    190179    """ 
    191180    return FUNCTION_PATTERN.sub(_add_device_tag, source) 
    192  
    193181 
    194182def show_device_functions(source): 
     
    200188        print(match.group('qualifiers').replace('\n',r'\n'), match.group('function'), '(') 
    201189    return source 
    202  
    203190 
    204191def compile_model(source, dtype, fast=False): 
     
    225212    #options = ['--verbose', '-E'] 
    226213    options = ['--use_fast_math'] if fast else None 
    227     program = SourceModule(source, no_extern_c=True, options=options) #, include_dirs=[...]) 
     214    program = SourceModule(source, no_extern_c=True, options=options) # include_dirs=[...] 
    228215 
    229216    #print("done with "+program) 
     
    231218 
    232219 
    233 # For now, this returns one device in the context. 
    234 # TODO: Create a context that contains all devices on all platforms. 
     220# for now, this returns one device in the context 
     221# TODO: create a context that contains all devices on all platforms 
    235222class GpuEnvironment(object): 
    236223    """ 
    237     GPU context for CUDA. 
     224    GPU context, with possibly many devices, and one queue per device. 
    238225    """ 
    239226    context = None # type: cuda.Context 
    240227    def __init__(self, devnum=None): 
    241228        # type: (int) -> None 
     229        # Byte boundary for data alignment 
     230        #self.data_boundary = max(d.min_data_type_align_size 
     231        #                         for d in self.context.devices) 
     232        self.compiled = {} 
    242233        env = os.environ.get("SAS_OPENCL", "").lower() 
    243234        if devnum is None and env.startswith("cuda:"): 
    244235            devnum = int(env[5:]) 
    245  
    246236        # Set the global context to the particular device number if one is 
    247237        # given, otherwise use the default context.  Perhaps this will be set 
     
    252242            self.context = make_default_context() 
    253243 
    254         ## Byte boundary for data alignment. 
    255         #self.data_boundary = max(d.min_data_type_align_size 
    256         #                         for d in self.context.devices) 
    257  
    258         # Cache for compiled programs, and for items in context. 
    259         self.compiled = {} 
    260  
    261244    def release(self): 
    262245        if self.context is not None: 
     
    279262        Compile the program for the device in the given context. 
    280263        """ 
    281         # Note: PyCuda (probably) caches but I'll do so as well just to 
    282         # save some data munging time. 
     264        # Note: PyOpenCL caches based on md5 hash of source, options and device 
     265        # so we don't really need to cache things for ourselves.  I'll do so 
     266        # anyway just to save some data munging time. 
    283267        tag = generate.tag_source(source) 
    284268        key = "%s-%s-%s%s"%(name, dtype, tag, ("-fast" if fast else "")) 
    285         # Check timestamp on program. 
     269        # Check timestamp on program 
    286270        program, program_timestamp = self.compiled.get(key, (None, np.inf)) 
    287271        if program_timestamp < timestamp: 
     
    293277        return program 
    294278 
    295  
    296279class GpuModel(KernelModel): 
    297280    """ 
     
    309292    that the compiler is allowed to take shortcuts. 
    310293    """ 
    311     info = None  # type: ModelInfo 
    312     source = ""  # type: str 
    313     dtype = None  # type: np.dtype 
    314     fast = False  # type: bool 
    315     _program = None # type: SourceModule 
    316     _kernels = None  # type: Dict[str, cuda.Function] 
     294    info = None # type: ModelInfo 
     295    source = "" # type: str 
     296    dtype = None # type: np.dtype 
     297    fast = False # type: bool 
     298    program = None # type: SourceModule 
     299    _kernels = None # type: List[cuda.Function] 
    317300 
    318301    def __init__(self, source, model_info, dtype=generate.F32, fast=False): 
     
    322305        self.dtype = dtype 
    323306        self.fast = fast 
     307        self.program = None # delay program creation 
     308        self._kernels = None 
    324309 
    325310    def __getstate__(self): 
     
    330315        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    331316        self.info, self.source, self.dtype, self.fast = state 
    332         self._program = self._kernels = None 
     317        self.program = None 
    333318 
    334319    def make_kernel(self, q_vectors): 
    335320        # type: (List[np.ndarray]) -> "GpuKernel" 
    336         return GpuKernel(self, q_vectors) 
    337  
    338     def get_function(self, name): 
    339         # type: (str) -> cuda.Function 
    340         """ 
    341         Fetch the kernel from the environment by name, compiling it if it 
    342         does not already exist. 
    343         """ 
    344         if self._program is None: 
    345             self._prepare_program() 
    346         return self._kernels[name] 
    347  
    348     def _prepare_program(self): 
    349         # type: (str) -> None 
    350         env = environment() 
    351         timestamp = generate.ocl_timestamp(self.info) 
    352         program = env.compile_program( 
    353             self.info.name, 
    354             self.source['opencl'], 
    355             self.dtype, 
    356             self.fast, 
    357             timestamp) 
    358         variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    359         names = [generate.kernel_name(self.info, k) for k in variants] 
    360         functions = [program.get_function(k) for k in names] 
    361         self._kernels = {k: v for k, v in zip(variants, functions)} 
    362         # Keep a handle to program so GC doesn't collect. 
    363         self._program = program 
    364  
    365  
    366 # TODO: Check that we don't need a destructor for buffers which go out of scope. 
     321        if self.program is None: 
     322            compile_program = environment().compile_program 
     323            timestamp = generate.ocl_timestamp(self.info) 
     324            self.program = compile_program( 
     325                self.info.name, 
     326                self.source['opencl'], 
     327                self.dtype, 
     328                self.fast, 
     329                timestamp) 
     330            variants = ['Iq', 'Iqxy', 'Imagnetic'] 
     331            names = [generate.kernel_name(self.info, k) for k in variants] 
     332            kernels = [self.program.get_function(k) for k in names] 
     333            self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 
     334        is_2d = len(q_vectors) == 2 
     335        if is_2d: 
     336            kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 
     337        else: 
     338            kernel = [self._kernels['Iq']]*2 
     339        return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
     340 
     341    def release(self): 
     342        # type: () -> None 
     343        """ 
     344        Free the resources associated with the model. 
     345        """ 
     346        if self.program is not None: 
     347            self.program = None 
     348 
     349    def __del__(self): 
     350        # type: () -> None 
     351        self.release() 
     352 
     353# TODO: check that we don't need a destructor for buffers which go out of scope 
    367354class GpuInput(object): 
    368355    """ 
     
    386373    def __init__(self, q_vectors, dtype=generate.F32): 
    387374        # type: (List[np.ndarray], np.dtype) -> None 
    388         # TODO: Do we ever need double precision q? 
     375        # TODO: do we ever need double precision q? 
    389376        self.nq = q_vectors[0].size 
    390377        self.dtype = np.dtype(dtype) 
    391378        self.is_2d = (len(q_vectors) == 2) 
    392         # TODO: Stretch input based on get_warp(). 
    393         # Not doing it now since warp depends on kernel, which is not known 
     379        # TODO: stretch input based on get_warp() 
     380        # not doing it now since warp depends on kernel, which is not known 
    394381        # at this point, so instead using 32, which is good on the set of 
    395382        # architectures tested so far. 
    396383        if self.is_2d: 
    397             width = ((self.nq+15)//16)*16 
     384            # Note: 16 rather than 15 because result is 1 longer than input. 
     385            width = ((self.nq+16)//16)*16 
    398386            self.q = np.empty((width, 2), dtype=dtype) 
    399387            self.q[:self.nq, 0] = q_vectors[0] 
    400388            self.q[:self.nq, 1] = q_vectors[1] 
    401389        else: 
    402             width = ((self.nq+31)//32)*32 
     390            # Note: 32 rather than 31 because result is 1 longer than input. 
     391            width = ((self.nq+32)//32)*32 
    403392            self.q = np.empty(width, dtype=dtype) 
    404393            self.q[:self.nq] = q_vectors[0] 
    405394        self.global_size = [self.q.shape[0]] 
    406395        #print("creating inputs of size", self.global_size) 
    407  
    408         # Transfer input value to GPU. 
    409396        self.q_b = cuda.to_device(self.q) 
    410397 
     
    412399        # type: () -> None 
    413400        """ 
    414         Free the buffer associated with the q value. 
     401        Free the memory. 
    415402        """ 
    416403        if self.q_b is not None: 
     
    422409        self.release() 
    423410 
    424  
    425411class GpuKernel(Kernel): 
    426412    """ 
    427413    Callable SAS kernel. 
    428414 
    429     *model* is the GpuModel object to call 
    430  
    431     The kernel is derived from :class:`Kernel`, providing the 
    432     :meth:`call_kernel` method to evaluate the kernel for a given set of 
    433     parameters.  Because of the need to move the q values to the GPU before 
    434     evaluation, the kernel is instantiated for a particular set of q vectors, 
    435     and can be called many times without transfering q each time. 
     415    *kernel* is the GpuKernel object to call 
     416 
     417    *model_info* is the module information 
     418 
     419    *q_vectors* is the q vectors at which the kernel should be evaluated 
     420 
     421    *dtype* is the kernel precision 
     422 
     423    The resulting call method takes the *pars*, a list of values for 
     424    the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 
     425    vectors for the polydisperse parameters.  *cutoff* determines the 
     426    integration limits: any points with combined weight less than *cutoff* 
     427    will not be calculated. 
    436428 
    437429    Call :meth:`release` when done with the kernel instance. 
    438430    """ 
    439     #: SAS model information structure. 
    440     info = None  # type: ModelInfo 
    441     #: Kernel precision. 
    442     dtype = None  # type: np.dtype 
    443     #: Kernel dimensions (1d or 2d). 
    444     dim = ""  # type: str 
    445     #: Calculation results, updated after each call to :meth:`_call_kernel`. 
    446     result = None  # type: np.ndarray 
    447  
    448     def __init__(self, model, q_vectors): 
    449         # type: (GpuModel, List[np.ndarray]) -> None 
    450         dtype = model.dtype 
     431    def __init__(self, kernel, dtype, model_info, q_vectors): 
     432        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
    451433        self.q_input = GpuInput(q_vectors, dtype) 
    452         self._model = model 
    453  
    454         # Attributes accessed from the outside. 
     434        self.kernel = kernel 
     435        # F16 isn't sufficient, so don't support it 
     436        self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
     437 
     438        # attributes accessed from the outside 
    455439        self.dim = '2d' if self.q_input.is_2d else '1d' 
    456         self.info = model.info 
     440        self.info = model_info 
    457441        self.dtype = dtype 
    458442 
    459         # Converter to translate input to target type. 
    460         self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
    461  
    462         # Holding place for the returned value. 
     443        # holding place for the returned value 
    463444        nout = 2 if self.info.have_Fq and self.dim == '1d' else 1 
    464         extra_q = 4  # Total weight, form volume, shell volume and R_eff. 
    465         self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 
    466  
    467         # Allocate result value on GPU. 
     445        extra_q = 4  # total weight, form volume, shell volume and R_eff 
     446        self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 
     447 
     448        # Inputs and outputs for each kernel call 
     449        # Note: res may be shorter than res_b if global_size != nq 
    468450        width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
    469         self._result_b = cuda.mem_alloc(width) 
    470  
    471     def _call_kernel(self, call_details, values, cutoff, magnetic, 
    472                      effective_radius_type): 
    473         # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 
    474  
    475         # Arrange data transfer to card. 
     451        self.result_b = cuda.mem_alloc(width) 
     452        self._need_release = [self.result_b] 
     453 
     454    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     455        # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 
     456        # Arrange data transfer to card 
    476457        details_b = cuda.to_device(call_details.buffer) 
    477458        values_b = cuda.to_device(values) 
    478459 
    479         # Setup kernel function and arguments. 
    480         name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
    481         kernel = self._model.get_function(name) 
    482         kernel_args = [ 
    483             np.uint32(self.q_input.nq),  # Number of inputs. 
    484             None,  # Placeholder for pd_start. 
    485             None,  # Placeholder for pd_stop. 
    486             details_b,  # Problem definition. 
    487             values_b,  # Parameter values. 
    488             self.q_input.q_b,  # Q values. 
    489             self._result_b,   # Result storage. 
    490             self._as_dtype(cutoff),  # Probability cutoff. 
    491             np.uint32(effective_radius_type),  # R_eff mode. 
     460        kernel = self.kernel[1 if magnetic else 0] 
     461        args = [ 
     462            np.uint32(self.q_input.nq), None, None, 
     463            details_b, values_b, self.q_input.q_b, self.result_b, 
     464            self._as_dtype(cutoff), 
     465            np.uint32(effective_radius_type), 
    492466        ] 
    493467        grid = partition(self.q_input.nq) 
    494  
    495         # Call kernel and retrieve results. 
    496         #print("Calling CUDA") 
     468        #print("Calling OpenCL") 
    497469        #call_details.show(values) 
     470        # Call kernel and retrieve results 
    498471        last_nap = time.clock() 
    499472        step = 100000000//self.q_input.nq + 1 
     
    502475            stop = min(start + step, call_details.num_eval) 
    503476            #print("queuing",start,stop) 
    504             kernel_args[1:3] = [np.int32(start), np.int32(stop)] 
    505             kernel(*kernel_args, **grid) 
     477            args[1:3] = [np.int32(start), np.int32(stop)] 
     478            kernel(*args, **grid) 
    506479            if stop < call_details.num_eval: 
    507480                sync() 
    508                 # Allow other processes to run. 
     481                # Allow other processes to run 
    509482                current_time = time.clock() 
    510483                if current_time - last_nap > 0.5: 
     
    512485                    last_nap = current_time 
    513486        sync() 
    514         cuda.memcpy_dtoh(self.result, self._result_b) 
     487        cuda.memcpy_dtoh(self.result, self.result_b) 
    515488        #print("result", self.result) 
    516489 
     
    523496        Release resources associated with the kernel. 
    524497        """ 
    525         self.q_input.release() 
    526         if self._result_b is not None: 
    527             self._result_b.free() 
    528             self._result_b = None 
     498        for p in self._need_release: 
     499            p.free() 
     500        self._need_release = [] 
    529501 
    530502    def __del__(self): 
     
    540512    Note: Maybe context.synchronize() is sufficient. 
    541513    """ 
    542     # Create an event with which to synchronize. 
     514    #return # The following works in C++; don't know what pycuda is doing 
     515    # Create an event with which to synchronize 
    543516    done = cuda.Event() 
    544517 
     
    546519    done.record() 
    547520 
    548     # Make sure we don't hog resource while waiting to sync. 
     521    #line added to not hog resources 
    549522    while not done.query(): 
    550523        time.sleep(0.01) 
     
    552525    # Block until the GPU executes the kernel. 
    553526    done.synchronize() 
    554  
    555527    # Clean up the event; I don't think they can be reused. 
    556528    del done 
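The event polling in sync() above keeps the Python process responsive while the GPU finishes the queued kernels. As a rough standalone illustration of the same pycuda pattern, assuming a CUDA context has already been created (for example via pycuda.autoinit); the helper name is invented::

    import time
    import pycuda.driver as cuda
    import pycuda.autoinit  # noqa: F401  (creates a default context)

    def wait_for_gpu(poll_interval=0.01):
        # Record an event at the current point in the default stream.
        done = cuda.Event()
        done.record()
        # Poll rather than block so the CPU stays available to other processes.
        while not done.query():
            time.sleep(poll_interval)
        # query() already returned True, so this returns immediately; it also
        # raises if the device reported an error while executing.
        done.synchronize()

    wait_for_gpu()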
  • sasmodels/kerneldll.py

    r3199b17 re44432d  
    100100# pylint: enable=unused-import 
    101101 
    102 # Compiler output is a byte stream that needs to be decoded in python 3. 
     102# Compiler output is a byte stream that needs to be decoded in python 3 
    103103decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 
    104104 
     
    115115        COMPILER = "tinycc" 
    116116    elif "VCINSTALLDIR" in os.environ: 
    117         # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 
    118         # environment and we can use the MSVC compiler.  Otherwise, if 
    119         # tinycc is available then use it.  Otherwise, hope that mingw 
    120         # is available. 
     117        # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 
     118        # and we can use the MSVC compiler.  Otherwise, if tinycc is available 
     119        # the use it.  Otherwise, hope that mingw is available. 
    121120        COMPILER = "msvc" 
    122121    else: 
     
    125124    COMPILER = "unix" 
    126125 
    127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86"  # 4 byte pointers on x86. 
     126ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86"  # 4 byte pointers on x86 
    128127if COMPILER == "unix": 
    129     # Generic unix compile. 
    130     # On Mac users will need the X code command line tools installed. 
     128    # Generic unix compile 
     129    # On mac users will need the X code command line tools installed 
    131130    #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 
    132131    CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 
    133     # Add OpenMP support if not running on a Mac. 
     132    # add openmp support if not running on a mac 
    134133    if sys.platform != "darwin": 
    135         # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9). 
     134        # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 
    136135        # Shut it off for all unix until we can investigate. 
    137136        #CC.append("-fopenmp") 
     
    145144    # vcomp90.dll on the path.  One may be found here: 
    146145    #       C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 
    147     # Copy this to the python directory and uncomment the OpenMP COMPILE. 
    148     # TODO: Remove intermediate OBJ file created in the directory. 
    149     # TODO: Maybe don't use randomized name for the c file. 
    150     # TODO: Maybe ask distutils to find MSVC. 
     146    # Copy this to the python directory and uncomment the OpenMP COMPILE 
     147    # TODO: remove intermediate OBJ file created in the directory 
     148    # TODO: maybe don't use randomized name for the c file 
     149    # TODO: maybe ask distutils to find MSVC 
    151150    CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 
    152151    if "SAS_OPENMP" in os.environ: 
     
    173172ALLOW_SINGLE_PRECISION_DLLS = True 
    174173 
    175  
    176174def compile(source, output): 
    177175    # type: (str, str) -> None 
     
    185183    logging.info(command_str) 
    186184    try: 
    187         # Need shell=True on windows to keep console box from popping up. 
     185        # need shell=True on windows to keep console box from popping up 
    188186        shell = (os.name == 'nt') 
    189187        subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) 
     
    194192        raise RuntimeError("compile failed.  File is in %r"%source) 
    195193 
    196  
    197194def dll_name(model_info, dtype): 
    198195    # type: (ModelInfo, np.dtype) ->  str 
     
    205202    basename += ARCH + ".so" 
    206203 
    207     # Hack to find precompiled dlls. 
     204    # Hack to find precompiled dlls 
    208205    path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 
    209206    if os.path.exists(path): 
     
    245242        raise ValueError("16 bit floats not supported") 
    246243    if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 
    247         dtype = F64  # Force 64-bit dll. 
    248     # Note: dtype may be F128 for long double precision. 
     244        dtype = F64  # Force 64-bit dll 
     245    # Note: dtype may be F128 for long double precision 
    249246 
    250247    dll = dll_path(model_info, dtype) 
     
    257254        need_recompile = dll_time < newest_source 
    258255    if need_recompile: 
    259         # Make sure the DLL path exists. 
     256        # Make sure the DLL path exists 
    260257        if not os.path.exists(SAS_DLL_PATH): 
    261258            os.makedirs(SAS_DLL_PATH) 
     
    266263            file_handle.write(source) 
    267264        compile(source=filename, output=dll) 
    268         # Comment the following to keep the generated C file. 
    269         # Note: If there is a syntax error then compile raises an error 
     265        # comment the following to keep the generated c file 
     266        # Note: if there is a syntax error then compile raises an error 
    270267        # and the source file will not be deleted. 
    271268        os.unlink(filename) 
     
    306303        self.dllpath = dllpath 
    307304        self._dll = None  # type: ct.CDLL 
    308         self._kernels = None  # type: List[Callable, Callable] 
     305        self._kernels = None # type: List[Callable, Callable] 
    309306        self.dtype = np.dtype(dtype) 
    310307 
     
    341338        # type: (List[np.ndarray]) -> DllKernel 
    342339        q_input = PyInput(q_vectors, self.dtype) 
    343         # Note: DLL is lazy loaded. 
     340        # Note: pickle not supported for DllKernel 
    344341        if self._dll is None: 
    345342            self._load_dll() 
     
    361358        self._dll = None 
    362359 
    363  
    364360class DllKernel(Kernel): 
    365361    """ 
     
    383379    def __init__(self, kernel, model_info, q_input): 
    384380        # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 
    385         dtype = q_input.dtype 
     381        #,model_info,q_input) 
     382        self.kernel = kernel 
     383        self.info = model_info 
    386384        self.q_input = q_input 
    387         self.kernel = kernel 
    388  
    389         # Attributes accessed from the outside. 
     385        self.dtype = q_input.dtype 
    390386        self.dim = '2d' if q_input.is_2d else '1d' 
    391         self.info = model_info 
    392         self.dtype = dtype 
    393  
    394         # Converter to translate input to target type. 
    395         self._as_dtype = (np.float32 if dtype == generate.F32 
    396                           else np.float64 if dtype == generate.F64 
    397                           else np.float128) 
    398  
    399         # Holding place for the returned value. 
     387        # leave room for f1/f2 results in case we need to compute beta for 1d models 
    400388        nout = 2 if self.info.have_Fq else 1 
    401         extra_q = 4  # Total weight, form volume, shell volume and R_eff. 
    402         self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 
    403  
    404     def _call_kernel(self, call_details, values, cutoff, magnetic, 
    405                      effective_radius_type): 
    406         # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 
    407  
    408         # Setup kernel function and arguments. 
     389        # +4 for total weight, shell volume, effective radius, form volume 
     390        self.result = np.empty(q_input.nq*nout + 4, self.dtype) 
     391        self.real = (np.float32 if self.q_input.dtype == generate.F32 
     392                     else np.float64 if self.q_input.dtype == generate.F64 
     393                     else np.float128) 
     394 
     395    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     396        # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 
    409397        kernel = self.kernel[1 if magnetic else 0] 
    410         kernel_args = [ 
    411             self.q_input.nq,  # Number of inputs. 
    412             None,  # Placeholder for pd_start. 
    413             None,  # Placeholder for pd_stop. 
    414             call_details.buffer.ctypes.data,  # Problem definition. 
    415             values.ctypes.data,  # Parameter values. 
    416             self.q_input.q.ctypes.data,  # Q values. 
    417             self.result.ctypes.data,   # Result storage. 
    418             self._as_dtype(cutoff),  # Probability cutoff. 
    419             effective_radius_type,  # R_eff mode. 
     398        args = [ 
     399            self.q_input.nq, # nq 
     400            None, # pd_start 
     401            None, # pd_stop pd_stride[MAX_PD] 
     402            call_details.buffer.ctypes.data, # problem 
     403            values.ctypes.data,  # pars 
     404            self.q_input.q.ctypes.data, # q 
     405            self.result.ctypes.data,   # results 
     406            self.real(cutoff), # cutoff 
     407            effective_radius_type, # cutoff 
    420408        ] 
    421  
    422         # Call kernel and retrieve results. 
    423409        #print("Calling DLL") 
    424410        #call_details.show(values) 
    425411        step = 100 
    426         # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops? 
    427412        for start in range(0, call_details.num_eval, step): 
    428413            stop = min(start + step, call_details.num_eval) 
    429             kernel_args[1:3] = [start, stop] 
    430             kernel(*kernel_args) # type: ignore 
     414            args[1:3] = [start, stop] 
     415            kernel(*args) # type: ignore 
    431416 
    432417    def release(self): 
    433418        # type: () -> None 
    434419        """ 
    435         Release resources associated with the kernel. 
     420        Release any resources associated with the kernel. 
    436421        """ 
    437         # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 
    438         # Should we be doing the same for DLL? 
    439         #self.q_input.release() 
    440         pass 
    441  
    442     def __del__(self): 
    443         # type: () -> None 
    444         self.release() 
     422        self.q_input.release() 
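The DllKernel hunks above mostly regularise names and comments; the calling convention itself is unchanged: numpy buffers are handed to the compiled kernel as raw pointers through ``.ctypes.data``, and the polydispersity evaluation is dispatched in [start, stop) chunks. A hedged sketch of that pattern follows; the shared-library path and the C signature are invented for illustration and are not the generated sasmodels interface::

    import ctypes as ct
    import numpy as np

    # Hypothetical compiled model with C signature:
    #   void kernel(int32_t nq, int32_t start, int32_t stop,
    #               const double *q, double *result);
    lib = ct.CDLL("./sphere_model.so")   # assumed path, purely illustrative
    kernel = lib.kernel
    kernel.argtypes = [ct.c_int32, ct.c_int32, ct.c_int32,
                       ct.c_void_p, ct.c_void_p]
    kernel.restype = None

    q = np.linspace(0.001, 0.5, 200)     # must stay alive and contiguous
    result = np.zeros_like(q)

    num_eval, step = 1000, 100           # polydispersity points per chunk
    for start in range(0, num_eval, step):
        stop = min(start + step, num_eval)
        # Raw pointers are passed, much as DllKernel does with
        # call_details.buffer, values, q and result.
        kernel(len(q), start, stop, q.ctypes.data, result.ctypes.data)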
  • sasmodels/kernelpy.py

    r3199b17 re44432d  
    3333logger = logging.getLogger(__name__) 
    3434 
    35  
    3635class PyModel(KernelModel): 
    3736    """ 
     
    3938    """ 
    4039    def __init__(self, model_info): 
    41         # Make sure Iq is available and vectorized. 
     40        # Make sure Iq is available and vectorized 
    4241        _create_default_functions(model_info) 
    4342        self.info = model_info 
     
    5453        """ 
    5554        pass 
    56  
    5755 
    5856class PyInput(object): 
     
    9391        self.q = None 
    9492 
    95  
    9693class PyKernel(Kernel): 
    9794    """ 
     
    134131        parameter_vector = np.empty(len(partable.call_parameters)-2, 'd') 
    135132 
    136         # Create views into the array to hold the arguments. 
     133        # Create views into the array to hold the arguments 
    137134        offset = 0 
    138135        kernel_args, volume_args = [], [] 
     
    177174                        else (lambda mode: 1.0)) 
    178175 
     176 
     177 
    179178    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
    180179        # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 
     
    196195        self.q_input.release() 
    197196        self.q_input = None 
    198  
    199197 
    200198def _loops(parameters,    # type: np.ndarray 
     
    256254        total = np.zeros(nq, 'd') 
    257255        for loop_index in range(call_details.num_eval): 
    258             # Update polydispersity parameter values. 
     256            # update polydispersity parameter values 
    259257            if p0_index == p0_length: 
    260258                pd_index = (loop_index//pd_stride)%pd_length 
     
    267265            p0_index += 1 
    268266            if weight > cutoff: 
    269                 # Call the scattering function. 
     267                # Call the scattering function 
    270268                # Assume that NaNs are only generated if the parameters are bad; 
    271269                # exclude all q for that NaN.  Even better would be to have an 
     
    275273                    continue 
    276274 
    277                 # Update value and norm. 
     275                # update value and norm 
    278276                total += weight * Iq 
    279277                weight_norm += weight 
     
    295293    any functions that are not already marked as vectorized. 
    296294    """ 
    297     # Note: Must call create_vector_Iq before create_vector_Iqxy. 
     295    # Note: must call create_vector_Iq before create_vector_Iqxy 
    298296    _create_vector_Iq(model_info) 
    299297    _create_vector_Iqxy(model_info) 
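The _loops hunk only touches comment capitalisation, but the accumulation it documents is worth restating: each polydispersity point adds weight * Iq to a running total, points below the cutoff are skipped, and any evaluation that returns NaN is excluded so one bad parameter set cannot poison the average. A self-contained sketch, with a toy sphere-like kernel and Gaussian radius weights standing in for the generated model::

    import numpy as np

    def sphere_iq(q, radius):
        # Toy kernel standing in for the generated model Iq.
        qr = q * radius
        return (3.0 * (np.sin(qr) - qr * np.cos(qr)) / qr**3) ** 2

    q = np.linspace(0.005, 0.5, 100)
    radii = np.linspace(40.0, 60.0, 21)
    weights = np.exp(-0.5 * ((radii - 50.0) / 5.0) ** 2)

    cutoff = 1e-5
    total = np.zeros_like(q)
    weight_norm = 0.0
    for radius, weight in zip(radii, weights):
        if weight <= cutoff:
            continue
        Iq = sphere_iq(q, radius)
        if np.isnan(Iq).any():
            # Bad parameter set: exclude the whole curve, as in _loops.
            continue
        total += weight * Iq
        weight_norm += weight

    average = total / weight_norm if weight_norm > 0 else total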
  • sasmodels/model_test.py

    rd92182f r5024a56  
    55Usage:: 
    66 
    7     python -m sasmodels.model_test [opencl|cuda|dll|all] model1 model2 ... 
    8  
    9 If model1 is 'all', then all except the remaining models will be tested. 
    10 Subgroups are also possible, such as 'py', 'single' or '1d'.  See 
    11 :func:`core.list_models` for details. 
     7    python -m sasmodels.model_test [opencl|cuda|dll] model1 model2 ... 
     8 
     9    if model1 is 'all', then all except the remaining models will be tested 
    1210 
    1311Each model is tested using the default parameters at q=0.1, (qx, qy)=(0.1, 0.1), 
     
    4745from __future__ import print_function 
    4846 
    49 import argparse 
    5047import sys 
    5148import unittest 
     
    9289    suite = unittest.TestSuite() 
    9390 
    94     try: 
    95         # See if the first model parses as a model group 
    96         group = list_models(models[0]) 
     91    if models[0] in core.KINDS: 
    9792        skip = models[1:] 
    98         models = group 
    99     except Exception: 
     93        models = list_models(models[0]) 
     94    else: 
    10095        skip = [] 
    10196    for model_name in models: 
     
    172167        # test using cuda if desired and available 
    173168        if 'cuda' in loaders and use_cuda(): 
    174             test_name = "%s-cuda" % model_info.id 
     169            test_name = "%s-cuda"%model_name 
    175170            test_method_name = "test_%s_cuda" % model_info.id 
    176171            # Using dtype=None so that the models that are only 
     
    479474 
    480475 
     476def main(*models): 
     477    # type: (*str) -> int 
     478    """ 
     479    Run tests on the given models. 
     480 
     481    Returns 0 if success or 1 if any tests fail. 
     482    """ 
     483    try: 
     484        from xmlrunner import XMLTestRunner as TestRunner 
     485        test_args = {'output': 'logs'} 
     486    except ImportError: 
     487        from unittest import TextTestRunner as TestRunner 
     488        test_args = {} 
     489 
     490    if models and models[0] == '-v': 
     491        verbosity = 2 
     492        models = models[1:] 
     493    else: 
     494        verbosity = 1 
     495    if models and models[0] == 'opencl': 
     496        if not use_opencl(): 
     497            print("opencl is not available") 
     498            return 1 
     499        loaders = ['opencl'] 
     500        models = models[1:] 
     501    elif models and models[0] == 'cuda': 
     502        if not use_cuda(): 
     503            print("cuda is not available") 
     504            return 1 
     505        loaders = ['cuda'] 
     506        models = models[1:] 
     507    elif models and models[0] == 'dll': 
     508        # TODO: test if compiler is available? 
     509        loaders = ['dll'] 
     510        models = models[1:] 
     511    else: 
     512        loaders = ['dll'] 
     513        if use_opencl(): 
     514            loaders.append('opencl') 
     515        if use_cuda(): 
     516            loaders.append('cuda') 
     517    if not models: 
     518        print("""\ 
     519usage: 
     520  python -m sasmodels.model_test [-v] [opencl|cuda|dll] model1 model2 ... 
     521 
     522If -v is included on the command line, then use verbose output. 
     523 
     524If no platform is specified, then models will be tested with dll, and 
     525if available, OpenCL and CUDA; the compute target is ignored for pure python models. 
     526 
     527If model1 is 'all', then all except the remaining models will be tested. 
     528 
     529""") 
     530 
     531        return 1 
     532 
     533    runner = TestRunner(verbosity=verbosity, **test_args) 
     534    result = runner.run(make_suite(loaders, models)) 
     535    return 1 if result.failures or result.errors else 0 
     536 
     537 
    481538def model_tests(): 
    482539    # type: () -> Iterator[Callable[[], None]] 
     
    513570 
    514571 
    515 def main(): 
    516     # type: (*str) -> int 
    517     """ 
    518     Run tests on the given models. 
    519  
    520     Returns 0 if success or 1 if any tests fail. 
    521     """ 
    522     try: 
    523         from xmlrunner import XMLTestRunner as TestRunner 
    524         test_args = {'output': 'logs'} 
    525     except ImportError: 
    526         from unittest import TextTestRunner as TestRunner 
    527         test_args = {} 
    528  
    529     parser = argparse.ArgumentParser(description="Test SasModels Models") 
    530     parser.add_argument("-v", "--verbose", action="store_const", 
    531                         default=1, const=2, help="Use verbose output") 
    532     parser.add_argument("-e", "--engine", default="all", 
    533                         help="Engines on which to run the test.  " 
    534                         "Valid values are opencl, cuda, dll, and all. " 
    535                         "Defaults to all if no value is given") 
    536     parser.add_argument("models", nargs="*", 
    537                         help="The names of the models to be tested.  " 
    538                         "If the first model is 'all', then all but the listed " 
    539                         "models will be tested.  See core.list_models() for " 
    540                         "names of other groups, such as 'py' or 'single'.") 
    541     args, models = parser.parse_known_args() 
    542  
    543     if args.engine == "opencl": 
    544         if not use_opencl(): 
    545             print("opencl is not available") 
    546             return 1 
    547         loaders = ['opencl'] 
    548     elif args.engine == "dll": 
    549         loaders = ["dll"] 
    550     elif args.engine == "cuda": 
    551         if not use_cuda(): 
    552             print("cuda is not available") 
    553             return 1 
    554         loaders = ['cuda'] 
    555     elif args.engine == "all": 
    556         loaders = ['dll'] 
    557         if use_opencl(): 
    558             loaders.append('opencl') 
    559         if use_cuda(): 
    560             loaders.append('cuda') 
    561     else: 
    562         print("unknown engine " + args.engine) 
    563         return 1 
    564  
    565     runner = TestRunner(verbosity=args.verbose, **test_args) 
    566     result = runner.run(make_suite(loaders, args.models)) 
    567     return 1 if result.failures or result.errors else 0 
    568  
    569  
    570572if __name__ == "__main__": 
    571     sys.exit(main()) 
     573    sys.exit(main(*sys.argv[1:])) 
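The rewritten main() swaps the hand-rolled sys.argv handling for argparse, so the engine is chosen with -e/--engine (for example ``python -m sasmodels.model_test -v -e opencl cylinder sphere``) and parse_known_args keeps unrecognised tokens from aborting the run. A minimal sketch of that option handling, with the test-running body omitted::

    import argparse

    parser = argparse.ArgumentParser(description="Test SasModels Models")
    parser.add_argument("-v", "--verbose", action="store_const",
                        default=1, const=2, help="Use verbose output")
    parser.add_argument("-e", "--engine", default="all",
                        help="opencl, cuda, dll, or all")
    parser.add_argument("models", nargs="*",
                        help="models to test; 'all' tests everything else")
    args, extra = parser.parse_known_args(["-e", "dll", "cylinder", "sphere"])
    print(args.verbose, args.engine, args.models, extra)
    # -> 1 dll ['cylinder', 'sphere'] []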
  • sasmodels/modelinfo.py

    rd8eaa3d r39a06c9  
    295295    example, might be used to set the value of a shape parameter. 
    296296 
    297     Control parameters are used for variant models such as :ref:`rpa` which 
    298     have different cases with different parameters, as well as models 
    299     like :ref:`spherical_sld` with its user defined number of shells. 
    300     The control parameter should appear in the parameter table along with the 
    301     parameters it is controlling.  For variant models, use *[CASES]* in 
    302     place of the parameter limits within the parameter definition table, 
    303     with case names such as:: 
    304  
    305          CASES = ["diblock copolymer", "triblock copolymer", ...] 
    306  
    307     This should give *limits=[[case1, case2, ...]]*, but the model loader 
    308     translates it to *limits=[0, len(CASES)-1]*, and adds *choices=CASES* to 
    309     the :class:`Parameter` definition. Note that models can use a list of 
    310     cases as a parameter without it being a control parameter.  Either way, 
    311     the parameter is sent to the model evaluator as *float(choice_num)*, 
    312     where choices are numbered from 0. :meth:`ModelInfo.get_hidden_parameters` 
    313     will determine which parameters to display. 
    314  
    315     The class constructor should not be called directly, but instead the 
    316     parameter table is built using :func:`make_parameter_table` and 
     297    These values are set by :func:`make_parameter_table` and 
    317298    :func:`parse_parameter` therein. 
    318299    """ 
     
    866847    info.sesans = getattr(kernel_module, 'sesans', None) # type: ignore 
    867848    info.random = getattr(kernel_module, 'random', None) 
     849 
     850    # multiplicity info 
     851    control_pars = [p.id for p in parameters.kernel_parameters if p.is_control] 
     852    default_control = control_pars[0] if control_pars else None 
     853    info.control = getattr(kernel_module, 'control', default_control) 
    868854    info.hidden = getattr(kernel_module, 'hidden', None) # type: ignore 
    869855 
     
    888874    info.opencl = getattr(kernel_module, 'opencl', not callable(info.Iq)) 
    889875    info.single = getattr(kernel_module, 'single', not callable(info.Iq)) 
    890      
    891     # Set control flag for explicitly set parameters, e.g., in the RPA model. 
    892     control = getattr(kernel_module, 'control', None) 
    893     if control is not None: 
    894         parameters[control].is_control = True 
    895876 
    896877    if callable(info.Iq) and parameters.has_2d: 
     
    943924    #: *sphere*hardsphere* or *cylinder+sphere*. 
    944925    composition = None      # type: Optional[Tuple[str, List[ModelInfo]]] 
     926    #: Name of the control parameter for a variant model such as :ref:`rpa`. 
     927    #: The *control* parameter should appear in the parameter table, with 
     928    #: limits defined as *[CASES]*, for case names such as 
     929    #: *CASES = ["diblock copolymer", "triblock copolymer", ...]*. 
     930    #: This should give *limits=[[case1, case2, ...]]*, but the 
     931    #: model loader translates this to *limits=[0, len(CASES)-1]*, and adds 
     932    #: *choices=CASES* to the :class:`Parameter` definition. Note that 
     933    #: models can use a list of cases as a parameter without it being a 
     934    #: control parameter.  Either way, the parameter is sent to the model 
     935    #: evaluator as *float(choice_num)*, where choices are numbered from 0. 
     936    #: See also :attr:`hidden`. 
     937    control = None          # type: str 
    945938    #: Different variants require different parameters.  In order to show 
    946939    #: just the parameters needed for the variant selected by :attr:`control`, 
     
    1006999    #: C code, either defined as a string, or in the sources. 
    10071000    shell_volume = None      # type: Union[None, str, Callable[[np.ndarray], float]] 
    1008     #: Computes the effective radius of the shape given the volume parameters. 
    1009     #: Only needed for models defined in python that can be used for 
    1010     #: monodisperse approximation for non-dilute solutions, P@S.  The first 
    1011     #: argument is the integer effective radius mode, with default 0. 
    1012     effective_radius = None  # type: Union[None, Callable[[int, np.ndarray], float]] 
    10131001    #: Returns *I(q, a, b, ...)* for parameters *a*, *b*, etc. defined 
    10141002    #: by the parameter table.  *Iq* can be defined as a python function, or 
     
    10221010    #: will return *I(q, a, b, ...)*.  Multiplicity parameters are sent as 
    10231011    #: pointers to doubles.  Constants in floating point expressions should 
    1024     #: include the decimal point. See :mod:`generate` for more details. If 
    1025     #: *have_Fq* is True, then Iq should return an interleaved array of 
    1026     #: $[\sum F(q_1), \sum F^2(q_1), \ldots, \sum F(q_n), \sum F^2(q_n)]$. 
     1012    #: include the decimal point. See :mod:`generate` for more details. 
    10271013    Iq = None               # type: Union[None, str, Callable[[np.ndarray], np.ndarray]] 
    10281014    #: Returns *I(qab, qc, a, b, ...)*.  The interface follows :attr:`Iq`. 
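The control-parameter documentation moves between the ModelInfo attribute docstring and the Parameter class description in this hunk; either way it is easier to follow next to a concrete table. Below is a hypothetical plugin fragment; the model, its parameters and the hidden() behaviour are invented purely to illustrate the *[CASES]* convention described above::

    from numpy import inf

    # Hypothetical variant model fragment.  "case_num" is the control
    # parameter; [CASES] stands in for its limits in the parameter table.
    CASES = ["diblock copolymer", "triblock copolymer", "homopolymer blend"]

    parameters = [
        #   name        units  default  limits    type  description
        ["case_num",    "",    0,       [CASES],  "",   "Polymer architecture"],
        ["N[4]",        "",    1000.0,  [1, inf], "",   "Degree of polymerization"],
        ["Phi[4]",      "",    0.25,    [0, 1],   "",   "Volume fraction"],
    ]
    control = "case_num"   # marks case_num as the control parameter on load

    def hidden(case_num):
        # Return the set of parameter names to hide for the selected case;
        # which parameters are hidden per case is purely illustrative.
        return set() if case_num >= 1 else {"N[0]", "Phi[0]"}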
  • sasmodels/models/rpa.c

    r19dc29e7 r71b751d  
    2525  double S0ba,Pbb,S0bb,Pbc,S0bc,Pbd,S0bd; 
    2626  double S0ca,S0cb,Pcc,S0cc,Pcd,S0cd; 
    27   //double S0da,S0db,S0dc; 
     27  double S0da,S0db,S0dc; 
    2828  double Pdd,S0dd; 
    2929  double Kaa,Kbb,Kcc; 
    3030  double Kba,Kca,Kcb; 
    31   //double Kda,Kdb,Kdc,Kdd; 
     31  double Kda,Kdb,Kdc,Kdd; 
    3232  double Zaa,Zab,Zac,Zba,Zbb,Zbc,Zca,Zcb,Zcc; 
    3333  double DenT,T11,T12,T13,T21,T22,T23,T31,T32,T33; 
     
    3636  double N11,N12,N13,N21,N22,N23,N31,N32,N33; 
    3737  double M11,M12,M13,M21,M22,M23,M31,M32,M33; 
    38   double S11,S12,S22,S23,S13,S33; 
    39   //double S21,S31,S32,S44;  
    40   //double S14,S24,S34,S41,S42,S43; 
     38  double S11,S12,S13,S14,S21,S22,S23,S24; 
     39  double S31,S32,S33,S34,S41,S42,S43,S44; 
    4140  double Lad,Lbd,Lcd,Nav,Intg; 
    4241 
     
    116115  S0cd=(Phicd*vcd*Ncd)*Pcd; 
    117116 
    118   //S0da=S0ad; 
    119   //S0db=S0bd; 
    120   //S0dc=S0cd; 
     117  S0da=S0ad; 
     118  S0db=S0bd; 
     119  S0dc=S0cd; 
    121120  Pdd=2.0*(exp(-Xd)-1.0+Xd)/(Xd*Xd); // free D chain 
    122121  S0dd=N[3]*Phi[3]*v[3]*Pdd; 
     
    199198  S0ca=S0ac; 
    200199  S0cb=S0bc; 
    201   //S0da=S0ad; 
    202   //S0db=S0bd; 
    203   //S0dc=S0cd; 
     200  S0da=S0ad; 
     201  S0db=S0bd; 
     202  S0dc=S0cd; 
    204203 
    205204  // self chi parameter is 0 ... of course 
     
    207206  Kbb=0.0; 
    208207  Kcc=0.0; 
    209   //Kdd=0.0; 
     208  Kdd=0.0; 
    210209 
    211210  Kba=Kab; 
    212211  Kca=Kac; 
    213212  Kcb=Kbc; 
    214   //Kda=Kad; 
    215   //Kdb=Kbd; 
    216   //Kdc=Kcd; 
     213  Kda=Kad; 
     214  Kdb=Kbd; 
     215  Kdc=Kcd; 
    217216 
    218217  Zaa=Kaa-Kad-Kad; 
     
    295294  S12= Q12*S0aa + Q22*S0ab + Q32*S0ac; 
    296295  S13= Q13*S0aa + Q23*S0ab + Q33*S0ac; 
     296  S14=-S11-S12-S13; 
     297  S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 
    297298  S22= Q12*S0ba + Q22*S0bb + Q32*S0bc; 
    298299  S23= Q13*S0ba + Q23*S0bb + Q33*S0bc; 
     300  S24=-S21-S22-S23; 
     301  S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 
     302  S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 
    299303  S33= Q13*S0ca + Q23*S0cb + Q33*S0cc; 
    300   //S21= Q11*S0ba + Q21*S0bb + Q31*S0bc; 
    301   //S31= Q11*S0ca + Q21*S0cb + Q31*S0cc; 
    302   //S32= Q12*S0ca + Q22*S0cb + Q32*S0cc; 
    303   //S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 
    304   //S14=-S11-S12-S13; 
    305   //S24=-S21-S22-S23; 
    306   //S34=-S31-S32-S33; 
    307   //S41=S14; 
    308   //S42=S24; 
    309   //S43=S34; 
     304  S34=-S31-S32-S33; 
     305  S41=S14; 
     306  S42=S24; 
     307  S43=S34; 
     308  S44=S11+S22+S33+2.0*S12+2.0*S13+2.0*S23; 
    310309 
    311310  //calculate contrast where L[i] is the scattering length of i and D is the matrix 
  • sasmodels/multiscat.py

    r2c4a190 rb3703f5  
    342342 
    343343    *probability* is related to the expected number of scattering 
    344     events in the sample $\lambda$ as $p = 1 - e^{-\lambda}$. 
    345     *coverage* determines how many scattering steps to consider.  The 
    346     default is 0.99, which sets $n$ such that $1 \ldots n$ covers 99% 
    347     of the Poisson probability mass function. 
     344    events in the sample $\lambda$ as $p = 1 = e^{-\lambda}$.  As a 
     345    hack to allow probability to be a fitted parameter, the "value" 
     346    can be a function that takes no parameters and returns the current 
     347    value of the probability.  *coverage* determines how many scattering 
     348    steps to consider.  The default is 0.99, which sets $n$ such that 
     349    $1 \ldots n$ covers 99% of the Poisson probability mass function. 
    348350 
    349351    *is2d* is True then 2D scattering is used, otherwise it accepts 
     
    397399        self.qmin = qmin 
    398400        self.nq = nq 
    399         self.probability = 0. if probability is None else probability 
     401        self.probability = probability 
    400402        self.coverage = coverage 
    401403        self.is2d = is2d 
     
    454456        self.Iqxy = None # type: np.ndarray 
    455457 
    456         # Label probability as a fittable parameter, and give its external name 
    457         # Note that the external name must be a valid python identifier, since 
    458         # is will be set as an experiment attribute. 
    459         self.fittable = {'probability': 'scattering_probability'} 
    460  
    461458    def apply(self, theory): 
    462459        if self.is2d: 
     
    466463        Iq_calc = Iq_calc.reshape(self.nq, self.nq) 
    467464 
    468         # CRUFT: don't need probability as a function anymore 
    469465        probability = self.probability() if callable(self.probability) else self.probability 
    470466        coverage = self.coverage 
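The restored docstring ties the scattering probability to the Poisson rate through p = 1 - e^{-\lambda} and lets *coverage* bound how many scattering orders are kept. A small sketch of that bookkeeping, assuming coverage is measured over the Poisson mass on orders 0..n (the authoritative convention is whatever multiscat.py implements)::

    import math

    def scattering_orders(probability, coverage=0.99):
        # Expected number of scattering events, from p = 1 - exp(-lambda).
        lam = -math.log(1.0 - probability)
        # Smallest n for which the Poisson(lambda) mass on 0..n reaches the
        # requested coverage (one plausible reading of the docstring).
        total, n = math.exp(-lam), 0
        while total < coverage:
            n += 1
            total += math.exp(-lam) * lam**n / math.factorial(n)
        return max(n, 1)

    print(scattering_orders(0.5))   # -> 3 with the default coverage of 0.99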
  • sasmodels/product.py

    rb171acd r99658f6  
    128128    # Remember the component info blocks so we can build the model 
    129129    model_info.composition = ('product', [p_info, s_info]) 
     130    model_info.control = p_info.control 
    130131    model_info.hidden = p_info.hidden 
    131132    if getattr(p_info, 'profile', None) is not None: 
  • sasmodels/resolution.py

    rda3638f re2592f0  
    498498        if q_min < 0: 
    499499            q_min = q[0]*MINIMUM_ABSOLUTE_Q 
    500         n_low = int(np.ceil(log_delta_q * (log(q[0])-log(q_min)))) 
    501         q_low = np.logspace(log10(q_min), log10(q[0]), n_low+1)[:-1] 
     500        n_low = log_delta_q * (log(q[0])-log(q_min)) 
     501        q_low = np.logspace(log10(q_min), log10(q[0]), int(np.ceil(n_low))+1)[:-1] 
    502502    else: 
    503503        q_low = [] 
    504504    if q_max > q[-1]: 
    505         n_high = int(np.ceil(log_delta_q * (log(q_max)-log(q[-1])))) 
    506         q_high = np.logspace(log10(q[-1]), log10(q_max), n_high+1)[1:] 
     505        n_high = log_delta_q * (log(q_max)-log(q[-1])) 
     506        q_high = np.logspace(log10(q[-1]), log10(q_max), int(np.ceil(n_high))+1)[1:] 
    507507    else: 
    508508        q_high = [] 
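Both sides of the resolution.py hunk compute the same extrapolation; the new code simply folds the ceil() into n_low and n_high before calling logspace. The idea is to pad below q[0] and above q[-1] while keeping the logarithmic point density of the measured grid, roughly as sketched below; inferring log_delta_q from the data is one plausible choice, and resolution.py may also accept an explicit points-per-decade::

    import numpy as np
    from numpy import log, log10

    def extrapolate_q(q, q_min, q_max):
        # Points per unit change in ln(q), taken from the measured grid so
        # the extrapolated points keep the same logarithmic density.
        log_delta_q = (len(q) - 1) / (log(q[-1]) - log(q[0]))

        if q_min < q[0]:
            n_low = int(np.ceil(log_delta_q * (log(q[0]) - log(q_min))))
            q_low = np.logspace(log10(q_min), log10(q[0]), n_low + 1)[:-1]
        else:
            q_low = []
        if q_max > q[-1]:
            n_high = int(np.ceil(log_delta_q * (log(q_max) - log(q[-1]))))
            q_high = np.logspace(log10(q[-1]), log10(q_max), n_high + 1)[1:]
        else:
            q_high = []
        return np.concatenate([q_low, q, q_high])

    q = np.logspace(-2, -0.3, 40)
    padded = extrapolate_q(q, q_min=1e-3, q_max=1.0)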
  • sasmodels/sasview_model.py

    r21c93c3 r3a1afed  
    2525from . import core 
    2626from . import custom 
    27 from . import kernelcl 
    2827from . import product 
    2928from . import generate 
     
    3130from . import modelinfo 
    3231from .details import make_kernel_args, dispersion_mesh 
    33 from .kernelcl import reset_environment 
    3432 
    3533# pylint: disable=unused-import 
     
    7068#: has changed since we last reloaded. 
    7169_CACHED_MODULE = {}  # type: Dict[str, "module"] 
    72  
    73 def reset_environment(): 
    74     # type: () -> None 
    75     """ 
    76     Clear the compute engine context so that the GUI can change devices. 
    77  
    78     This removes all compiled kernels, even those that are active on fit 
    79     pages, but they will be restored the next time they are needed. 
    80     """ 
    81     kernelcl.reset_environment() 
    82     for model in MODELS.values(): 
    83         model._model = None 
    8470 
    8571def find_model(modelname): 
     
    253239 
    254240    # Process multiplicity 
    255     control_pars = [p.id for p in model_info.parameters.kernel_parameters 
    256                     if p.is_control] 
    257     control_id = control_pars[0] if control_pars else None 
    258241    non_fittable = []  # type: List[str] 
    259242    xlabel = model_info.profile_axes[0] if model_info.profile is not None else "" 
    260243    variants = MultiplicityInfo(0, "", [], xlabel) 
    261244    for p in model_info.parameters.kernel_parameters: 
    262         if p.id == control_id: 
     245        if p.name == model_info.control: 
    263246            non_fittable.append(p.name) 
    264247            variants = MultiplicityInfo( 
     
    713696    def _calculate_Iq(self, qx, qy=None): 
    714697        if self._model is None: 
    715             # Only need one copy of the compiled kernel regardless of how many 
    716             # times it is used, so store it in the class.  Also, to reset the 
    717             # compute engine, need to clear out all existing compiled kernels, 
    718             # which is much easier to do if we store them in the class. 
    719             self.__class__._model = core.build_model(self._model_info) 
     698            self._model = core.build_model(self._model_info) 
    720699        if qy is not None: 
    721700            q_vectors = [np.asarray(qx), np.asarray(qy)] 
     
    835814        """ 
    836815        if par.name not in self.params: 
    837             if par.id == self.multiplicity_info.control: 
     816            if par.name == self.multiplicity_info.control: 
    838817                return self.multiplicity, [self.multiplicity], [1.0] 
    839818            else: 
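One side of the _calculate_Iq hunk keeps a comment explaining the caching strategy on the sasview side: the compiled kernel is stored on the model class rather than the instance, so a single compile serves every fit page and reset_environment() can drop all kernels at once when the compute device changes. A hypothetical sketch of that pattern (class and helper bodies are stand-ins, not the sasview_model API)::

    class CachedKernelModel(object):
        # One compiled kernel shared by every instance of this model class.
        _model = None

        def _ensure_model(self):
            if self._model is None:
                # Assign on the class, not the instance, so one compile
                # serves all fit pages using this model.
                type(self)._model = self._build_model()
            return self._model

        @staticmethod
        def _build_model():
            return object()   # stand-in for core.build_model(model_info)

    def reset_environment(model_classes):
        # Clear every cached kernel; each is rebuilt lazily on next use,
        # which is what lets the GUI switch compute devices.
        for cls in model_classes:
            cls._model = None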