Changes in / [8b31efa:508475a] in sasmodels


Ignore:
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • doc/guide/magnetism/magnetism.rst

    rbefe905 rdf87acf  
    8989 
    9090===========   ================================================================ 
    91  M0:sld       $D_M M_0$ 
    92  mtheta:sld   $\theta_M$ 
    93  mphi:sld     $\phi_M$ 
    94  up:angle     $\theta_\mathrm{up}$ 
    95  up:frac_i    $u_i$ = (spin up)/(spin up + spin down) *before* the sample 
    96  up:frac_f    $u_f$ = (spin up)/(spin up + spin down) *after* the sample 
     91 sld_M0       $D_M M_0$ 
     92 sld_mtheta   $\theta_M$ 
     93 sld_mphi     $\phi_M$ 
     94 up_frac_i    $u_i$ = (spin up)/(spin up + spin down) *before* the sample 
     95 up_frac_f    $u_f$ = (spin up)/(spin up + spin down) *after* the sample 
     96 up_angle     $\theta_\mathrm{up}$ 
    9797===========   ================================================================ 
    9898 
    9999.. note:: 
    100     The values of the 'up:frac_i' and 'up:frac_f' must be in the range 0 to 1. 
     100    The values of the 'up_frac_i' and 'up_frac_f' must be in the range 0 to 1. 
    101101 
    102102*Document History* 
  • sasmodels/kernelcl.py

    r8b31efa r95f62aa  
    7676 
    7777from . import generate 
     78from .generate import F32, F64 
    7879from .kernel import KernelModel, Kernel 
    7980 
     
    133134 
    134135def use_opencl(): 
    135     env = os.environ.get("SAS_OPENCL", "").lower() 
    136     return HAVE_OPENCL and env != "none" and not env.startswith("cuda") 
     136    sas_opencl = os.environ.get("SAS_OPENCL", "OpenCL").lower() 
     137    return HAVE_OPENCL and sas_opencl != "none" and not sas_opencl.startswith("cuda") 
    137138 
    138139ENV = None 
     
    165166    Return true if device supports the requested precision. 
    166167    """ 
    167     if dtype == generate.F32: 
     168    if dtype == F32: 
    168169        return True 
    169170    elif dtype == generate.F64: 
     
    219220    """ 
    220221    GPU context, with possibly many devices, and one queue per device. 
     222 
     223    Because the environment can be reset during a live program (e.g., if the 
     224    user changes the active GPU device in the GUI), everything associated 
     225    with the device context must be cached in the environment and recreated 
     226    if the environment changes.  The *cache* attribute is a simple dictionary 
     227    which holds keys and references to objects, such as compiled kernels and 
     228    allocated buffers.  The running program should check in the cache for 
     229    long lived objects and create them if they are not there.  The program 
     230    should not hold onto cached objects, but instead only keep them active 
     231    for the duration of a function call.  When the environment is destroyed 
     232    then the *release* method for each active cache item is called before 
     233    the environment is freed.  This means that each cl buffer should be 
     234    in its own cache entry. 
    221235    """ 
    222236    def __init__(self): 
    223237        # type: () -> None 
    224238        # find gpu context 
    225         #self.context = cl.create_some_context() 
    226  
    227         self.context = None 
    228         if 'SAS_OPENCL' in os.environ: 
    229             # Set the PyOpenCL environment variable PYOPENCL_CTX  
    230             # from SAS_OPENCL=driver:device.  Ignore the generic 
    231             # SAS_OPENCL=opencl, which is used to select the default  
    232             # OpenCL device.  Don't need to check for "none" or 
    233             # "cuda" since use_opencl() would return False if they 
    234             # were defined, and we wouldn't get here. 
    235             dev_str = os.environ["SAS_OPENCL"] 
    236             if dev_str and dev_str.lower() != "opencl": 
    237                 os.environ["PYOPENCL_CTX"] = dev_str 
    238  
    239         if 'PYOPENCL_CTX' in os.environ: 
    240             self._create_some_context() 
    241  
    242         if not self.context: 
    243             self.context = _get_default_context() 
     239        context_list = _create_some_context() 
     240 
     241        # Find a context for F32 and for F64 (maybe the same one). 
     242        # F16 isn't good enough. 
     243        self.context = {} 
     244        for dtype in (F32, F64): 
     245            for context in context_list: 
     246                if has_type(context.devices[0], dtype): 
     247                    self.context[dtype] = context 
     248                    break 
     249            else: 
     250                self.context[dtype] = None 
     251 
     252        # Build a queue for each context 
     253        self.queue = {} 
     254        context = self.context[F32] 
     255        self.queue[F32] = cl.CommandQueue(context, context.devices[0]) 
     256        if self.context[F64] == self.context[F32]: 
     257            self.queue[F64] = self.queue[F32] 
     258        else: 
     259            context = self.context[F64] 
     260            self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 
    244261 
    245262        # Byte boundary for data alignment 
    246         #self.data_boundary = max(d.min_data_type_align_size 
    247         #                         for d in self.context.devices) 
    248         self.queues = [cl.CommandQueue(context, context.devices[0]) 
    249                        for context in self.context] 
     263        #self.data_boundary = max(context.devices[0].min_data_type_align_size 
     264        #                         for context in self.context.values()) 
     265 
     266        # Cache for compiled programs, and for items in context 
    250267        self.compiled = {} 
     268        self.cache = {} 
    251269 
    252270    def has_type(self, dtype): 
     
    255273        Return True if a device supporting the given type is available. 
    256274        """ 
    257         return any(has_type(d, dtype) 
    258                    for context in self.context 
    259                    for d in context.devices) 
    260  
    261     def get_queue(self, dtype): 
    262         # type: (np.dtype) -> cl.CommandQueue 
    263         """ 
    264         Return a command queue for the kernels of type dtype. 
    265         """ 
    266         for context, queue in zip(self.context, self.queues): 
    267             if all(has_type(d, dtype) for d in context.devices): 
    268                 return queue 
    269  
    270     def get_context(self, dtype): 
    271         # type: (np.dtype) -> cl.Context 
    272         """ 
    273         Return a OpenCL context for the kernels of type dtype. 
    274         """ 
    275         for context in self.context: 
    276             if all(has_type(d, dtype) for d in context.devices): 
    277                 return context 
    278  
    279     def _create_some_context(self): 
    280         # type: () -> cl.Context 
    281         """ 
    282         Protected call to cl.create_some_context without interactivity.  Use 
    283         this if SAS_OPENCL is set in the environment.  Sets the *context* 
    284         attribute. 
    285         """ 
    286         try: 
    287             self.context = [cl.create_some_context(interactive=False)] 
    288         except Exception as exc: 
    289             warnings.warn(str(exc)) 
    290             warnings.warn("pyopencl.create_some_context() failed") 
    291             warnings.warn("the environment variable 'SAS_OPENCL' might not be set correctly") 
     275        return self.context.get(dtype, None) is not None 
    292276 
    293277    def compile_program(self, name, source, dtype, fast, timestamp): 
     
    306290            del self.compiled[key] 
    307291        if key not in self.compiled: 
    308             context = self.get_context(dtype) 
     292            context = self.context[dtype] 
    309293            logging.info("building %s for OpenCL %s", key, 
    310294                         context.devices[0].name.strip()) 
    311             program = compile_model(self.get_context(dtype), 
     295            program = compile_model(self.context[dtype], 
    312296                                    str(source), dtype, fast) 
    313297            self.compiled[key] = (program, timestamp) 
    314298        return program 
     299 
     300    def free_buffer(self, key): 
     301        if key in self.cache: 
     302            self.cache[key].release() 
     303            del self.cache[key] 
     304 
     305    def __del__(self): 
     306        for v in self.cache.values(): 
     307            release = getattr(v, 'release', lambda: None) 
     308            release() 
     309        self.cache = {} 
     310 
     311_CURRENT_ID = 0 
     312def unique_id(): 
     313    global _CURRENT_ID 
     314    _CURRENT_ID += 1 
     315    return _CURRENT_ID 
     316 
     317def _create_some_context(): 
     318    # type: () -> cl.Context 
     319    """ 
     320    Protected call to cl.create_some_context without interactivity. 
     321 
     322    Uses SAS_OPENCL or PYOPENCL_CTX if they are set in the environment, 
     323    otherwise scans for the most appropriate device using 
     324    :func:`_get_default_context`.  Ignore *SAS_OPENCL=OpenCL*, which 
     325    indicates that an OpenCL device should be used without specifying 
     326    which one (and not a CUDA device, or no GPU). 
     327    """ 
     328    # Assume we do not get here if SAS_OPENCL is None or CUDA 
     329    sas_opencl = os.environ.get('SAS_OPENCL', 'opencl') 
     330    if sas_opencl.lower() != 'opencl': 
     331        # Copy SAS_OPENCL into PYOPENCL_CTX so pyopencl creates that context 
     332        os.environ["PYOPENCL_CTX"] = sas_opencl 
     333 
     334    if 'PYOPENCL_CTX' in os.environ: 
     335        try: 
     336            return [cl.create_some_context(interactive=False)] 
     337        except Exception as exc: 
     338            warnings.warn(str(exc)) 
     339            warnings.warn("pyopencl.create_some_context() failed") 
     340            warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") 
     341 
     342    return _get_default_context() 
    315343 
    316344def _get_default_context(): 
     
    392420        self.dtype = dtype 
    393421        self.fast = fast 
    394         self.program = None # delay program creation 
    395         self._kernels = None 
     422        self.timestamp = generate.ocl_timestamp(self.info) 
     423        self._cache_key = unique_id() 
    396424 
    397425    def __getstate__(self): 
     
    402430        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    403431        self.info, self.source, self.dtype, self.fast = state 
    404         self.program = None 
    405432 
    406433    def make_kernel(self, q_vectors): 
    407434        # type: (List[np.ndarray]) -> "GpuKernel" 
    408         if self.program is None: 
    409             compile_program = environment().compile_program 
    410             timestamp = generate.ocl_timestamp(self.info) 
    411             self.program = compile_program( 
     435        return GpuKernel(self, q_vectors) 
     436 
     437    @property 
     438    def Iq(self): 
     439        return self._fetch_kernel('Iq') 
     440 
     441    def fetch_kernel(self, name): 
     442        # type: (str) -> cl.Kernel 
     443        """ 
     444        Fetch the kernel from the environment by name, compiling it if it 
     445        does not already exist. 
     446        """ 
     447        gpu = environment() 
     448        key = self._cache_key 
     449        if key not in gpu.cache: 
     450            program = gpu.compile_program( 
    412451                self.info.name, 
    413452                self.source['opencl'], 
    414453                self.dtype, 
    415454                self.fast, 
    416                 timestamp) 
     455                self.timestamp) 
    417456            variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    418457            names = [generate.kernel_name(self.info, k) for k in variants] 
    419             kernels = [getattr(self.program, k) for k in names] 
    420             self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 
    421         is_2d = len(q_vectors) == 2 
    422         if is_2d: 
    423             kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 
     458            kernels = [getattr(program, k) for k in names] 
     459            data = dict((k, v) for k, v in zip(variants, kernels)) 
     460            # keep a handle to program so GC doesn't collect 
     461            data['program'] = program 
     462            gpu.cache[key] = data 
    424463        else: 
    425             kernel = [self._kernels['Iq']]*2 
    426         return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
    427  
    428     def release(self): 
    429         # type: () -> None 
    430         """ 
    431         Free the resources associated with the model. 
    432         """ 
    433         if self.program is not None: 
    434             self.program = None 
    435  
    436     def __del__(self): 
    437         # type: () -> None 
    438         self.release() 
     464            data = gpu.cache[key] 
     465        return data[name] 
    439466 
    440467# TODO: check that we don't need a destructor for buffers which go out of scope 
     
    461488        # type: (List[np.ndarray], np.dtype) -> None 
    462489        # TODO: do we ever need double precision q? 
    463         env = environment() 
    464490        self.nq = q_vectors[0].size 
    465491        self.dtype = np.dtype(dtype) 
     
    481507            self.q[:self.nq] = q_vectors[0] 
    482508        self.global_size = [self.q.shape[0]] 
    483         context = env.get_context(self.dtype) 
    484         #print("creating inputs of size", self.global_size) 
    485         self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    486                              hostbuf=self.q) 
     509        self._cache_key = unique_id() 
     510 
     511    @property 
     512    def q_b(self): 
     513        """Lazy creation of q buffer so it can survive context reset""" 
     514        env = environment() 
     515        key = self._cache_key 
     516        if key not in env.cache: 
     517            context = env.context[self.dtype] 
     518            #print("creating inputs of size", self.global_size) 
     519            buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
     520                               hostbuf=self.q) 
     521            env.cache[key] = buffer 
     522        return env.cache[key] 
    487523 
    488524    def release(self): 
    489525        # type: () -> None 
    490526        """ 
    491         Free the memory. 
    492         """ 
    493         if self.q_b is not None: 
    494             self.q_b.release() 
    495             self.q_b = None 
     527        Free the buffer associated with the q value 
     528        """ 
     529        environment().free_buffer(id(self)) 
    496530 
    497531    def __del__(self): 
     
    503537    Callable SAS kernel. 
    504538 
    505     *kernel* is the GpuKernel object to call 
    506  
    507     *model_info* is the module information 
    508  
    509     *q_vectors* is the q vectors at which the kernel should be evaluated 
     539    *model* is the GpuModel object to call 
     540 
     541    The following attributes are defined: 
     542 
     543    *info* is the module information 
    510544 
    511545    *dtype* is the kernel precision 
     546 
     547    *dim* is '1d' or '2d' 
     548 
     549    *result* is a vector to contain the results of the call 
    512550 
    513551    The resulting call method takes the *pars*, a list of values for 
     
    519557    Call :meth:`release` when done with the kernel instance. 
    520558    """ 
    521     def __init__(self, kernel, dtype, model_info, q_vectors): 
     559    def __init__(self, model, q_vectors): 
    522560        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
    523         q_input = GpuInput(q_vectors, dtype) 
    524         self.kernel = kernel 
    525         self.info = model_info 
    526         self.dtype = dtype 
    527         self.dim = '2d' if q_input.is_2d else '1d' 
    528         # plus three for the normalization values 
    529         self.result = np.empty(q_input.nq+1, dtype) 
    530  
    531         # Inputs and outputs for each kernel call 
    532         # Note: res may be shorter than res_b if global_size != nq 
     561        dtype = model.dtype 
     562        self.q_input = GpuInput(q_vectors, dtype) 
     563        self._model = model 
     564        self._as_dtype = (np.float32 if dtype == generate.F32 
     565                          else np.float64 if dtype == generate.F64 
     566                          else np.float16 if dtype == generate.F16 
     567                          else np.float32)  # will never get here, so use np.float32 
     568        self._cache_key = unique_id() 
     569 
     570        # attributes accessed from the outside 
     571        self.dim = '2d' if self.q_input.is_2d else '1d' 
     572        self.info = model.info 
     573        self.dtype = model.dtype 
     574 
     575        # holding place for the returned value 
     576        # plus one for the normalization values 
     577        self.result = np.empty(self.q_input.nq+1, dtype) 
     578 
     579    @property 
     580    def _result_b(self): 
     581        """Lazy creation of result buffer so it can survive context reset""" 
    533582        env = environment() 
    534         self.queue = env.get_queue(dtype) 
    535  
    536         self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 
    537                                   q_input.global_size[0] * dtype.itemsize) 
    538         self.q_input = q_input # allocated by GpuInput above 
    539  
    540         self._need_release = [self.result_b, self.q_input] 
    541         self.real = (np.float32 if dtype == generate.F32 
    542                      else np.float64 if dtype == generate.F64 
    543                      else np.float16 if dtype == generate.F16 
    544                      else np.float32)  # will never get here, so use np.float32 
     583        key = self._cache_key 
     584        if key not in env.cache: 
     585            context = env.context[self.dtype] 
     586            #print("creating inputs of size", self.global_size) 
     587            buffer = cl.Buffer(context, mf.READ_WRITE, 
     588                               self.q_input.global_size[0] * self.dtype.itemsize) 
     589            env.cache[key] = buffer 
     590        return env.cache[key] 
    545591 
    546592    def __call__(self, call_details, values, cutoff, magnetic): 
    547593        # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 
    548         context = self.queue.context 
    549         # Arrange data transfer to card 
     594        env = environment() 
     595        queue = env.queue[self._model.dtype] 
     596        context = queue.context 
     597 
     598        # Arrange data transfer to/from card 
     599        q_b = self.q_input.q_b 
     600        result_b = self._result_b 
    550601        details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    551602                              hostbuf=call_details.buffer) 
     
    553604                             hostbuf=values) 
    554605 
    555         kernel = self.kernel[1 if magnetic else 0] 
    556         args = [ 
     606        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
     607        kernel = self._model.fetch_kernel(name) 
     608        kernel_args = [ 
    557609            np.uint32(self.q_input.nq), None, None, 
    558             details_b, values_b, self.q_input.q_b, self.result_b, 
    559             self.real(cutoff), 
     610            details_b, values_b, q_b, result_b, 
     611            self._as_dtype(cutoff), 
    560612        ] 
    561613        #print("Calling OpenCL") 
     
    568620            stop = min(start + step, call_details.num_eval) 
    569621            #print("queuing",start,stop) 
    570             args[1:3] = [np.int32(start), np.int32(stop)] 
    571             wait_for = [kernel(self.queue, self.q_input.global_size, None, 
    572                                *args, wait_for=wait_for)] 
     622            kernel_args[1:3] = [np.int32(start), np.int32(stop)] 
     623            wait_for = [kernel(queue, self.q_input.global_size, None, 
     624                               *kernel_args, wait_for=wait_for)] 
    573625            if stop < call_details.num_eval: 
    574626                # Allow other processes to run 
     
    578630                    time.sleep(0.001) 
    579631                    last_nap = current_time 
    580         cl.enqueue_copy(self.queue, self.result, self.result_b) 
     632        cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 
    581633        #print("result", self.result) 
    582634 
     
    598650        Release resources associated with the kernel. 
    599651        """ 
    600         for v in self._need_release: 
    601             v.release() 
    602         self._need_release = [] 
     652        environment().free_buffer(id(self)) 
     653        self.q_input.release() 
    603654 
    604655    def __del__(self): 
  • sasmodels/models/spinodal.py

    r475ff58 r93fe8a1  
    1212where $x=q/q_0$, $q_0$ is the peak position, $I_{max}$ is the intensity  
    1313at $q_0$ (parameterised as the $scale$ parameter), and $B$ is a flat  
    14 background. The spinodal wavelength is given by $2\pi/q_0$.  
     14background. The spinodal wavelength, $\Lambda$, is given by $2\pi/q_0$.  
     15 
     16The definition of $I_{max}$ in the literature varies. Hashimoto *et al.* (1991)  
     17define it as  
     18 
     19.. math:: 
     20    I_{max} = \Lambda^3\Delta\rho^2 
     21     
     22whereas Meier & Strobl (1987) give  
     23 
     24.. math:: 
     25    I_{max} = V_z\Delta\rho^2 
     26     
     27where $V_z$ is the volume per monomer unit. 
    1528 
    1629The exponent $\gamma$ is equal to $d+1$ for off-critical concentration  
     
    2841 
    2942H. Furukawa. Dynamics-scaling theory for phase-separating unmixing mixtures: 
    30 Growth rates of droplets and scaling properties of autocorrelation functions. 
    31 Physica A 123,497 (1984). 
     43Growth rates of droplets and scaling properties of autocorrelation functions.  
     44Physica A 123, 497 (1984). 
     45 
     46H. Meier & G. Strobl. Small-Angle X-ray Scattering Study of Spinodal  
     47Decomposition in Polystyrene/Poly(styrene-co-bromostyrene) Blends.  
     48Macromolecules 20, 649-654 (1987). 
     49 
     50T. Hashimoto, M. Takenaka & H. Jinnai. Scattering Studies of Self-Assembling  
     51Processes of Polymer Blends in Spinodal Decomposition.  
     52J. Appl. Cryst. 24, 457-466 (1991). 
    3253 
    3354Revision History 
     
    3556 
    3657* **Author:**  Dirk Honecker **Date:** Oct 7, 2016 
    37 * **Revised:** Steve King    **Date:** Sep 7, 2018 
     58* **Revised:** Steve King    **Date:** Oct 25, 2018 
    3859""" 
    3960 
  • setup.py

    r1f991d6 r783e76f  
    2929                return version[1:-1] 
    3030    raise RuntimeError("Could not read version from %s/__init__.py"%package) 
     31 
     32install_requires = ['numpy', 'scipy'] 
     33 
     34if sys.platform=='win32' or sys.platform=='cygwin': 
     35    install_requires.append('tinycc') 
    3136 
    3237setup( 
     
    6166        'sasmodels': ['*.c', '*.cl'], 
    6267    }, 
    63     install_requires=[ 
    64     ], 
     68    install_requires=install_requires, 
    6569    extras_require={ 
     70        'full': ['docutils', 'bumps', 'matplotlib'], 
     71        'server': ['bumps'], 
    6672        'OpenCL': ["pyopencl"], 
    67         'Bumps': ["bumps"], 
    68         'TinyCC': ["tinycc"], 
    6973    }, 
    7074    build_requires=['setuptools'], 
Note: See TracChangeset for help on using the changeset viewer.