Changes in / [508475a:8b31efa] in sasmodels


Files: 4 edited
  • doc/guide/magnetism/magnetism.rst

    rdf87acf → rbefe905

      ===========   ================================================================
    - sld_M0       $D_M M_0$
    - sld_mtheta   $\theta_M$
    - sld_mphi     $\phi_M$
    - up_frac_i    $u_i$ = (spin up)/(spin up + spin down) *before* the sample
    - up_frac_f    $u_f$ = (spin up)/(spin up + spin down) *after* the sample
    - up_angle     $\theta_\mathrm{up}$
    + M0:sld       $D_M M_0$
    + mtheta:sld   $\theta_M$
    + mphi:sld     $\phi_M$
    + up:angle     $\theta_\mathrm{up}$
    + up:frac_i    $u_i$ = (spin up)/(spin up + spin down) *before* the sample
    + up:frac_f    $u_f$ = (spin up)/(spin up + spin down) *after* the sample
      ===========   ================================================================

      .. note::
    -     The values of the 'up_frac_i' and 'up_frac_f' must be in the range 0 to 1.
    +     The values of the 'up:frac_i' and 'up:frac_f' must be in the range 0 to 1.

      *Document History*
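
    In user code the rename only changes the keys used for the magnetic
    parameters; the meanings are unchanged.  A minimal sketch of translating the
    old underscore names to the new colon-style names (the helper and the sample
    values are illustrative, not part of sasmodels):

        # Map the pre-change names to the colon-style names used after this change.
        OLD_TO_NEW = {
            "sld_M0":     "M0:sld",
            "sld_mtheta": "mtheta:sld",
            "sld_mphi":   "mphi:sld",
            "up_frac_i":  "up:frac_i",
            "up_frac_f":  "up:frac_f",
            "up_angle":   "up:angle",
        }

        def rename_magnetic_pars(pars):
            """Return a copy of *pars* with old-style magnetic keys renamed."""
            return {OLD_TO_NEW.get(name, name): value for name, value in pars.items()}

        pars = {"sld": 6.3, "sld_M0": 1.0, "up_frac_i": 0.5, "up_frac_f": 0.5}
        print(rename_magnetic_pars(pars))
        # {'sld': 6.3, 'M0:sld': 1.0, 'up:frac_i': 0.5, 'up:frac_f': 0.5}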
  • sasmodels/kernelcl.py

    r95f62aa → r8b31efa

      from . import generate
    - from .generate import F32, F64
      from .kernel import KernelModel, Kernel

    …

      def use_opencl():
    -     sas_opencl = os.environ.get("SAS_OPENCL", "OpenCL").lower()
    -     return HAVE_OPENCL and sas_opencl != "none" and not sas_opencl.startswith("cuda")
    +     env = os.environ.get("SAS_OPENCL", "").lower()
    +     return HAVE_OPENCL and env != "none" and not env.startswith("cuda")

      ENV = None
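
    The SAS_OPENCL environment variable tested above selects the compute backend
    and must be set before the OpenCL environment is built.  A minimal sketch of
    the recognised settings (the "0:1" device string is a hypothetical
    driver:device pair, and pyopencl must be installed for OpenCL to be usable
    at all):

        import os

        # Choose the backend before importing sasmodels.kernelcl:
        os.environ["SAS_OPENCL"] = "none"       # disable OpenCL entirely
        # os.environ["SAS_OPENCL"] = "cuda"     # prefer CUDA; use_opencl() is then False
        # os.environ["SAS_OPENCL"] = "0:1"      # request a specific driver:device
                                                # (forwarded to PYOPENCL_CTX, see below)

        from sasmodels import kernelcl
        print(kernelcl.use_opencl())            # False when SAS_OPENCL is "none"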
     
    …

          Return true if device supports the requested precision.
          """
    -     if dtype == F32:
    +     if dtype == generate.F32:
              return True
          elif dtype == generate.F64:

    …

          """
          GPU context, with possibly many devices, and one queue per device.
    -
    -     Because the environment can be reset during a live program (e.g., if the
    -     user changes the active GPU device in the GUI), everything associated
    -     with the device context must be cached in the environment and recreated
    -     if the environment changes.  The *cache* attribute is a simple dictionary
    -     which holds keys and references to objects, such as compiled kernels and
    -     allocated buffers.  The running program should check in the cache for
    -     long lived objects and create them if they are not there.  The program
    -     should not hold onto cached objects, but instead only keep them active
    -     for the duration of a function call.  When the environment is destroyed
    -     then the *release* method for each active cache item is called before
    -     the environment is freed.  This means that each cl buffer should be
    -     in its own cache entry.
          """
          def __init__(self):
              # type: () -> None
              # find gpu context
    -         context_list = _create_some_context()
    -
    -         # Find a context for F32 and for F64 (maybe the same one).
    -         # F16 isn't good enough.
    -         self.context = {}
    -         for dtype in (F32, F64):
    -             for context in context_list:
    -                 if has_type(context.devices[0], dtype):
    -                     self.context[dtype] = context
    -                     break
    -             else:
    -                 self.context[dtype] = None
    -
    -         # Build a queue for each context
    -         self.queue = {}
    -         context = self.context[F32]
    -         self.queue[F32] = cl.CommandQueue(context, context.devices[0])
    -         if self.context[F64] == self.context[F32]:
    -             self.queue[F64] = self.queue[F32]
    -         else:
    -             context = self.context[F64]
    -             self.queue[F64] = cl.CommandQueue(context, context.devices[0])
    +         #self.context = cl.create_some_context()
    +
    +         self.context = None
    +         if 'SAS_OPENCL' in os.environ:
    +             # Set the PyOpenCL environment variable PYOPENCL_CTX
    +             # from SAS_OPENCL=driver:device.  Ignore the generic
    +             # SAS_OPENCL=opencl, which is used to select the default
    +             # OpenCL device.  Don't need to check for "none" or
    +             # "cuda" since use_opencl() would return False if they
    +             # were defined, and we wouldn't get here.
    +             dev_str = os.environ["SAS_OPENCL"]
    +             if dev_str and dev_str.lower() != "opencl":
    +                 os.environ["PYOPENCL_CTX"] = dev_str
    +
    +         if 'PYOPENCL_CTX' in os.environ:
    +             self._create_some_context()
    +
    +         if not self.context:
    +             self.context = _get_default_context()

              # Byte boundary for data alignment
    -         #self.data_boundary = max(context.devices[0].min_data_type_align_size
    -         #                         for context in self.context.values())
    -
    -         # Cache for compiled programs, and for items in context
    +         #self.data_boundary = max(d.min_data_type_align_size
    +         #                         for d in self.context.devices)
    +         self.queues = [cl.CommandQueue(context, context.devices[0])
    +                        for context in self.context]
              self.compiled = {}
    -         self.cache = {}

          def has_type(self, dtype):
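
    Once PYOPENCL_CTX is set (either directly or forwarded from SAS_OPENCL as in
    the added comment block above), the non-interactive context creation can be
    exercised on its own.  A minimal sketch, assuming pyopencl is installed and
    "0" names a valid platform on this machine:

        import os
        import pyopencl as cl

        os.environ.setdefault("PYOPENCL_CTX", "0")        # hypothetical platform index
        context = cl.create_some_context(interactive=False)
        print(context.devices[0].name)                    # report the selected device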
     
    …

              Return True if all devices support a given type.
              """
    -         return self.context.get(dtype, None) is not None
    +         return any(has_type(d, dtype)
    +                    for context in self.context
    +                    for d in context.devices)
    +
    +     def get_queue(self, dtype):
    +         # type: (np.dtype) -> cl.CommandQueue
    +         """
    +         Return a command queue for the kernels of type dtype.
    +         """
    +         for context, queue in zip(self.context, self.queues):
    +             if all(has_type(d, dtype) for d in context.devices):
    +                 return queue
    +
    +     def get_context(self, dtype):
    +         # type: (np.dtype) -> cl.Context
    +         """
    +         Return a OpenCL context for the kernels of type dtype.
    +         """
    +         for context in self.context:
    +             if all(has_type(d, dtype) for d in context.devices):
    +                 return context
    +
    +     def _create_some_context(self):
    +         # type: () -> cl.Context
    +         """
    +         Protected call to cl.create_some_context without interactivity.  Use
    +         this if SAS_OPENCL is set in the environment.  Sets the *context*
    +         attribute.
    +         """
    +         try:
    +             self.context = [cl.create_some_context(interactive=False)]
    +         except Exception as exc:
    +             warnings.warn(str(exc))
    +             warnings.warn("pyopencl.create_some_context() failed")
    +             warnings.warn("the environment variable 'SAS_OPENCL' might not be set correctly")

          def compile_program(self, name, source, dtype, fast, timestamp):
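
    The new get_context/get_queue helpers return whichever context or queue
    supports the requested precision.  A hedged sketch of the calling pattern,
    falling back to single precision when no double-precision device is found
    (requires pyopencl and a working OpenCL device):

        from sasmodels import generate
        from sasmodels.kernelcl import environment

        env = environment()
        dtype = generate.F64 if env.has_type(generate.F64) else generate.F32
        queue = env.get_queue(dtype)
        print(queue.context.devices[0].name, "->", dtype)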
     
    …

                  del self.compiled[key]
              if key not in self.compiled:
    -             context = self.context[dtype]
    +             context = self.get_context(dtype)
                  logging.info("building %s for OpenCL %s", key,
                               context.devices[0].name.strip())
    -             program = compile_model(self.context[dtype],
    +             program = compile_model(self.get_context(dtype),
                                          str(source), dtype, fast)
                  self.compiled[key] = (program, timestamp)
              return program
    -
    -     def free_buffer(self, key):
    -         if key in self.cache:
    -             self.cache[key].release()
    -             del self.cache[key]
    -
    -     def __del__(self):
    -         for v in self.cache.values():
    -             release = getattr(v, 'release', lambda: None)
    -             release()
    -         self.cache = {}
    -
    - _CURRENT_ID = 0
    - def unique_id():
    -     global _CURRENT_ID
    -     _CURRENT_ID += 1
    -     return _CURRENT_ID
    -
    - def _create_some_context():
    -     # type: () -> cl.Context
    -     """
    -     Protected call to cl.create_some_context without interactivity.
    -
    -     Uses SAS_OPENCL or PYOPENCL_CTX if they are set in the environment,
    -     otherwise scans for the most appropriate device using
    -     :func:`_get_default_context`.  Ignore *SAS_OPENCL=OpenCL*, which
    -     indicates that an OpenCL device should be used without specifying
    -     which one (and not a CUDA device, or no GPU).
    -     """
    -     # Assume we do not get here if SAS_OPENCL is None or CUDA
    -     sas_opencl = os.environ.get('SAS_OPENCL', 'opencl')
    -     if sas_opencl.lower() != 'opencl':
    -         # Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context
    -         os.environ["PYOPENCL_CTX"] = sas_opencl
    -
    -     if 'PYOPENCL_CTX' in os.environ:
    -         try:
    -             return [cl.create_some_context(interactive=False)]
    -         except Exception as exc:
    -             warnings.warn(str(exc))
    -             warnings.warn("pyopencl.create_some_context() failed")
    -             warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly")
    -
    -     return _get_default_context()

      def _get_default_context():

    …

              self.dtype = dtype
              self.fast = fast
    -         self.timestamp = generate.ocl_timestamp(self.info)
    -         self._cache_key = unique_id()
    +         self.program = None # delay program creation
    +         self._kernels = None

          def __getstate__(self):

    …

              # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None
              self.info, self.source, self.dtype, self.fast = state
    +         self.program = None

          def make_kernel(self, q_vectors):
              # type: (List[np.ndarray]) -> "GpuKernel"
    -         return GpuKernel(self, q_vectors)
    -
    -     @property
    -     def Iq(self):
    -         return self._fetch_kernel('Iq')
    -
    -     def fetch_kernel(self, name):
    -         # type: (str) -> cl.Kernel
    -         """
    -         Fetch the kernel from the environment by name, compiling it if it
    -         does not already exist.
    -         """
    -         gpu = environment()
    -         key = self._cache_key
    -         if key not in gpu.cache:
    -             program = gpu.compile_program(
    +         if self.program is None:
    +             compile_program = environment().compile_program
    +             timestamp = generate.ocl_timestamp(self.info)
    +             self.program = compile_program(
                      self.info.name,
                      self.source['opencl'],
                      self.dtype,
                      self.fast,
    -                 self.timestamp)
    +                 timestamp)
                  variants = ['Iq', 'Iqxy', 'Imagnetic']
                  names = [generate.kernel_name(self.info, k) for k in variants]
    -             kernels = [getattr(program, k) for k in names]
    -             data = dict((k, v) for k, v in zip(variants, kernels))
    -             # keep a handle to program so GC doesn't collect
    -             data['program'] = program
    -             gpu.cache[key] = data
    +             kernels = [getattr(self.program, k) for k in names]
    +             self._kernels = dict((k, v) for k, v in zip(variants, kernels))
    +         is_2d = len(q_vectors) == 2
    +         if is_2d:
    +             kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']]
              else:
    -             data = gpu.cache[key]
    -         return data[name]
    +             kernel = [self._kernels['Iq']]*2
    +         return GpuKernel(kernel, self.dtype, self.info, q_vectors)
    +
    +     def release(self):
    +         # type: () -> None
    +         """
    +         Free the resources associated with the model.
    +         """
    +         if self.program is not None:
    +             self.program = None
    +
    +     def __del__(self):
    +         # type: () -> None
    +         self.release()

      # TODO: check that we don't need a destructor for buffers which go out of scope
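
    With this change the program is compiled lazily on the first make_kernel
    call, and the kernel variant pair ('Iq' for 1-D data, 'Iqxy'/'Imagnetic' for
    2-D) is chosen from the q vectors.  A hedged usage sketch (model loading
    goes through sasmodels.core; "sphere" is just an example model, and an
    OpenCL device must be available for the GPU path to be taken):

        import numpy as np
        from sasmodels.core import load_model

        model = load_model("sphere", platform="ocl")   # GpuModel when OpenCL is usable
        q = np.linspace(0.001, 0.5, 200)
        kernel = model.make_kernel([q])                # 1-D q -> the 'Iq' variant
        # ... build call_details/values as in sasmodels.direct_model, then call kernel(...)
        kernel.release()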
     
    …

              # type: (List[np.ndarray], np.dtype) -> None
              # TODO: do we ever need double precision q?
    +         env = environment()
              self.nq = q_vectors[0].size
              self.dtype = np.dtype(dtype)

    …

                  self.q[:self.nq] = q_vectors[0]
              self.global_size = [self.q.shape[0]]
    -         self._cache_key = unique_id()
    -
    -     @property
    -     def q_b(self):
    -         """Lazy creation of q buffer so it can survive context reset"""
    -         env = environment()
    -         key = self._cache_key
    -         if key not in env.cache:
    -             context = env.context[self.dtype]
    -             #print("creating inputs of size", self.global_size)
    -             buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
    -                                hostbuf=self.q)
    -             env.cache[key] = buffer
    -         return env.cache[key]
    +         context = env.get_context(self.dtype)
    +         #print("creating inputs of size", self.global_size)
    +         self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
    +                              hostbuf=self.q)

          def release(self):
              # type: () -> None
              """
    -         Free the buffer associated with the q value
    -         """
    -         environment().free_buffer(id(self))
    +         Free the memory.
    +         """
    +         if self.q_b is not None:
    +             self.q_b.release()
    +             self.q_b = None

          def __del__(self):

    …

          Callable SAS kernel.

    -     *model* is the GpuModel object to call
    -
    -     The following attributes are defined:
    -
    -     *info* is the module information
    +     *kernel* is the GpuKernel object to call
    +
    +     *model_info* is the module information
    +
    +     *q_vectors* is the q vectors at which the kernel should be evaluated

          *dtype* is the kernel precision
    -
    -     *dim* is '1d' or '2d'
    -
    -     *result* is a vector to contain the results of the call

          The resulting call method takes the *pars*, a list of values for

    …

          Call :meth:`release` when done with the kernel instance.
          """
    -     def __init__(self, model, q_vectors):
    +     def __init__(self, kernel, dtype, model_info, q_vectors):
              # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
    -         dtype = model.dtype
    -         self.q_input = GpuInput(q_vectors, dtype)
    -         self._model = model
    -         self._as_dtype = (np.float32 if dtype == generate.F32
    -                           else np.float64 if dtype == generate.F64
    -                           else np.float16 if dtype == generate.F16
    -                           else np.float32)  # will never get here, so use np.float32
    -         self._cache_key = unique_id()
    -
    -         # attributes accessed from the outside
    -         self.dim = '2d' if self.q_input.is_2d else '1d'
    -         self.info = model.info
    -         self.dtype = model.dtype
    -
    -         # holding place for the returned value
    -         # plus one for the normalization values
    -         self.result = np.empty(self.q_input.nq+1, dtype)
    -
    -     @property
    -     def _result_b(self):
    -         """Lazy creation of result buffer so it can survive context reset"""
    +         q_input = GpuInput(q_vectors, dtype)
    +         self.kernel = kernel
    +         self.info = model_info
    +         self.dtype = dtype
    +         self.dim = '2d' if q_input.is_2d else '1d'
    +         # plus three for the normalization values
    +         self.result = np.empty(q_input.nq+1, dtype)
    +
    +         # Inputs and outputs for each kernel call
    +         # Note: res may be shorter than res_b if global_size != nq
              env = environment()
    -         key = self._cache_key
    -         if key not in env.cache:
    -             context = env.context[self.dtype]
    -             #print("creating inputs of size", self.global_size)
    -             buffer = cl.Buffer(context, mf.READ_WRITE,
    -                                self.q_input.global_size[0] * self.dtype.itemsize)
    -             env.cache[key] = buffer
    -         return env.cache[key]
    +         self.queue = env.get_queue(dtype)
    +
    +         self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE,
    +                                   q_input.global_size[0] * dtype.itemsize)
    +         self.q_input = q_input # allocated by GpuInput above
    +
    +         self._need_release = [self.result_b, self.q_input]
    +         self.real = (np.float32 if dtype == generate.F32
    +                      else np.float64 if dtype == generate.F64
    +                      else np.float16 if dtype == generate.F16
    +                      else np.float32)  # will never get here, so use np.float32

          def __call__(self, call_details, values, cutoff, magnetic):
              # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
    -         env = environment()
    -         queue = env.queue[self._model.dtype]
    -         context = queue.context
    -
    -         # Arrange data transfer to/from card
    -         q_b = self.q_input.q_b
    -         result_b = self._result_b
    +         context = self.queue.context
    +         # Arrange data transfer to card
              details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                                    hostbuf=call_details.buffer)

    …

                                   hostbuf=values)

    -         name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy'
    -         kernel = self._model.fetch_kernel(name)
    -         kernel_args = [
    +         kernel = self.kernel[1 if magnetic else 0]
    +         args = [
                  np.uint32(self.q_input.nq), None, None,
    -             details_b, values_b, q_b, result_b,
    -             self._as_dtype(cutoff),
    +             details_b, values_b, self.q_input.q_b, self.result_b,
    +             self.real(cutoff),
              ]
              #print("Calling OpenCL")

    …

                  stop = min(start + step, call_details.num_eval)
                  #print("queuing",start,stop)
    -             kernel_args[1:3] = [np.int32(start), np.int32(stop)]
    -             wait_for = [kernel(queue, self.q_input.global_size, None,
    -                                *kernel_args, wait_for=wait_for)]
    +             args[1:3] = [np.int32(start), np.int32(stop)]
    +             wait_for = [kernel(self.queue, self.q_input.global_size, None,
    +                                *args, wait_for=wait_for)]
                  if stop < call_details.num_eval:
                      # Allow other processes to run

    …

                      time.sleep(0.001)
                      last_nap = current_time
    -         cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for)
    +         cl.enqueue_copy(self.queue, self.result, self.result_b)
              #print("result", self.result)

    …

              Release resources associated with the kernel.
              """
    -         environment().free_buffer(id(self))
    -         self.q_input.release()
    +         for v in self._need_release:
    +             v.release()
    +         self._need_release = []

          def __del__(self):
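
    The __call__ loop above splits a long kernel evaluation into chunks and naps
    between launches so that other processes can use the GPU.  A pure-Python
    sketch of that scheduling pattern (the step size, the nap threshold and the
    work function are placeholders, not sasmodels values):

        import time

        def run_chunked(num_eval, step, do_chunk, nap=0.001, nap_every=0.5):
            """Evaluate [0, num_eval) in chunks, sleeping briefly between chunks."""
            last_nap = time.perf_counter()
            for start in range(0, num_eval, step):
                stop = min(start + step, num_eval)
                do_chunk(start, stop)          # in kernelcl this is the queued cl kernel
                now = time.perf_counter()
                if stop < num_eval and now - last_nap > nap_every:
                    time.sleep(nap)            # let other processes run
                    last_nap = now

        run_chunked(10_000, 2_500, lambda a, b: print("evaluating", a, "to", b))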
  • sasmodels/models/spinodal.py

    r93fe8a1 → r475ff58

      where $x=q/q_0$, $q_0$ is the peak position, $I_{max}$ is the intensity
      at $q_0$ (parameterised as the $scale$ parameter), and $B$ is a flat
    - background. The spinodal wavelength, $\Lambda$, is given by $2\pi/q_0$.
    -
    - The definition of $I_{max}$ in the literature varies. Hashimoto *et al* (1991)
    - define it as
    -
    - .. math::
    -     I_{max} = \Lambda^3\Delta\rho^2
    -
    - whereas Meier & Strobl (1987) give
    -
    - .. math::
    -     I_{max} = V_z\Delta\rho^2
    -
    - where $V_z$ is the volume per monomer unit.
    + background. The spinodal wavelength is given by $2\pi/q_0$.

      The exponent $\gamma$ is equal to $d+1$ for off-critical concentration

    …

      H. Furukawa. Dynamics-scaling theory for phase-separating unmixing mixtures:
    - Growth rates of droplets and scaling properties of autocorrelation functions.
    - Physica A 123, 497 (1984).
    -
    - H. Meier & G. Strobl. Small-Angle X-ray Scattering Study of Spinodal
    - Decomposition in Polystyrene/Poly(styrene-co-bromostyrene) Blends.
    - Macromolecules 20, 649-654 (1987).
    -
    - T. Hashimoto, M. Takenaka & H. Jinnai. Scattering Studies of Self-Assembling
    - Processes of Polymer Blends in Spinodal Decomposition.
    - J. Appl. Cryst. 24, 457-466 (1991).
    + Growth rates of droplets and scaling properties of autocorrelation functions.
    + Physica A 123,497 (1984).

      Revision History

    …

      * **Author:**  Dirk Honecker **Date:** Oct 7, 2016
    - * **Revised:** Steve King    **Date:** Oct 25, 2018
    + * **Revised:** Steve King    **Date:** Sep 7, 2018
      """
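
    As a quick worked number for the relation kept in the revised docstring
    ($\Lambda = 2\pi/q_0$, with $q_0$ the peak position): a peak at, say,
    $q_0 = 0.01$ Å$^{-1}$ corresponds to a spinodal wavelength of

    .. math::
        \Lambda = \frac{2\pi}{0.01\ \text{Å}^{-1}} \approx 628\ \text{Å}.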
  • setup.py

    r783e76f → r1f991d6

                      return version[1:-1]
          raise RuntimeError("Could not read version from %s/__init__.py"%package)
    -
    - install_requires = ['numpy', 'scipy']
    -
    - if sys.platform=='win32' or sys.platform=='cygwin':
    -     install_requires.append('tinycc')

      setup(

    …

              'sasmodels': ['*.c', '*.cl'],
          },
    -     install_requires=install_requires,
    +     install_requires=[
    +     ],
          extras_require={
    -         'full': ['docutils', 'bumps', 'matplotlib'],
    -         'server': ['bumps'],
              'OpenCL': ["pyopencl"],
    +         'Bumps': ["bumps"],
    +         'TinyCC': ["tinycc"],
          },
          build_requires=['setuptools'],
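
    With the hard requirements removed from install_requires, the optional
    backends are now selected as extras at install time; for example, from a
    source checkout, ``pip install .[OpenCL,Bumps,TinyCC]`` pulls in pyopencl,
    bumps and tinycc, while a plain ``pip install .`` installs the core package
    only.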