Changeset 17cb5bb in sasmodels


Timestamp: Nov 26, 2018 10:00:54 PM (3 weeks ago)
Author: GitHub <noreply@…>
Parents: 119073a (diff), 00afc15 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
git-author: Paul Kienzle <pkienzle@…> (11/26/18 22:00:54)
git-committer: GitHub <noreply@…> (11/26/18 22:00:54)
Message:

Merge 00afc158f4641f755fc8518e7528d23f8655d30b into 119073a61da4bc8256d0f33c999a38f346f9dfc6

Files:
5 edited
6 moved

  • sasmodels/kernelcl.py

    rf872fd1 r00afc15  
    265265        # Cache for compiled programs, and for items in context 
    266266        self.compiled = {} 
    267         self.cache = {} 
    268267 
    269268    def has_type(self, dtype): 
     
    296295            self.compiled[key] = (program, timestamp) 
    297296        return program 
    298  
    299     def free_buffer(self, key): 
    300         if key in self.cache: 
    301             self.cache[key].release() 
    302             del self.cache[key] 
    303  
    304     def __del__(self): 
    305         for v in self.cache.values(): 
    306             release = getattr(v, 'release', lambda: None) 
    307             release() 
    308         self.cache = {} 
    309  
    310 _CURRENT_ID = 0 
    311 def unique_id(): 
    312     global _CURRENT_ID 
    313     _CURRENT_ID += 1 
    314     return _CURRENT_ID 
    315297 
    316298def _create_some_context(): 
     
    413395    that the compiler is allowed to take shortcuts. 
    414396    """ 
     397    info = None # type: ModelInfo 
     398    source = "" # type: str 
     399    dtype = None # type: np.dtype 
     400    fast = False # type: bool 
     401    _program = None # type: cl.Program 
     402    _kernels = None # type: Dict[str, cl.Kernel] 
     403 
    415404    def __init__(self, source, model_info, dtype=generate.F32, fast=False): 
    416405        # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None 
     
    419408        self.dtype = dtype 
    420409        self.fast = fast 
    421         self.timestamp = generate.ocl_timestamp(self.info) 
    422         self._cache_key = unique_id() 
    423410 
    424411    def __getstate__(self): 
     
    429416        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    430417        self.info, self.source, self.dtype, self.fast = state 
     418        self._program = self._kernels = None 
    431419 
    432420    def make_kernel(self, q_vectors): 
     
    434422        return GpuKernel(self, q_vectors) 
    435423 
    436     @property 
    437     def Iq(self): 
    438         return self._fetch_kernel('Iq') 
    439  
    440     def fetch_kernel(self, name): 
     424    def get_function(self, name): 
    441425        # type: (str) -> cl.Kernel 
    442426        """ 
     
    444428        does not already exist. 
    445429        """ 
    446         gpu = environment() 
    447         key = self._cache_key 
    448         if key not in gpu.cache: 
    449             program = gpu.compile_program( 
    450                 self.info.name, 
    451                 self.source['opencl'], 
    452                 self.dtype, 
    453                 self.fast, 
    454                 self.timestamp) 
    455             variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    456             names = [generate.kernel_name(self.info, k) for k in variants] 
    457             kernels = [getattr(program, k) for k in names] 
    458             data = dict((k, v) for k, v in zip(variants, kernels)) 
    459             # keep a handle to program so GC doesn't collect 
    460             data['program'] = program 
    461             gpu.cache[key] = data 
    462         else: 
    463             data = gpu.cache[key] 
    464         return data[name] 
     430        if self._program is None: 
     431            self._prepare_program() 
     432        return self._kernels[name] 
     433 
     434    def _prepare_program(self): 
     435        # type: (str) -> None 
     436        env = environment() 
     437        timestamp = generate.ocl_timestamp(self.info) 
     438        program = env.compile_program( 
     439            self.info.name, 
     440            self.source['opencl'], 
     441            self.dtype, 
     442            self.fast, 
     443            timestamp) 
     444        variants = ['Iq', 'Iqxy', 'Imagnetic'] 
     445        names = [generate.kernel_name(self.info, k) for k in variants] 
     446        functions = [getattr(program, k) for k in names] 
     447        self._kernels = {k: v for k, v in zip(variants, functions)} 
     448        # keep a handle to program so GC doesn't collect 
     449        self._program = program 
    465450 
    466451# TODO: check that we don't need a destructor for buffers which go out of scope 
     
    504489            self.q[:self.nq] = q_vectors[0] 
    505490        self.global_size = [self.q.shape[0]] 
    506         self._cache_key = unique_id() 
    507  
    508     @property 
    509     def q_b(self): 
    510         """Lazy creation of q buffer so it can survive context reset""" 
     491        #print("creating inputs of size", self.global_size) 
     492 
     493        # transfer input value to gpu 
    511494        env = environment() 
    512         key = self._cache_key 
    513         if key not in env.cache: 
    514             context = env.context[self.dtype] 
    515             #print("creating inputs of size", self.global_size) 
    516             buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    517                                hostbuf=self.q) 
    518             env.cache[key] = buffer 
    519         return env.cache[key] 
     495        context = env.context[self.dtype] 
     496        self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
     497                             hostbuf=self.q) 
    520498 
    521499    def release(self): 
     
    524502        Free the buffer associated with the q value 
    525503        """ 
    526         environment().free_buffer(id(self)) 
     504        if self.q_b is not None: 
     505            self.q_b.release() 
     506            self.q_b = None 
    527507 
    528508    def __del__(self): 
     
    536516    *model* is the GpuModel object to call 
    537517 
    538     The following attributes are defined: 
    539  
    540     *info* is the module information 
    541  
    542     *dtype* is the kernel precision 
    543  
    544     *dim* is '1d' or '2d' 
    545  
    546     *result* is a vector to contain the results of the call 
    547  
    548     The resulting call method takes the *pars*, a list of values for 
    549     the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 
    550     vectors for the polydisperse parameters.  *cutoff* determines the 
    551     integration limits: any points with combined weight less than *cutoff* 
    552     will not be calculated. 
     518    The kernel is derived from :class:`Kernel`, providing the 
     519    :meth:`call_kernel` method to evaluate the kernel for a given set of 
     520    parameters.  Because of the need to move the q values to the GPU before 
     521    evaluation, the kernel is instantiated for a particular set of q vectors, 
     522    and can be called many times without transfering q each time. 
    553523 
    554524    Call :meth:`release` when done with the kernel instance. 
    555525    """ 
     526    #: SAS model information structure 
     527    info = None # type: ModelInfo 
     528    #: kernel precision 
     529    dtype = None # type: np.dtype 
     530    #: kernel dimensions (1d or 2d) 
     531    dim = "" # type: str 
     532    #: calculation results, updated after each call to :meth:`_call_kernel` 
     533    result = None # type: np.ndarray 
     534 
    556535    def __init__(self, model, q_vectors): 
    557         # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
     536        # type: (GpuModel, List[np.ndarray]) -> None 
    558537        dtype = model.dtype 
    559538        self.q_input = GpuInput(q_vectors, dtype) 
     
    561540        # F16 isn't sufficient, so don't support it 
    562541        self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
    563         self._cache_key = unique_id() 
    564542 
    565543        # attributes accessed from the outside 
     
    573551        self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 
    574552 
    575     @property 
    576     def _result_b(self): 
    577         """Lazy creation of result buffer so it can survive context reset""" 
     553        # allocate result value on gpu 
    578554        env = environment() 
    579         key = self._cache_key 
    580         if key not in env.cache: 
    581             context = env.context[self.dtype] 
    582             width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
    583             buffer = cl.Buffer(context, mf.READ_WRITE, width) 
    584             env.cache[key] = buffer 
    585         return env.cache[key] 
     555        context = env.context[self.dtype] 
     556        width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
     557        self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 
    586558 
    587559    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     
    592564 
    593565        # Arrange data transfer to/from card 
    594         q_b = self.q_input.q_b 
    595         result_b = self._result_b 
    596566        details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    597567                              hostbuf=call_details.buffer) 
     
    600570 
    601571        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
    602         kernel = self._model.fetch_kernel(name) 
     572        kernel = self._model.get_function(name) 
    603573        kernel_args = [ 
    604574            np.uint32(self.q_input.nq), None, None, 
    605             details_b, values_b, q_b, result_b, 
     575            details_b, values_b, self.q_input.q_b, self._result_b, 
    606576            self._as_dtype(cutoff), 
    607577            np.uint32(effective_radius_type), 
     
    626596                    time.sleep(0.001) 
    627597                    last_nap = current_time 
    628         cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 
     598        cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for) 
    629599        #print("result", self.result) 
    630600 
    631601        # Free buffers 
    632         for v in (details_b, values_b): 
    633             if v is not None: 
    634                 v.release() 
     602        details_b.release() 
     603        values_b.release() 
    635604 
    636605    def release(self): 
     
    639608        Release resources associated with the kernel. 
    640609        """ 
    641         environment().free_buffer(id(self)) 
    642610        self.q_input.release() 
     611        if self._result_b is not None: 
     612            self._result_b.release() 
     613            self._result_b = None 
    643614 
    644615    def __del__(self): 
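
The kernelcl.py hunks above move program and kernel caching off the global environment cache (previously keyed by unique_id()) and onto the objects themselves: GpuModel compiles lazily in _prepare_program() and serves kernels through get_function(), while GpuInput and GpuKernel own and explicitly release their buffers. A minimal runnable sketch of the lazy compile-and-lookup pattern follows; LazyKernelModel, compile_source and _FakeProgram are illustrative stand-ins, not sasmodels API, and the real code goes through pyopencl and sasmodels.generate.

    # Sketch of the per-model lazy caching pattern adopted in kernelcl.py.
    # Compilation is simulated so the control flow can run without a GPU.

    class _FakeProgram:
        """Hypothetical stand-in for a built cl.Program: one callable per kernel."""
        def __init__(self, name):
            self.Iq = lambda: "Iq kernel for %s" % name
            self.Iqxy = lambda: "Iqxy kernel for %s" % name
            self.Imagnetic = lambda: "Imagnetic kernel for %s" % name

    def compile_source(name, source):
        """Hypothetical stand-in for env.compile_program(...)."""
        return _FakeProgram(name)

    class LazyKernelModel:
        _program = None   # compiled program handle, built on first use
        _kernels = None   # dict: variant name -> kernel function

        def __init__(self, name, source):
            self.name = name
            self.source = source

        def get_function(self, variant):
            # Compile on the first request, then serve kernels from the local dict.
            if self._program is None:
                self._prepare_program()
            return self._kernels[variant]

        def _prepare_program(self):
            program = compile_source(self.name, self.source)
            variants = ['Iq', 'Iqxy', 'Imagnetic']
            self._kernels = {k: getattr(program, k) for k in variants}
            # Keep a handle to the program so it is not garbage collected.
            self._program = program

    if __name__ == "__main__":
        model = LazyKernelModel("sphere", "...opencl source...")
        print(model.get_function('Iq')())    # triggers compilation
        print(model.get_function('Iqxy')())  # reuses the cached program
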
  • sasmodels/kernelcuda.py

    rf872fd1 r00afc15  
    6363import time 
    6464import re 
     65import atexit 
    6566 
    6667import numpy as np  # type: ignore 
     
    138139    return ENV 
    139140 
     141def free_context(): 
     142    global ENV 
     143    if ENV is not None: 
     144        ENV.release() 
     145        ENV = None 
     146 
     147atexit.register(free_context) 
     148 
    140149def has_type(dtype): 
    141150    # type: (np.dtype) -> bool 
     
    296305    dtype = None # type: np.dtype 
    297306    fast = False # type: bool 
    298     program = None # type: SourceModule 
    299     _kernels = None # type: List[cuda.Function] 
     307    _program = None # type: SourceModule 
     308    _kernels = None # type: Dict[str, cuda.Function] 
    300309 
    301310    def __init__(self, source, model_info, dtype=generate.F32, fast=False): 
     
    305314        self.dtype = dtype 
    306315        self.fast = fast 
    307         self.program = None # delay program creation 
    308         self._kernels = None 
    309316 
    310317    def __getstate__(self): 
     
    315322        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    316323        self.info, self.source, self.dtype, self.fast = state 
    317         self.program = None 
     324        self._program = self._kernels = None 
    318325 
    319326    def make_kernel(self, q_vectors): 
    320327        # type: (List[np.ndarray]) -> "GpuKernel" 
    321         if self.program is None: 
    322             compile_program = environment().compile_program 
    323             timestamp = generate.ocl_timestamp(self.info) 
    324             self.program = compile_program( 
    325                 self.info.name, 
    326                 self.source['opencl'], 
    327                 self.dtype, 
    328                 self.fast, 
    329                 timestamp) 
    330             variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    331             names = [generate.kernel_name(self.info, k) for k in variants] 
    332             kernels = [self.program.get_function(k) for k in names] 
    333             self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 
    334         is_2d = len(q_vectors) == 2 
    335         if is_2d: 
    336             kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 
    337         else: 
    338             kernel = [self._kernels['Iq']]*2 
    339         return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
    340  
    341     def release(self): 
    342         # type: () -> None 
    343         """ 
    344         Free the resources associated with the model. 
    345         """ 
    346         if self.program is not None: 
    347             self.program = None 
    348  
    349     def __del__(self): 
    350         # type: () -> None 
    351         self.release() 
     328        return GpuKernel(self, q_vectors) 
     329 
     330    def get_function(self, name): 
     331        # type: (str) -> cuda.Function 
     332        """ 
     333        Fetch the kernel from the environment by name, compiling it if it 
     334        does not already exist. 
     335        """ 
     336        if self._program is None: 
     337            self._prepare_program() 
     338        return self._kernels[name] 
     339 
     340    def _prepare_program(self): 
     341        # type: (str) -> None 
     342        env = environment() 
     343        timestamp = generate.ocl_timestamp(self.info) 
     344        program = env.compile_program( 
     345            self.info.name, 
     346            self.source['opencl'], 
     347            self.dtype, 
     348            self.fast, 
     349            timestamp) 
     350        variants = ['Iq', 'Iqxy', 'Imagnetic'] 
     351        names = [generate.kernel_name(self.info, k) for k in variants] 
     352        functions = [program.get_function(k) for k in names] 
     353        self._kernels = {k: v for k, v in zip(variants, functions)} 
     354        # keep a handle to program so GC doesn't collect 
     355        self._program = program 
    352356 
    353357# TODO: check that we don't need a destructor for buffers which go out of scope 
     
    394398        self.global_size = [self.q.shape[0]] 
    395399        #print("creating inputs of size", self.global_size) 
     400 
     401        # transfer input value to gpu 
    396402        self.q_b = cuda.to_device(self.q) 
    397403 
     
    413419    Callable SAS kernel. 
    414420 
    415     *kernel* is the GpuKernel object to call 
    416  
    417     *model_info* is the module information 
    418  
    419     *q_vectors* is the q vectors at which the kernel should be evaluated 
    420  
    421     *dtype* is the kernel precision 
    422  
    423     The resulting call method takes the *pars*, a list of values for 
    424     the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 
    425     vectors for the polydisperse parameters.  *cutoff* determines the 
    426     integration limits: any points with combined weight less than *cutoff* 
    427     will not be calculated. 
     421    *model* is the GpuModel object to call 
     422 
     423    The kernel is derived from :class:`Kernel`, providing the 
     424    :meth:`call_kernel` method to evaluate the kernel for a given set of 
     425    parameters.  Because of the need to move the q values to the GPU before 
     426    evaluation, the kernel is instantiated for a particular set of q vectors, 
     427    and can be called many times without transfering q each time. 
    428428 
    429429    Call :meth:`release` when done with the kernel instance. 
    430430    """ 
    431     def __init__(self, kernel, dtype, model_info, q_vectors): 
    432         # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
     431    #: SAS model information structure 
     432    info = None # type: ModelInfo 
     433    #: kernel precision 
     434    dtype = None # type: np.dtype 
     435    #: kernel dimensions (1d or 2d) 
     436    dim = "" # type: str 
     437    #: calculation results, updated after each call to :meth:`_call_kernel` 
     438    result = None # type: np.ndarray 
     439 
     440    def __init__(self, model, q_vectors): 
     441        # type: (GpuModel, List[np.ndarray]) -> None 
     442        dtype = model.dtype 
    433443        self.q_input = GpuInput(q_vectors, dtype) 
    434         self.kernel = kernel 
     444        self._model = model 
    435445        # F16 isn't sufficient, so don't support it 
    436446        self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
     
    438448        # attributes accessed from the outside 
    439449        self.dim = '2d' if self.q_input.is_2d else '1d' 
    440         self.info = model_info 
    441         self.dtype = dtype 
     450        self.info = model.info 
     451        self.dtype = model.dtype 
    442452 
    443453        # holding place for the returned value 
     
    446456        self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 
    447457 
    448         # Inputs and outputs for each kernel call 
    449         # Note: res may be shorter than res_b if global_size != nq 
     458        # allocate result value on gpu 
    450459        width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
    451         self.result_b = cuda.mem_alloc(width) 
    452         self._need_release = [self.result_b] 
     460        self._result_b = cuda.mem_alloc(width) 
    453461 
    454462    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     
    458466        values_b = cuda.to_device(values) 
    459467 
    460         kernel = self.kernel[1 if magnetic else 0] 
    461         args = [ 
     468        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
     469        kernel = self._model.get_function(name) 
     470        kernel_args = [ 
    462471            np.uint32(self.q_input.nq), None, None, 
    463             details_b, values_b, self.q_input.q_b, self.result_b, 
     472            details_b, values_b, self.q_input.q_b, self._result_b, 
    464473            self._as_dtype(cutoff), 
    465474            np.uint32(effective_radius_type), 
     
    475484            stop = min(start + step, call_details.num_eval) 
    476485            #print("queuing",start,stop) 
    477             args[1:3] = [np.int32(start), np.int32(stop)] 
    478             kernel(*args, **grid) 
     486            kernel_args[1:3] = [np.int32(start), np.int32(stop)] 
     487            kernel(*kernel_args, **grid) 
    479488            if stop < call_details.num_eval: 
    480489                sync() 
     
    485494                    last_nap = current_time 
    486495        sync() 
    487         cuda.memcpy_dtoh(self.result, self.result_b) 
     496        cuda.memcpy_dtoh(self.result, self._result_b) 
    488497        #print("result", self.result) 
    489498 
     
    496505        Release resources associated with the kernel. 
    497506        """ 
    498         for p in self._need_release: 
    499             p.free() 
    500         self._need_release = [] 
     507        self.q_input.release() 
     508        if self._result_b is not None: 
     509            self._result_b.free() 
     510            self._result_b = None 
    501511 
    502512    def __del__(self): 
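
The kernelcuda.py hunks mirror the same per-object _program/_kernels caching and additionally register an atexit hook so the CUDA context is released at interpreter shutdown. Below is a minimal runnable sketch of that module-level cleanup pattern; ENV, FakeEnvironment, environment() and free_context() here are simplified stand-ins for the sasmodels objects.

    # Sketch of the module-level context cleanup added to kernelcuda.py:
    # a lazily created environment plus an atexit hook that releases it.
    import atexit

    ENV = None  # module-level singleton, created on first use

    class FakeEnvironment:
        """Hypothetical stand-in for the CUDA environment object."""
        def __init__(self):
            print("acquiring device context")
        def release(self):
            print("releasing device context")

    def environment():
        global ENV
        if ENV is None:
            ENV = FakeEnvironment()
        return ENV

    def free_context():
        global ENV
        if ENV is not None:
            ENV.release()
            ENV = None

    # Tear the context down when the interpreter exits, even if no caller
    # remembered to release it explicitly.
    atexit.register(free_context)

    if __name__ == "__main__":
        environment()   # context created here; released automatically at exit
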
  • sasmodels/model_test.py

    r5024a56 r00afc15  
    167167        # test using cuda if desired and available 
    168168        if 'cuda' in loaders and use_cuda(): 
    169             test_name = "%s-cuda"%model_name 
     169            test_name = "%s-cuda" % model_info.id 
    170170            test_method_name = "test_%s_cuda" % model_info.id 
    171171            # Using dtype=None so that the models that are only 
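
The model_test.py fix labels the CUDA test with model_info.id, the canonical model identifier, so the display name matches the generated test method name and the OpenCL branch. A small illustrative sketch of deriving per-backend test labels from a model id; backend_test_names is hypothetical, not sasmodels API.

    # Sketch: building per-backend test names from the canonical model id,
    # as the model_test.py fix does for the cuda loader.
    def backend_test_names(model_id, loaders=('ocl', 'cuda')):
        """Return (display name, method name) pairs for each requested backend."""
        return [("%s-%s" % (model_id, loader), "test_%s_%s" % (model_id, loader))
                for loader in loaders]

    if __name__ == "__main__":
        for label, method in backend_test_names("pearl_necklace"):
            print(label, "->", method)
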
  • explore/beta/sasfit_compare.py

    r2a12351b r119073a  
    505505    } 
    506506 
    507     Q, IQ = load_sasfit(data_file('richard_test.txt')) 
    508     Q, IQSD = load_sasfit(data_file('richard_test2.txt')) 
    509     Q, IQBD = load_sasfit(data_file('richard_test3.txt')) 
     507    Q, IQ = load_sasfit(data_file('sasfit_sphere_schulz_IQD.txt')) 
     508    Q, IQSD = load_sasfit(data_file('sasfit_sphere_schulz_IQSD.txt')) 
     509    Q, IQBD = load_sasfit(data_file('sasfit_sphere_schulz_IQBD.txt')) 
    510510    target = Theory(Q=Q, F1=None, F2=None, P=IQ, S=None, I=IQSD, Seff=None, Ibeta=IQBD) 
    511511    actual = sphere_r(Q, norm="sasfit", **pars) 
     
    526526    } 
    527527 
    528     Q, IQ = load_sasfit(data_file('richard_test4.txt')) 
    529     Q, IQSD = load_sasfit(data_file('richard_test5.txt')) 
    530     Q, IQBD = load_sasfit(data_file('richard_test6.txt')) 
     528    Q, IQ = load_sasfit(data_file('sasfit_ellipsoid_shulz_IQD.txt')) 
     529    Q, IQSD = load_sasfit(data_file('sasfit_ellipsoid_shulz_IQSD.txt')) 
     530    Q, IQBD = load_sasfit(data_file('sasfit_ellipsoid_shulz_IQBD.txt')) 
    531531    target = Theory(Q=Q, F1=None, F2=None, P=IQ, S=None, I=IQSD, Seff=None, Ibeta=IQBD) 
    532532    actual = ellipsoid_pe(Q, norm="sasfit", **pars) 
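
The sasfit_compare.py change only renames the SASfit reference files to descriptive names (sasfit_sphere_schulz_* and sasfit_ellipsoid_shulz_*). For orientation, here is a minimal sketch of reading such a two-column Q/I(Q) reference curve with numpy, assuming plain whitespace-separated columns; the real load_sasfit may handle headers and delimiters differently.

    # Sketch: loading a two-column SASfit reference curve (Q, I) with numpy.
    import numpy as np

    def load_reference(path):
        data = np.loadtxt(path)      # shape (npoints, 2): Q in column 0, I in column 1
        return data[:, 0], data[:, 1]

    # Usage (assuming the file is on disk in this format):
    # Q, IQ = load_reference("sasfit_sphere_schulz_IQD.txt")
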
  • sasmodels/models/pearl_necklace.c

    r99658f6 r9b5fd42  
    4040    const double si = sas_sinx_x(q*A_s); 
    4141    const double omsi = 1.0 - si; 
    42     const double pow_si = pow(si, num_pearls); 
     42    const double pow_si = pown(si, num_pearls); 
    4343 
    4444    // form factor for num_pearls 
     
    8181radius_from_volume(double radius, double edge_sep, double thick_string, double fp_num_pearls) 
    8282{ 
    83     const int num_pearls = (int) fp_num_pearls +0.5; 
    8483    const double vol_tot = form_volume(radius, edge_sep, thick_string, fp_num_pearls); 
    8584    return cbrt(vol_tot/M_4PI_3); 
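
In pearl_necklace.c, pow(si, num_pearls) becomes pown(si, num_pearls), OpenCL's power function for integer exponents, and an unused integer rounding of fp_num_pearls is dropped. With an integer exponent the power can be evaluated by repeated squaring rather than the general exp(y*log(x)) route; the Python sketch below illustrates that idea and is not the OpenCL implementation.

    # Sketch of integer exponentiation by squaring, the kind of evaluation an
    # integer-power routine such as OpenCL's pown(x, n) can use instead of the
    # general pow(x, y) path.
    def pown(x, n):
        if n < 0:
            return 1.0 / pown(x, -n)
        result = 1.0
        base = x
        while n:
            if n & 1:          # multiply in the current bit of the exponent
                result *= base
            base *= base       # square the base for the next bit
            n >>= 1
        return result

    assert abs(pown(0.9, 5) - 0.9**5) < 1e-12
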