Changeset d18582e in sasmodels


Ignore:
Timestamp:
Feb 4, 2016 10:44:23 AM (8 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
5054e80
Parents:
bb6f0f3
Message:

default to double precision if single=False is set in model file

Location:
sasmodels
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/core.py

    reafc9fa rd18582e  
    7373    return True 
    7474 
    75 def load_model(model_definition, dtype="single", platform="ocl"): 
     75def load_model(model_definition, dtype=None, platform="ocl"): 
    7676    """ 
    7777    Prepare the model for the default execution platform. 
     
    8787    for the calculation. Any valid numpy single or double precision identifier 
    8888    is valid, such as 'single', 'f', 'f32', or np.float32 for single, or 
    89     'double', 'd', 'f64'  and np.float64 for double. 
     89    'double', 'd', 'f64'  and np.float64 for double.  If *None*, then use 
     90    'single' unless the model defines single=False. 
    9091 
    9192    *platform* should be "dll" to force the dll to be used for C models, 
     
    9495    if isstr(model_definition): 
    9596        model_definition = load_model_definition(model_definition) 
     97    if dtype is None: 
     98        dtype = 'single' if getattr(model_definition, 'single', True) else 'double' 
    9699    source, info = generate.make(model_definition) 
    97100    if callable(info.get('Iq', None)): 
  • sasmodels/data.py

    r5c962df rd18582e  
    242242 
    243243 
    244 def empty_data1D(q, resolution=0.05): 
     244def empty_data1D(q, resolution=0.0): 
    245245    """ 
    246246    Create empty 1D data using the given *q* as the x value. 
     
    252252    #dIq = np.sqrt(Iq) 
    253253    Iq, dIq = None, None 
     254    q = np.asarray(q) 
    254255    data = Data1D(q, Iq, dx=resolution * q, dy=dIq) 
    255256    data.filename = "fake data" 
     
    257258 
    258259 
    259 def empty_data2D(qx, qy=None, resolution=0.05): 
     260def empty_data2D(qx, qy=None, resolution=0.0): 
    260261    """ 
    261262    Create empty 2D data using the given mesh. 
     
    267268    if qy is None: 
    268269        qy = qx 
     270    qx, qy = np.asarray(qx), np.asarray(qy) 
    269271    # 5% dQ/Q resolution 
    270272    Qx, Qy = np.meshgrid(qx, qy) 
  • sasmodels/direct_model.py

    reafc9fa rd18582e  
    234234 
    235235    model_definition = load_model_definition(model_name) 
    236     model = load_model(model_definition, dtype='single') 
     236    model = load_model(model_definition) 
    237237    calculator = DirectModel(data, model) 
    238238    pars = dict((k, float(v)) 
  • sasmodels/kernelcl.py

    reafc9fa rd18582e  
    172172        #self.data_boundary = max(d.min_data_type_align_size 
    173173        #                         for d in self.context.devices) 
    174         self.queues = [cl.CommandQueue(self.context, d) 
    175                        for d in self.context.devices] 
     174        self.queues = [cl.CommandQueue(context, context.devices[0]) 
     175                       for context in self.context] 
    176176        self.compiled = {} 
    177177 
     
    181181        """ 
    182182        dtype = generate.F32 if dtype == 'fast' else np.dtype(dtype) 
    183         return all(has_type(d, dtype) for d in self.context.devices) 
     183        return any(has_type(d, dtype) 
     184                   for context in self.context 
     185                   for d in context.devices) 
     186 
     187    def get_queue(self, dtype): 
     188        """ 
     189        Return a command queue for the kernels of type dtype. 
     190        """ 
     191        for context, queue in zip(self.context, self.queues): 
     192            if all(has_type(d, dtype) for d in context.devices): 
     193                return queue 
     194 
     195    def get_context(self, dtype): 
     196        """ 
     197        Return an OpenCL context for the kernels of type dtype. 
     198        """ 
     199        for context, queue in zip(self.context, self.queues): 
     200            if all(has_type(d, dtype) for d in context.devices): 
     201                return context 
    184202 
    185203    def _create_some_context(self): 
     
    190208        """ 
    191209        try: 
    192             self.context = cl.create_some_context(interactive=False) 
     210            self.context = [cl.create_some_context(interactive=False)] 
    193211        except Exception as exc: 
    194212            warnings.warn(str(exc)) 
     
    204222            #print("compiling",name) 
    205223            dtype = np.dtype(dtype) 
    206             program = compile_model(self.context, source, dtype, fast) 
     224            program = compile_model(self.get_context(dtype), source, dtype, fast) 
    207225            self.compiled[key] = program 
    208226        return self.compiled[key] 
     
    218236def _get_default_context(): 
    219237    """ 
    220     Get an OpenCL context, preferring GPU over CPU. 
    221     """ 
    222     default = None 
     238    Get an OpenCL context, preferring GPU over CPU, and preferring Intel 
     239    drivers over AMD drivers. 
     240    """ 
     241    # Note: on mobile devices there is automatic clock scaling if either the 
     242    # CPU or the GPU is underutilized; probably doesn't affect us, but if 
     243    # it did, it would mean that putting a busy loop on the CPU while the GPU 
     244    # is running may increase throughput. 
     245    # 
     246    # Macbook pro, base install: 
     247    #     {'Apple': [Intel CPU, NVIDIA GPU]} 
     248    # Macbook pro, base install: 
     249    #     {'Apple': [Intel CPU, Intel GPU]} 
     250    # 2 x nvidia 295 with Intel and NVIDIA opencl drivers installed 
     251    #     {'Intel': [CPU], 'NVIDIA': [GPU, GPU, GPU, GPU]} 
     252    gpu, cpu = None, None 
    223253    for platform in cl.get_platforms(): 
    224254        for device in platform.get_devices(): 
    225255            if device.type == cl.device_type.GPU: 
    226                 return cl.Context([device]) 
    227             if default is None: 
    228                 default = device 
    229  
    230     if not default: 
    231         raise RuntimeError("OpenCL device not found") 
    232  
    233     return cl.Context([default]) 
     256                gpu = device 
     257            else: 
     258                cpu = device 
     259    single = gpu if gpu is not None else cpu 
     260    double = gpu if has_type(gpu, np.dtype('double')) else cpu 
     261    single_context = cl.Context([single]) 
     262    double_context = cl.Context([double]) if single != double else single_context 
     263    return single_context, double_context 
    234264 
    235265 
     
    314344        # architectures tested so far. 
    315345        self.q_vectors = [_stretch_input(q, self.dtype, 32) for q in q_vectors] 
     346        context = env.get_context(self.dtype) 
    316347        self.q_buffers = [ 
    317             cl.Buffer(env.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) 
     348            cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) 
    318349            for q in self.q_vectors 
    319350        ] 
     
    363394        # Note: res may be shorter than res_b if global_size != nq 
    364395        env = environment() 
    365         self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE, 
    366                                   2 * MAX_LOOPS * q_input.dtype.itemsize) 
    367                         for _ in env.queues] 
    368         self.res_b = [cl.Buffer(env.context, mf.READ_WRITE, 
    369                                 q_input.global_size[0] * q_input.dtype.itemsize) 
    370                       for _ in env.queues] 
     396        self.queue = env.get_queue(dtype) 
     397        self.loops_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 
     398                                 2 * MAX_LOOPS * q_input.dtype.itemsize) 
     399        self.res_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 
     400                               q_input.global_size[0] * q_input.dtype.itemsize) 
    371401        self.q_input = q_input 
     402 
     403        self._need_release = [self.loops_b, self.res_b, self.q_input] 
    372404 
    373405    def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 
     
    377409                else np.float32)  # will never get here, so use np.float32 
    378410 
    379         device_num = 0 
    380         queuei = environment().queues[device_num] 
    381         res_bi = self.res_b[device_num] 
     411        res_bi = self.res_b 
    382412        nq = np.uint32(self.q_input.nq) 
    383413        if pd_pars: 
     
    394424                raise ValueError("too many polydispersity points") 
    395425 
    396             loops_bi = self.loops_b[device_num] 
    397             cl.enqueue_copy(queuei, loops_bi, loops) 
     426            loops_bi = self.loops_b 
     427            cl.enqueue_copy(self.queue, loops_bi, loops) 
    398428            loops_l = cl.LocalMemory(len(loops.data)) 
    399429            #ctx = environment().context 
     
    404434        fixed = [real(p) for p in fixed_pars] 
    405435        args = self.q_input.q_buffers + [res_bi, nq] + dispersed + fixed 
    406         self.kernel(queuei, self.q_input.global_size, None, *args) 
    407         cl.enqueue_copy(queuei, self.res, res_bi) 
     436        self.kernel(self.queue, self.q_input.global_size, None, *args) 
     437        cl.enqueue_copy(self.queue, self.res, res_bi) 
    408438 
    409439        return self.res 
     
    413443        Release resources associated with the kernel. 
    414444        """ 
    415         for b in self.loops_b: 
    416             b.release() 
    417         self.loops_b = [] 
    418         for b in self.res_b: 
    419             b.release() 
    420         self.res_b = [] 
    421         self.q_input.release() 
     445        for v in self._need_release: 
     446            v.release() 
     447        self._need_release = [] 
    422448 
    423449    def __del__(self): 
  • sasmodels/models/pearl_necklace.py

    rf12357f rd18582e  
    9696 
    9797source = ["lib/Si.c", "pearl_necklace.c"] 
    98 # new flag to let the compiler know to never use single precision 
    99 single = False 
     98single = False  # use double precision unless told otherwise 
    10099 
    101100def volume(radius, edge_separation, string_thickness, number_of_pearls): 
Note: See TracChangeset for help on using the changeset viewer.