Changeset 8b31efa in sasmodels for sasmodels


Ignore:
Timestamp:
Oct 15, 2018 1:27:14 PM (6 years ago)
Author:
pkienzle
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
508475a, d5ce7fa
Parents:
4de14584
Message:

document cuda device selection; fix cuda speed issue

Location:
sasmodels
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernelcl.py

    rb0de252 r8b31efa  
    227 227        self.context = None
    228 228        if 'SAS_OPENCL' in os.environ:
    229                #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context
    230                os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"]
        229            # Set the PyOpenCL environment variable PYOPENCL_CTX
        230            # from SAS_OPENCL=driver:device.  Ignore the generic
        231            # SAS_OPENCL=opencl, which is used to select the default
        232            # OpenCL device.  Don't need to check for "none" or
        233            # "cuda" since use_opencl() would return False if they
        234            # were defined, and we wouldn't get here.
        235            dev_str = os.environ["SAS_OPENCL"]
        236            if dev_str and dev_str.lower() != "opencl":
        237                os.environ["PYOPENCL_CTX"] = dev_str
        238
    231 239        if 'PYOPENCL_CTX' in os.environ:
    232 240            self._create_some_context()
     
    568 576                current_time = time.clock()
    569 577                if current_time - last_nap > 0.5:
    570                        time.sleep(0.05)
        578                    time.sleep(0.001)
    571 579                    last_nap = current_time
    572 580        cl.enqueue_copy(self.queue, self.result, self.result_b)
  • sasmodels/kernelcuda.py

    r74e9b5f r8b31efa  
    444 444        self.q_input = q_input # allocated by GpuInput above
    445 445
    446            self._need_release = [self.result_b, self.q_input]
        446        self._need_release = [self.result_b]
    447 447        self.real = (np.float32 if dtype == generate.F32
    448 448                     else np.float64 if dtype == generate.F64
     
    467 467        # Call kernel and retrieve results
    468 468        last_nap = time.clock()
    469            step = 1000000//self.q_input.nq + 1
        469        step = 100000000//self.q_input.nq + 1
    470 470        #step = 1000000000
    471 471        for start in range(0, call_details.num_eval, step):
     
    479 479                current_time = time.clock()
    480 480                if current_time - last_nap > 0.5:
    481                        time.sleep(0.05)
        481                    time.sleep(0.001)
    482 482                    last_nap = current_time
    483 483        sync()
     
    500 500        Release resources associated with the kernel.
    501 501        """
    502            if self.result_b is not None:
    503                self.result_b.free()
    504                self.result_b = None
        502        for p in self._need_release:
        503            p.free()
        504        self._need_release = []
    505 505
    506 506    def __del__(self):
Note: See TracChangeset for help on using the changeset viewer.