Changeset 869fd7b in sasmodels
 Timestamp:
 Oct 26, 2018 11:29:56 AM (2 weeks ago)
 Branches:
 beta_approx, py3, ticket1015gpumemerror, ticket1157, ticket608userdefinedweights, ticket_1156
 Children:
 12f4c19
 Parents:
 81751c2
 File:
 1 edited
Legend:
 Unmodified
 Added
 Removed

sasmodels/kernelcuda.py
r8b31efa r869fd7b
 431  431       def __init__(self, kernel, dtype, model_info, q_vectors):
 432  432           # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
 433      -         q_input = GpuInput(q_vectors, dtype)
      433 +         self.q_input = GpuInput(q_vectors, dtype)
 434  434           self.kernel = kernel
      435 +         self._as_dtype = (np.float32 if dtype == generate.F32
      436 +                           else np.float64 if dtype == generate.F64
      437 +                           else np.float16 if dtype == generate.F16
      438 +                           else np.float32)  # will never get here, so use np.float32
      439 +
      440 +         # attributes accessed from the outside
      441 +         self.dim = '2d' if self.q_input.is_2d else '1d'
 435  442           self.info = model_info
 436  443           self.dtype = dtype
 437      -         self.dim = '2d' if q_input.is_2d else '1d'
 438      -         # plus three for the normalization values
 439      -         self.result = np.empty(q_input.nq+1, dtype)
      444 +
      445 +         # holding place for the returned value
      446 +         nout = 2 if self.info.have_Fq and self.dim == '1d' else 1
      447 +         extra_q = 4  # total weight, form volume, shell volume and R_eff
      448 +         self.result = np.empty(self.q_input.nq*nout+extra_q, dtype)
 440  449
 441  450           # Inputs and outputs for each kernel call
 442  451           # Note: res may be shorter than res_b if global_size != nq
 443      -         self.result_b = cuda.mem_alloc(q_input.global_size[0] * dtype.itemsize)
 444      -         self.q_input = q_input  # allocated by GpuInput above
 445      -
      452 +         width = ((self.result.size+31)//32)*32 * self.dtype.itemsize
      453 +         self.result_b = cuda.mem_alloc(width)
 446  454           self._need_release = [self.result_b]
 447      -         self.real = (np.float32 if dtype == generate.F32
 448      -                      else np.float64 if dtype == generate.F64
 449      -                      else np.float16 if dtype == generate.F16
 450      -                      else np.float32)  # will never get here, so use np.float32
 451      -
 452      -     def __call__(self, call_details, values, cutoff, magnetic):
      455 +
      456 +     def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type):
 453  457           # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
 454  458           # Arrange data transfer to card
   …    …
 460  464               np.uint32(self.q_input.nq), None, None,
 461  465               details_b, values_b, self.q_input.q_b, self.result_b,
 462      -             self.real(cutoff),
      466 +             self._as_dtype(cutoff),
      467 +             np.uint32(effective_radius_type),
 463  468           ]
 464  469           grid = partition(self.q_input.nq)
   …    …
 488  493           values_b.free()
 489  494
 490      -         pd_norm = self.result[self.q_input.nq]
 491      -         scale = values[0]/(pd_norm if pd_norm != 0.0 else 1.0)
 492      -         background = values[1]
 493      -         #print("scale",scale,values[0],self.result[self.q_input.nq],background)
 494      -         return scale*self.result[:self.q_input.nq] + background
 495      -         # return self.result[:self.q_input.nq]
 496      -
 497  495       def release(self):
 498  496           # type: () -> None
Note: See TracChangeset for help on using the changeset viewer.