Changes in sasmodels/kernelcl.py [c036ddb:5399809] in sasmodels
File:
- 1 edited

Legend:
- Unmodified
- Added
- Removed
sasmodels/kernelcl.py
Old revision: rc036ddb    New revision: r5399809
Columns: old line number, new line number, marker, code.
Marker "+" = added, "-" = removed, blank = unmodified; "…" = lines skipped by the viewer.
(Code indentation is reconstructed; the original whitespace was lost in extraction.)

 old   new
 481   481            # at this point, so instead using 32, which is good on the set of
 482   482            # architectures tested so far.
       483  +         extra_q = 3 # total weight, weighted volume and weighted radius
 483   484            if self.is_2d:
 484        -             # Note: 16 rather than 15 because result is 1 longer than input.
 485        -             width = ((self.nq+16)//16)*16
       485  +             width = ((self.nq+15+extra_q)//16)*16
 486   486                self.q = np.empty((width, 2), dtype=dtype)
 487   487                self.q[:self.nq, 0] = q_vectors[0]
 488   488                self.q[:self.nq, 1] = q_vectors[1]
 489   489            else:
 490        -             # Note: 32 rather than 31 because result is 1 longer than input.
 491        -             width = ((self.nq+32)//32)*32
       490  +             width = ((self.nq+31+extra_q)//32)*32
 492   491                self.q = np.empty(width, dtype=dtype)
 493   492                self.q[:self.nq] = q_vectors[0]
   …     …
 539   538            self.dim = '2d' if q_input.is_2d else '1d'
 540   539            # leave room for f1/f2 results in case we need to compute beta for 1d models
 541        -         num_returns = 1 if self.dim == '2d' else 2 #
 542        -         # plus 1 for the normalization value
 543        -         self.result = np.empty((q_input.nq+1)*num_returns, dtype)
       540  +         nout = 2 if self.info.have_Fq and self.dim == '1d' else 1
       541  +         # plus 3 weight, volume, radius
       542  +         self.result = np.empty(q_input.nq*nout + 3, self.dtype)
 544   543
 545   544            # Inputs and outputs for each kernel call
   …     …
 549   548
 550   549            self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE,
 551        -                                   q_input.global_size[0] * num_returns * dtype.itemsize)
       550  +                                   q_input.global_size[0] * nout * dtype.itemsize)
 552   551            self.q_input = q_input # allocated by GpuInput above
 553   552
   …     …
 558   557                         else np.float32) # will never get here, so use np.float32
 559   558
 560        -     def Iq(self, call_details, values, cutoff, magnetic):
 561        -         # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
 562        -         self._call_kernel(call_details, values, cutoff, magnetic)
 563        -         #print("returned",self.q_input.q, self.result)
 564        -         pd_norm = self.result[self.q_input.nq]
 565        -         scale = values[0]/(pd_norm if pd_norm != 0.0 else 1.0)
 566        -         background = values[1]
 567        -         #print("scale",scale,background)
 568        -         return scale*self.result[:self.q_input.nq] + background
 569        -     __call__ = Iq
 570        -
 571        -     def beta(self, call_details, values, cutoff, magnetic):
 572        -         # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
 573        -         if self.dim == '2d':
 574        -             raise NotImplementedError("beta not yet supported for 2D")
 575        -         self._call_kernel(call_details, values, cutoff, magnetic)
 576        -         w_norm = self.result[2*self.q_input.nq + 1]
 577        -         pd_norm = self.result[self.q_input.nq]
 578        -         if w_norm == 0.:
 579        -             w_norm = 1.
 580        -         F2 = self.result[:self.q_input.nq]/w_norm
 581        -         F1 = self.result[self.q_input.nq+1:2*self.q_input.nq+1]/w_norm
 582        -         volume_avg = pd_norm/w_norm
 583        -         return F1, F2, volume_avg
 584        -
 585        -     def _call_kernel(self, call_details, values, cutoff, magnetic):
       559  +     def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type):
 586   560            # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
 587   561            context = self.queue.context
   …     …
 597   571                details_b, values_b, self.q_input.q_b, self.result_b,
 598   572                self.real(cutoff),
       573  +             np.uint32(effective_radius_type),
 599   574            ]
 600   575            #print("Calling OpenCL")
Note: See TracChangeset for help on using the changeset viewer.