Changeset 9eb3632 – SasView

sasmodels/details.py

-                      r32e3c9b
+                      r9eb3632
 import numpy as np  # type: ignore
+from numpy import pi, cos, sin
+try:
+    np.meshgrid([])
+    meshgrid = np.meshgrid
+except ValueError:
+    # CRUFT: some numpy versions raise ValueError when np.meshgrid is given
+    # fewer than two vectors, so wrap it to handle the single-vector case
+    def meshgrid(*args):
+        if len(args) > 1:
+            return np.meshgrid(*args)
+        else:
+            return [np.asarray(v) for v in args]
 try:
 …
         print("pd_stride", self.pd_stride)
 def mono_details(model_info):
     call_details = CallDetails(model_info)
     call_details.pd_prod = 1
+    call_details.pd_sum = model_info.parameters.nvalues
+    call_details.pd_par[:] = np.arange(0, model_info.parameters.max_pd)
+    call_details.pd_length[:] = 1
+    call_details.pd_offset[:] = np.arange(0, model_info.parameters.max_pd)
+    call_details.pd_stride[:] = 1
     return call_details
 def poly_details(model_info, weights):
 …
     # Decreasing list of polydispersity lengths
+    pd_length = np.array([len(w) for w in weights])
+    #print([p.id for p in model_info.parameters.call_parameters])
+    pd_length = np.array([len(w) for w in weights[2:2+model_info.parameters.npars]])
     num_active = np.sum(pd_length>1)
+    if num_active > model_info.parameters.max_pd:
+    max_pd = model_info.parameters.max_pd
+    if num_active > max_pd:
         raise ValueError("Too many polydisperse parameters")
 …
     #print("off:",pd_offset)
     # Note: the reversing view, x[::-1], does not require a copy
+    idx = np.argsort(pd_length)[::-1][:num_active]
+    par_length = np.array([len(w) for w in weights])
+    pd_stride = np.cumprod(np.hstack((1, par_length[idx])))
+    idx = np.argsort(pd_length)[::-1][:max_pd]
+    pd_stride = np.cumprod(np.hstack((1, pd_length[idx])))
     call_details = CallDetails(model_info)
     call_details.pd_par[:num_active] = idx - 2  # skip background & scale
     call_details.pd_length[:num_active] = pd_length[idx]
     call_details.pd_offset[:num_active] = pd_offset[idx]
     call_details.pd_stride[:num_active] = pd_stride[:-1]
+    call_details.pd_par[:max_pd] = idx
+    call_details.pd_length[:max_pd] = pd_length[idx]
+    call_details.pd_offset[:max_pd] = pd_offset[idx]
+    call_details.pd_stride[:max_pd] = pd_stride[:-1]
     call_details.pd_prod = pd_stride[-1]
     call_details.pd_sum = np.sum(par_length)
+    call_details.pd_sum = sum(len(w) for w in weights)
     call_details.num_active = num_active
     #call_details.show()
     return call_details
+def dispersion_mesh(model_info, pars):
+    """
+    Create a mesh grid of dispersion parameters and weights.
+    Returns [p1,p2,...],w where pj is a vector of values for parameter j
+    and w is a vector containing the product of the weights for each
+    parameter set in the vector.
+    """
+    value, weight = zip(*pars)
+    weight = [w if w else [1.] for w in weight]
+    weight = np.vstack([v.flatten() for v in meshgrid(*weight)])
+    weight = np.prod(weight, axis=0)
+    value = [v.flatten() for v in meshgrid(*value)]
+    lengths = [par.length for par in model_info.parameters.kernel_parameters
+               if par.type == 'volume']
+    if any(n > 1 for n in lengths):
+        pars = []
+        offset = 0
+        for n in lengths:
+            pars.append(np.vstack(value[offset:offset+n]) if n > 1 else value[offset])
+            offset += n
+        value = pars
+    return value, weight
+def build_details(kernel, pairs):
+    # type: (Kernel, Tuple[List[np.ndarray], List[np.ndarray]]) -> Tuple[CallDetails, np.ndarray, bool]
+    """
+    Converts (value, weight) pairs into parameters for the kernel call.
+    Returns a CallDetails object indicating the polydispersity, a data object
+    containing the different values, and the magnetic flag indicating whether
+    any magnetic magnitudes are non-zero. Magnetic vectors (M0, theta, phi) are
+    converted to rectangular coordinates (mx, my, mz).
+    """
+    values, weights = zip(*pairs)
+    scalars = [v[0] for v in values]
+    if all(len(w)==1 for w in weights):
+        call_details = mono_details(kernel.info)
+        data = np.array(scalars+scalars+[1]*len(scalars), dtype=kernel.dtype)
+    else:
+        call_details = poly_details(kernel.info, weights)
+        data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype)
+    is_magnetic = convert_magnetism(kernel.info.parameters, data)
+    #call_details.show()
+    return call_details, data, is_magnetic
+def convert_magnetism(parameters, values):
+    """
+    Convert magnetism in value table from polar to rectangular coordinates.
+    Returns True if any magnetism is present.
+    """
+    mag = values[parameters.nvalues-3*parameters.nmagnetic:parameters.nvalues]
+    mag = mag.reshape(-1, 3)
+    M0 = mag[:,0]
+    if np.any(M0):
+        theta, phi = mag[:,1]*pi/180., mag[:,2]*pi/180.
+        cos_theta = cos(theta)
+        mx = M0*cos_theta*cos(phi)
+        my = M0*sin(theta)
+        mz = -M0*cos_theta*sin(phi)
+        mag[:,0], mag[:,1], mag[:,2] = mx, my, mz
+        return True
+    else:
+        return False

sasmodels/direct_model.py

-                      r32e3c9b
+                      r9eb3632
 from . import resolution
 from . import resolution2d
 from . import kernel
+from .details import build_details
 try:
 …
         active = lambda name: True
+    #print("pars",[p.id for p in parameters.call_parameters])
     vw_pairs = [(get_weights(p, pars) if active(p.name)
                  else ([pars.get(p.name, p.default)], [1.0]))
                 for p in parameters.call_parameters]
+    call_details, values = kernel.build_details(calculator, vw_pairs)
+    magnetic = any(values[k]!=0 for k in parameters.magnetism_index)
+    call_details, values, is_magnetic = build_details(calculator, vw_pairs)
     #print("values:", values)
     return calculator(call_details, values, cutoff, magnetic)
+    return calculator(call_details, values, cutoff, is_magnetic)
 def get_weights(parameter, values):

sasmodels/generate.py

-                      rb966a96
+                      r9eb3632
     source.append("#define MAX_PD %s"%partable.max_pd)
     source.append("#define NPARS %d"%partable.npars)
+    source.append("#define NUM_MAGNETIC %d" % len(magpars))
+    source.append("#define NUM_MAGNETIC %d" % partable.nmagnetic)
+    source.append("#define NUM_VALUES %d" % partable.nvalues)
     source.append("#define MAGNETIC_PARS %s"%",".join(str(k) for k in magpars))
 …
     code = kernel[0]
     path = kernel[1].replace('\\', '\\\\')
     source = [
+    iq = [
         # define the Iq kernel
         "#define KERNEL_NAME %s_Iq" % name,
 …
         "#undef CALL_IQ",
         "#undef KERNEL_NAME",
+        ]
+    iqxy = [
         # define the Iqxy kernel from the same source with different #defines
         "#define KERNEL_NAME %s_Iqxy" % name,
 …
         "#undef CALL_IQ",
         "#undef KERNEL_NAME",
+         ]
+    imagnetic = [
         # define the Imagnetic kernel
         "#define KERNEL_NAME %s_Imagnetic" % name,
 …
         "#undef KERNEL_NAME",
+    ]
     return source
+    return iq+iqxy+imagnetic

sasmodels/kernel.py

-                      r32e3c9b
+                      r9eb3632
 import numpy as np
-from .details import mono_details, poly_details
 try:
 …
         # type: () -> None
         pass
-try:
-    np.meshgrid([])
-    meshgrid = np.meshgrid
-except ValueError:
-    # CRUFT: np.meshgrid requires multiple vectors
-    def meshgrid(*args):
-        if len(args) > 1:
-            return np.meshgrid(*args)
-        else:
-            return [np.asarray(v) for v in args]
-def dispersion_mesh(model_info, pars):
-    """
-    Create a mesh grid of dispersion parameters and weights.
-    Returns [p1,p2,...],w where pj is a vector of values for parameter j
-    and w is a vector containing the products for weights for each
-    parameter set in the vector.
-    """
-    value, weight = zip(*pars)
-    weight = [w if w else [1.] for w in weight]
-    weight = np.vstack([v.flatten() for v in meshgrid(*weight)])
-    weight = np.prod(weight, axis=0)
-    value = [v.flatten() for v in meshgrid(*value)]
-    lengths = [par.length for par in model_info.parameters.kernel_parameters
-               if par.type == 'volume']
-    if any(n > 1 for n in lengths):
-        pars = []
-        offset = 0
-        for n in lengths:
-            pars.append(np.vstack(value[offset:offset+n]) if n > 1 else value[offset])
-            offset += n
-        value = pars
-    return value, weight
-def build_details(kernel, pairs):
-    # type: (Kernel, Tuple[List[np.ndarray], List[np.ndarray]]) -> Tuple[CallDetails, np.ndarray, np.ndarray]
-    """
-    Construct the kernel call details object for calling the particular kernel.
-    """
-    values, weights = zip(*pairs)
-    scalars = [v[0] for v in values]
-    if all(len(w)==1 for w in weights):
-        call_details = mono_details(kernel.info)
-        data = np.array(scalars, dtype=kernel.dtype)
-    else:
-        call_details = poly_details(kernel.info, weights)
-        data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype)
-    return call_details, data

sasmodels/kernel_iq.c

-                      rb966a96
+                      r9eb3632
 typedef struct {
     PARAMETER_TABLE
+    PARAMETER_TABLE;
 } ParameterBlock;
+#endif
+#ifdef MAGNETIC
+const int32_t magnetic[] = { MAGNETIC_PARS };
+#endif
+#endif // _PAR_BLOCK_
 #ifdef MAGNETIC
 …
+}
+// Convert polar to rectangular coordinates.
+static void polrec(double r, double theta, double phi,
+  double *x, double *y, double *z)
+{
+  double cos_theta, sin_theta, cos_phi, sin_phi;
+  SINCOS(theta*M_PI_180, sin_theta, cos_theta);
+  SINCOS(phi*M_PI_180, sin_phi, cos_phi);
+  *x = r * cos_theta * cos_phi;
+  *y = r * sin_theta;
+  *z = -r * cos_theta * sin_phi;
+}
+#endif
+#endif // MAGNETIC
 kernel
 …
     const int32_t pd_stop,      // where we are stopping in the polydispersity loop
     global const ProblemDetails *details,
+   // global const  // TODO: make it const again!
+    double *values,
+    global const double *values,
     global const double *q, // nq q values, with padding to boundary
     global double *result,  // nq+3 return values, again with padding
+    global double *result,  // nq+1 return values, again with padding
     const double cutoff     // cutoff in the polydispersity weight product
+    )
+{
   // Storage for the current parameter values.  These will be updated as we
+  // walk the polydispersity cube.
+  ParameterBlock local_values;  // current parameter values
+  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
+  // walk the polydispersity cube.  local_values will be aliased to pvec.
+  ParameterBlock local_values;
+  double *pvec = (double *)&local_values;
+#ifdef MAGNETIC
+  // Location of the sld parameters in the parameter pvec.
+  // These parameters are updated with the effective sld due to magnetism.
+  const int32_t slds[] = { MAGNETIC_PARS };
+  const double up_frac_i = values[NPARS+2];
+  const double up_frac_f = values[NPARS+3];
+  const double up_angle = values[NPARS+4];
+  #define MX(_k) (values[NPARS+5+3*_k])
+  #define MY(_k) (values[NPARS+6+3*_k])
+  #define MZ(_k) (values[NPARS+7+3*_k])
+  // TODO: could precompute these outside of the kernel.
+  // Interpret polarization cross section.
+  double uu, dd, ud, du;
+  double cos_mspin, sin_mspin;
+  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du);
+  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin);
+#endif // MAGNETIC
   // Fill in the initial variables
 …
   #pragma omp parallel for
   #endif
+  for (int k=0; k < NPARS; k++) {
+    pvec[k] = values[k+2];
+  }
+#ifdef MAGNETIC
+  const double up_frac_i = values[NPARS+2];
+  const double up_frac_f = values[NPARS+3];
+  const double up_angle = values[NPARS+4];
+  #define MX(_k) (values[NPARS+5+3*_k])
+  #define MY(_k) (values[NPARS+6+3*_k])
+  #define MZ(_k) (values[NPARS+7+3*_k])
+  // TODO: precompute this on the python side
+  // Convert polar to rectangular coordinates in place.
+  if (pd_start == 0) {  // Update in place; only do this for the first hunk!
+//printf("spin: %g %g %g\n", up_frac_i, up_frac_f, up_angle);
+    for (int mag=0; mag < NUM_MAGNETIC; mag++) {
+//printf("mag %d: %g %g %g\n", mag, MX(mag), MY(mag), MZ(mag));
+        polrec(MX(mag), MY(mag), MZ(mag), &MX(mag), &MY(mag), &MZ(mag));
+//printf("   ==>: %g %g %g\n", MX(mag), MY(mag), MZ(mag));
+    }
+  }
+  // Interpret polarization cross section.
+  double uu, dd, ud, du;
+  double cos_mspin, sin_mspin;
+  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du);
+  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin);
+#endif
+  // Monodisperse computation
+  if (details->num_active == 0) {
+    double norm, scale, background;
+    #ifdef INVALID
+    if (INVALID(local_values)) { return; }
+    #endif
+    norm = CALL_VOLUME(local_values);
+    scale = values[0];
+    background = values[1];
+  for (int i=0; i < NPARS; i++) {
+    pvec[i] = values[2+i];
+//printf("p%d = %g\n",i, pvec[i]);
+  }
+  double pd_norm;
+//printf("start: %d %d\n",pd_start, pd_stop);
+  if (pd_start == 0) {
+    pd_norm = 0.0;
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
+    for (int q_index=0; q_index < nq; q_index++) {
+    for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
+//printf("initializing %d\n", nq);
+  } else {
+    pd_norm = result[nq];
+  }
+//printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
+  global const double *pd_value = values + NUM_VALUES + 2;
+  global const double *pd_weight = pd_value + details->pd_sum;
+  // Jump into the middle of the polydispersity loop
+#if MAX_PD>4
+  int n4=details->pd_length[4];
+  int i4=(pd_start/details->pd_stride[4])%n4;
+  const int p4=details->pd_par[4];
+  global const double *v4 = pd_value + details->pd_offset[4];
+  global const double *w4 = pd_weight + details->pd_offset[4];
+#endif
+#if MAX_PD>3
+  int n3=details->pd_length[3];
+  int i3=(pd_start/details->pd_stride[3])%n3;
+  const int p3=details->pd_par[3];
+  global const double *v3 = pd_value + details->pd_offset[3];
+  global const double *w3 = pd_weight + details->pd_offset[3];
+//printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES);
+#endif
+#if MAX_PD>2
+  int n2=details->pd_length[2];
+  int i2=(pd_start/details->pd_stride[2])%n2;
+  const int p2=details->pd_par[2];
+  global const double *v2 = pd_value + details->pd_offset[2];
+  global const double *w2 = pd_weight + details->pd_offset[2];
+#endif
+#if MAX_PD>1
+  int n1=details->pd_length[1];
+  int i1=(pd_start/details->pd_stride[1])%n1;
+  const int p1=details->pd_par[1];
+  global const double *v1 = pd_value + details->pd_offset[1];
+  global const double *w1 = pd_weight + details->pd_offset[1];
+#endif
+#if MAX_PD>0
+  int n0=details->pd_length[0];
+  int i0=(pd_start/details->pd_stride[0])%n0;
+  const int p0=details->pd_par[0];
+  global const double *v0 = pd_value + details->pd_offset[0];
+  global const double *w0 = pd_weight + details->pd_offset[0];
+//printf("w0:%p, values:%p, diff:%d, %d\n",w0,values,(w0-values),NUM_VALUES);
+#endif
+  double spherical_correction=1.0;
+  const int theta_par = details->theta_par;
+#if MAX_PD>0
+  const int fast_theta = (theta_par == p0);
+  const int slow_theta = (theta_par >= 0 && !fast_theta);
+#else
+  const int slow_theta = (theta_par >= 0);
+#endif
+  int step = pd_start;
+#if MAX_PD>4
+  const double weight5 = 1.0;
+  while (i4 < n4) {
+    pvec[p4] = v4[i4];
+    double weight4 = w4[i4] * weight5;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, pvec[p4], weight4);
+#elif MAX_PD>3
+    const double weight4 = 1.0;
+#endif
+#if MAX_PD>3
+  while (i3 < n3) {
+    pvec[p3] = v3[i3];
+    double weight3 = w3[i3] * weight4;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, pvec[p3], weight3);
+#elif MAX_PD>2
+    const double weight3 = 1.0;
+#endif
+#if MAX_PD>2
+  while (i2 < n2) {
+    pvec[p2] = v2[i2];
+    double weight2 = w2[i2] * weight3;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, pvec[p2], weight2);
+#elif MAX_PD>1
+    const double weight2 = 1.0;
+#endif
+#if MAX_PD>1
+  while (i1 < n1) {
+    pvec[p1] = v1[i1];
+    double weight1 = w1[i1] * weight2;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, pvec[p1], weight1);
+#elif MAX_PD>0
+    const double weight1 = 1.0;
+#endif
+    if (slow_theta) { // Theta is not in inner loop
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[theta_par])), 1.e-6);
+    }
+#if MAX_PD>0
+  while(i0 < n0) {
+    pvec[p0] = v0[i0];
+    double weight0 = w0[i0] * weight1;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, pvec[p0], weight0);
+    if (fast_theta) { // Theta is in inner loop
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0])), 1.e-6);
+    }
+#else
+    const double weight0 = 1.0;
+#endif
+//printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NPARS; i++) printf("p%d=%g ",i, pvec[i]); printf("\n");
+//printf("sphcor: %g\n", spherical_correction);
+    #ifdef INVALID
+    if (!INVALID(local_values))
+    #endif
+    {
+      // Accumulate I(q)
+      // Note: weight==0 must always be excluded
+      if (weight0 > cutoff) {
+        // spherical correction has some nasty effects when theta is +90 or -90
+        // where it becomes zero.
+        const double weight = weight0 * spherical_correction;
+        pd_norm += weight * CALL_VOLUME(local_values);
+        #ifdef USE_OPENMP
+        #pragma omp parallel for
+        #endif
+        for (int q_index=0; q_index<nq; q_index++) {
 #ifdef MAGNETIC
+      const double qx = q[2*q_index];
+      const double qy = q[2*q_index+1];
+      const double qsq = qx*qx + qy*qy;
+      // Constant across orientation, polydispersity for given qx, qy
+      double px, py, pz;
+      if (qsq > 1e-16) {
+        px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+        py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+        pz = 1.0;
+      } else {
+        px = py = pz = 0.0;
+      }
+      double scattering = 0.0;
+      if (uu > 1e-8) {
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = (values[magnetic[mag]+2] - perp*px)*uu;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+      if (dd > 1e-8){
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = (values[magnetic[mag]+2] + perp*px)*dd;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+      if (ud > 1e-8){
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = perp*py*ud;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            pvec[magnetic[mag]] = MZ(mag)*pz*ud;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+      if (du > 1e-8) {
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = perp*py*du;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            pvec[magnetic[mag]] = -MZ(mag)*pz*du;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+#else
+      double scattering = CALL_IQ(q, q_index, local_values);
+#endif
+      result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
+    }
+    return;
+  }
+#if MAX_PD > 0
+#if MAGNETIC
+  const double *pd_value = values+2+NPARS+3+3*NUM_MAGNETIC;
+#else
+  const double *pd_value = values+2+NPARS;
+#endif
+  const double *pd_weight = pd_value+details->pd_sum;
+  // need product of weights at every Iq calc, so keep product of
+  // weights from the outer loops so that weight = partial_weight * fast_weight
+  double pd_norm;
+  double partial_weight; // product of weight w4*w3*w2 but not w1
+  double spherical_correction; // cosine correction for latitude variation
+  double weight; // product of partial_weight*w1*spherical_correction
+  // Number of elements in the longest polydispersity loop
+  const int p0_par = details->pd_par[0];
+  const int p0_length = details->pd_length[0];
+  const int p0_offset = details->pd_offset[0];
+  const int p0_is_theta = (p0_par == details->theta_par);
+  int p0_index;
+  // Trigger the reset behaviour that happens at the end of the fast loop
+  // by setting the initial index >= weight vector length.
+  p0_index = p0_length;
+  // Default the spherical correction to 1.0 in case it is not otherwise set
+  spherical_correction = 1.0;
+  // Since we are no longer looping over the entire polydispersity hypercube
+  // for each q, we need to track the result and normalization values between
+  // calls.  This means initializing them to 0 at the start and accumulating
+  // them between calls.
+  pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
+  if (pd_start == 0) {
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    for (int q_index=0; q_index < nq; q_index++) {
+      result[q_index] = 0.0;
+    }
+  }
+  // Loop over the weights then loop over q, accumulating values
+  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
+    // check if fast loop needs to be reset
+    if (p0_index == p0_length) {
+      // Compute position in polydispersity hypercube and partial weight
+      partial_weight = 1.0;
+      for (int k=1; k < details->num_active; k++) {
+        int pk = details->pd_par[k];
+        int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
+        pvec[pk] = pd_value[index];
+        partial_weight *= pd_weight[index];
+        if (pk == details->theta_par) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
+          const double qx = q[2*q_index];
+          const double qy = q[2*q_index+1];
+          const double qsq = qx*qx + qy*qy;
+          // Constant across orientation, polydispersity for given qx, qy
+          double px, py, pz;
+          if (qsq > 1.e-16) {
+            px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+            py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+            pz = 1.0;
+          } else {
+            px = py = pz = 0.0;
+          }
+          double scattering = 0.0;
+          if (uu > 1.e-8) {
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = (values[slds[sk]+2] - perp*px)*uu;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (dd > 1.e-8){
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = (values[slds[sk]+2] + perp*px)*dd;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (ud > 1.e-8){
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = perp*py*ud;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                pvec[slds[sk]] = MZ(sk)*pz*ud;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (du > 1.e-8) {
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = perp*py*du;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                pvec[slds[sk]] = -MZ(sk)*pz*du;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+#else  // !MAGNETIC
+          const double scattering = CALL_IQ(q, q_index, local_values);
+#endif // !MAGNETIC
+//printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0);
+          result[q_index] += weight * scattering;
+        }
+      }
-      p0_index = loop_index%p0_length;
+    }
+    // Update parameter p0
+    weight = partial_weight*pd_weight[p0_offset + p0_index];
+    pvec[p0_par] = pd_value[p0_offset + p0_index];
+    if (p0_is_theta) {
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
+    }
+    p0_index++;
+    #ifdef INVALID
+    if (INVALID(local_values)) continue;
+    #endif
+    // Accumulate I(q)
+    // Note: weight==0 must always be excluded
+    if (weight > cutoff) {
+      // spherical correction has some nasty effects when theta is +90 or -90
+      // where it becomes zero.
+      weight *= spherical_correction;
+      pd_norm += weight * CALL_VOLUME(local_values);
+      #ifdef USE_OPENMP
+      #pragma omp parallel for
+      #endif
+      for (int q_index=0; q_index < nq; q_index++) {
+#ifdef MAGNETIC
+        const double qx = q[2*q_index];
+        const double qy = q[2*q_index+1];
+        const double qsq = qx*qx + qy*qy;
+        // Constant across orientation, polydispersity for given qx, qy
+        double px, py, pz;
+        if (qsq > 1e-16) {
+          px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+          py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+          pz = 1.0;
+        } else {
+          px = py = pz = 0.0;
+        }
+        double scattering = 0.0;
+        if (uu > 1e-8) {
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = (values[magnetic[mag]+2] - perp*px)*uu;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+        if (dd > 1e-8){
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = (values[magnetic[mag]+2] + perp*px)*dd;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+        if (ud > 1e-8){
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = perp*py*ud;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              pvec[magnetic[mag]] = MZ(mag)*pz*ud;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+        if (du > 1e-8) {
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = perp*py*du;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              pvec[magnetic[mag]] = -MZ(mag)*pz*du;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+#else
+        double scattering = CALL_IQ(q, q_index, local_values);
+#endif
+        result[q_index] += weight*scattering;
+      }
+    }
+  }
+  if (pd_stop >= details->pd_prod) {
+    // End of the PD loop, so we can normalize
+    double scale, background;
+    scale = values[0];
+    background = values[1];
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    for (int q_index=0; q_index < nq; q_index++) {
+      result[q_index] = (pd_norm>0. ? scale*result[q_index]/pd_norm + background : background);
+    }
+  }
+    ++step;
+#if MAX_PD>0
+    if (step >= pd_stop) break;
+    ++i0;
+  }
+  i0 = 0;
+#endif
+#if MAX_PD>1
+    if (step >= pd_stop) break;
+    ++i1;
+  }
+  i1 = 0;
+#endif
+#if MAX_PD>2
+    if (step >= pd_stop) break;
+    ++i2;
+  }
+  i2 = 0;
+#endif
+#if MAX_PD>3
+    if (step >= pd_stop) break;
+    ++i3;
+  }
+  i3 = 0;
+#endif
+#if MAX_PD>4
+    if (step >= pd_stop) break;
+    ++i4;
+  }
+  i4 = 0;
+#endif
+//printf("res: %g/%g\n", result[0], pd_norm);
   // Remember the updated norm.
   result[nq] = pd_norm;
-#endif // MAX_PD > 0
+}

sasmodels/kernel_iq.cl

-                      rb9c12fe5
+                      r9eb3632
     PARAMETER_TABLE;
 } ParameterBlock;
+#endif
+#endif // _PAR_BLOCK_
+#ifdef MAGNETIC
+// Return value restricted between low and high
+static double clip(double value, double low, double high)
+{
+  return (value < low ? low : (value > high ? high : value));
+}
+// Compute spin cross sections given in_spin and out_spin
+// To convert spin cross sections to sld b:
+//     uu * (sld - m_sigma_x);
+//     dd * (sld + m_sigma_x);
+//     ud * (m_sigma_y + 1j*m_sigma_z);
+//     du * (m_sigma_y - 1j*m_sigma_z);
+static void spins(double in_spin, double out_spin,
+    double *uu, double *dd, double *ud, double *du)
+{
+  in_spin = clip(in_spin, 0.0, 1.0);
+  out_spin = clip(out_spin, 0.0, 1.0);
+  *uu = sqrt(sqrt(in_spin * out_spin));
+  *dd = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin)));
+  *ud = sqrt(sqrt(in_spin * (1.0-out_spin)));
+  *du = sqrt(sqrt((1.0-in_spin) * out_spin));
+}
+#endif // MAGNETIC
 kernel
 …
+    )
+{
+  // who we are and what element we are working with
+  const int q_index = get_global_id(0);
+  if (q_index >= nq) return;
   // Storage for the current parameter values.  These will be updated as we
   // walk the polydispersity cube.  local_values will be aliased to pvec.
 …
   double *pvec = (double *)&local_values;
-  // who we are and what element we are working with
-  const int q_index = get_global_id(0);
   // Fill in the initial variables
   for (int i=0; i < NPARS; i++) {
     pvec[i] = values[2+i];
+  }
+  // Monodisperse computation
+  if (details->num_active == 0) {
+    double norm, scale, background;
+    // TODO: only needs to be done by one process...
+//if (q_index==0) printf("p%d = %g\n",i, pvec[i]);
+  }
+#ifdef MAGNETIC
+  // Location of the sld parameters in the parameter pvec.
+  // These parameters are updated with the effective sld due to magnetism.
+  const int32_t slds[] = { MAGNETIC_PARS };
+  const double up_frac_i = values[NPARS+2];
+  const double up_frac_f = values[NPARS+3];
+  const double up_angle = values[NPARS+4];
+  #define MX(_k) (values[NPARS+5+3*_k])
+  #define MY(_k) (values[NPARS+6+3*_k])
+  #define MZ(_k) (values[NPARS+7+3*_k])
+  // TODO: could precompute these outside of the kernel.
+  // Interpret polarization cross section.
+  double uu, dd, ud, du;
+  double cos_mspin, sin_mspin;
+  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du);
+  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin);
+#endif // MAGNETIC
+  double pd_norm, this_result;
+  if (pd_start == 0) {
+    pd_norm = this_result = 0.0;
+  } else {
+    pd_norm = result[nq];
+    this_result = result[q_index];
+  }
+//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, this_result);
+  global const double *pd_value = values + NUM_VALUES + 2;
+  global const double *pd_weight = pd_value + details->pd_sum;
+  // Jump into the middle of the polydispersity loop
+#if MAX_PD>4
+  int n4=details->pd_length[4];
+  int i4=(pd_start/details->pd_stride[4])%n4;
+  const int p4=details->pd_par[4];
+  global const double *v4 = pd_value + details->pd_offset[4];
+  global const double *w4 = pd_weight + details->pd_offset[4];
+#endif
+#if MAX_PD>3
+  int n3=details->pd_length[3];
+  int i3=(pd_start/details->pd_stride[3])%n3;
+  const int p3=details->pd_par[3];
+  global const double *v3 = pd_value + details->pd_offset[3];
+  global const double *w3 = pd_weight + details->pd_offset[3];
+//if (q_index==0) printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES);
+#endif
+#if MAX_PD>2
+  int n2=details->pd_length[2];
+  int i2=(pd_start/details->pd_stride[2])%n2;
+  const int p2=details->pd_par[2];
+  global const double *v2 = pd_value + details->pd_offset[2];
+  global const double *w2 = pd_weight + details->pd_offset[2];
+#endif
+#if MAX_PD>1
+  int n1=details->pd_length[1];
+  int i1=(pd_start/details->pd_stride[1])%n1;
+  const int p1=details->pd_par[1];
+  global const double *v1 = pd_value + details->pd_offset[1];
+  global const double *w1 = pd_weight + details->pd_offset[1];
+#endif
+#if MAX_PD>0
+  int n0=details->pd_length[0];
+  int i0=(pd_start/details->pd_stride[0])%n0;
+  const int p0=details->pd_par[0];
+  global const double *v0 = pd_value + details->pd_offset[0];
+  global const double *w0 = pd_weight + details->pd_offset[0];
+#endif
+  double spherical_correction=1.0;
+  const int theta_par = details->theta_par;
+#if MAX_PD>0
+  const int fast_theta = (theta_par == p0);
+  const int slow_theta = (theta_par >= 0 && !fast_theta);
+#else
+  const int slow_theta = (theta_par >= 0);
+#endif
+  int step = pd_start;
+#if MAX_PD>4
+  const double weight5 = 1.0;
+  while (i4 < n4) {
+    pvec[p4] = v4[i4];
+    double weight4 = w4[i4] * weight5;
+//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, pvec[p4], weight4);
+#elif MAX_PD>3
+    const double weight4 = 1.0;
+#endif
+#if MAX_PD>3
+  while (i3 < n3) {
+    pvec[p3] = v3[i3];
+    double weight3 = w3[i3] * weight4;
+//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, pvec[p3], weight3);
+#elif MAX_PD>2
+    const double weight3 = 1.0;
+#endif
+#if MAX_PD>2
+  while (i2 < n2) {
+    pvec[p2] = v2[i2];
+    double weight2 = w2[i2] * weight3;
+//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, pvec[p2], weight2);
+#elif MAX_PD>1
+    const double weight2 = 1.0;
+#endif
+#if MAX_PD>1
+  while (i1 < n1) {
+    pvec[p1] = v1[i1];
+    double weight1 = w1[i1] * weight2;
+//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, pvec[p1], weight1);
+#elif MAX_PD>0
+    const double weight1 = 1.0;
+#endif
+    if (slow_theta) { // Theta is not in inner loop
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[theta_par])), 1.e-6);
+    }
+#if MAX_PD>0
+  while(i0 < n0) {
+    pvec[p0] = v0[i0];
+    double weight0 = w0[i0] * weight1;
+//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, pvec[p0], weight0);
+    if (fast_theta) { // Theta is in inner loop
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0])), 1.e-6);
+    }
+#else
+    const double weight0 = 1.0;
+#endif
+//if (q_index == 0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NPARS; i++) printf("p%d=%g ",i, pvec[i]); printf("\n"); }
+//if (q_index == 0) printf("sphcor: %g\n", spherical_correction);
     #ifdef INVALID
     if (INVALID(local_values)) { return; }
+    if (!INVALID(local_values))
     #endif
+    norm = CALL_VOLUME(local_values);
+    scale = values[0];
+    background = values[1];
+    if (q_index < nq) {
+      double scattering = CALL_IQ(q, q_index, local_values);
+      result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
+    {
+      // Accumulate I(q)
+      // Note: weight==0 must always be excluded
+      if (weight0 > cutoff) {
+        // spherical correction has some nasty effects when theta is +90 or -90
+        // where it becomes zero.
+        const double weight = weight0 * spherical_correction;
+        pd_norm += weight * CALL_VOLUME(local_values);
+#ifdef MAGNETIC
+          const double qx = q[2*q_index];
+          const double qy = q[2*q_index+1];
+          const double qsq = qx*qx + qy*qy;
+          // Constant across orientation, polydispersity for given qx, qy
+          double px, py, pz;
+          if (qsq > 1.e-16) {
+            px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+            py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+            pz = 1.0;
+          } else {
+            px = py = pz = 0.0;
+          }
+          double scattering = 0.0;
+          if (uu > 1.e-8) {
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = (values[slds[sk]+2] - perp*px)*uu;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (dd > 1.e-8){
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = (values[slds[sk]+2] + perp*px)*dd;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (ud > 1.e-8){
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = perp*py*ud;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                pvec[slds[sk]] = MZ(sk)*pz*ud;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (du > 1.e-8) {
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = perp*py*du;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                pvec[slds[sk]] = -MZ(sk)*pz*du;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+#else  // !MAGNETIC
+        const double scattering = CALL_IQ(q, q_index, local_values);
+#endif // !MAGNETIC
+        this_result += weight * scattering;
+      }
+    }
+    return;
+  }
+#if MAX_PD > 0
+  double this_result;
+  //printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop);
+  global const double *pd_value = values+2+NPARS;
+  global const double *pd_weight = pd_value+details->pd_sum;
+  // need product of weights at every Iq calc, so keep product of
+  // weights from the outer loops so that weight = partial_weight * fast_weight
+  double pd_norm;
+  double partial_weight; // product of weight w4*w3*w2 but not w1
+  double spherical_correction; // cosine correction for latitude variation
+  double weight; // product of partial_weight*w1*spherical_correction
+  int p0_par;
+  int p0_length;
+  int p0_offset;
+  int p0_is_theta;
+  int p0_index;
+  // Number of elements in the longest polydispersity loop
+  p0_par = details->pd_par[0];
+  p0_length = details->pd_length[0];
+  p0_offset = details->pd_offset[0];
+  p0_is_theta = (p0_par == details->theta_par);
+  // Trigger the reset behaviour that happens at the end of the fast loop
+  // by setting the initial index >= weight vector length.
+  p0_index = p0_length;
+  // Default the spherical correction to 1.0 in case it is not otherwise set
+  spherical_correction = 1.0;
+  weight=1.0;
+  // Since we are no longer looping over the entire polydispersity hypercube
+  // for each q, we need to track the result and normalization values between
+  // calls.  This means initializing them to 0 at the start and accumulating
+  // them between calls.
+  pd_norm = pd_start == 0 ? 0.0 : result[nq];
+  if (q_index < nq) {
+    this_result = pd_start == 0 ? 0.0 : result[q_index];
+  }
+  // Loop over the weights then loop over q, accumulating values
+  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
+    // check if fast loop needs to be reset
+    if (p0_index == p0_length) {
+      //printf("should be here with %d active\n", num_active);
+      // Compute position in polydispersity hypercube and partial weight
+      partial_weight = 1.0;
+      for (int k=1; k < details->num_active; k++) {
+        int pk = details->pd_par[k];
+        int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
+        pvec[pk] = pd_value[index];
+        partial_weight *= pd_weight[index];
+        //printf("index[%d] = %d\n",k,index);
+        if (pk == details->theta_par) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
+        }
+      }
+      p0_index = loop_index%p0_length;
+      //printf("\n");
+    }
+    // Update parameter p0
+    weight = partial_weight*pd_weight[p0_offset + p0_index];
+    pvec[p0_par] = pd_value[p0_offset + p0_index];
+    if (p0_is_theta) {
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
+    }
+    p0_index++;
+    //printf("\n");
+    // Increment fast index
+    #ifdef INVALID
+    if (INVALID(local_values)) continue;
+    #endif
+    // Accumulate I(q)
+    // Note: weight==0 must always be excluded
+    if (weight > cutoff) {
+      // spherical correction has some nasty effects when theta is +90 or -90
+      // where it becomes zero.
+      weight *= spherical_correction;
+      pd_norm += weight * CALL_VOLUME(local_values);
+      const double scattering = CALL_IQ(q, q_index, local_values);
+      this_result += weight*scattering;
+    }
+  }
+  if (q_index < nq) {
+    if (pd_stop >= details->pd_prod) {
+      // End of the PD loop we can normalize
+      double scale, background;
+      scale = values[0];
+      background = values[1];
+      result[q_index] = (pd_norm>0. ? scale*this_result/pd_norm + background : background);
+    } else {
+      // Partial result, so remember it but don't normalize it.
+      result[q_index] = this_result;
+    }
+    // Remember the updated norm.
+    if (q_index == 0) result[nq] = pd_norm;
+  }
+#endif // MAX_PD > 0
+    ++step;
+#if MAX_PD>0
+    if (step >= pd_stop) break;
+    ++i0;
+  }
+  i0 = 0;
+#endif
+#if MAX_PD>1
+    if (step >= pd_stop) break;
+    ++i1;
+  }
+  i1 = 0;
+#endif
+#if MAX_PD>2
+    if (step >= pd_stop) break;
+    ++i2;
+  }
+  i2 = 0;
+#endif
+#if MAX_PD>3
+    if (step >= pd_stop) break;
+    ++i3;
+  }
+  i3 = 0;
+#endif
+#if MAX_PD>4
+    if (step >= pd_stop) break;
+    ++i4;
+  }
+  i4 = 0;
+#endif
+//if (q_index==0) printf("res: %g/%g\n", this_result, pd_norm);
+  // Remember the current result and the updated norm.
+  result[q_index] = this_result;
+  if (q_index == 0) result[nq] = pd_norm;
+}

sasmodels/kernelcl.py

-                      r32e3c9b
+                      r9eb3632
         self.program = None
     def make_kernel(self, q_vectors, magnetic=False):
+    def make_kernel(self, q_vectors):
         # type: (List[np.ndarray]) -> "GpuKernel"
         if self.program is None:
 …
             self.program = compiler(self.info.name, self.source,
                                     self.dtype, self.fast)
+            names = [generate.kernel_name(self.info, variant)
+                     for variant in ("Iq", "Iqxy", "Imagnetic")]
+            self._kernels = [getattr(self.program, name) for name in names]
         is_2d = len(q_vectors) == 2
+        variant = "Imagnetic" if magnetic else "Iqxy" if is_2d else "Iq"
+        kernel_name = generate.kernel_name(self.info, variant)
+        kernel = getattr(self.program, kernel_name)
+        kernel = self._kernels[1:3] if is_2d else [self._kernels[0]]*2
         return GpuKernel(kernel, self.dtype, self.info, q_vectors)
 …
                              hostbuf=values)
+        kernel = self.kernel[1 if magnetic else 0]
+        args = [
+            np.uint32(self.q_input.nq), None, None,
+            details_b, values_b, self.q_input.q_b, self.result_b,
+            self.real(cutoff),
+        ]
+        #print("Calling OpenCL")
+        #print("values",values)
         # Call kernel and retrieve results
+        last_call = None
         step = 100
-        #print("calling OpenCL")
         for start in range(0, call_details.pd_prod, step):
             stop = min(start+step, call_details.pd_prod)
+            args = [
+                np.uint32(self.q_input.nq), np.int32(start), np.int32(stop),
+                details_b, values_b, self.q_input.q_b, self.result_b,
+                self.real(cutoff),
+            ]
+            self.kernel(self.queue, self.q_input.global_size, None, *args)
+            #print("queuing",start,stop)
+            args[1:3] = [np.int32(start), np.int32(stop)]
+            last_call = [kernel(self.queue, self.q_input.global_size,
+                                None, *args, wait_for=last_call)]
         cl.enqueue_copy(self.queue, self.result, self.result_b)
 …
             if v is not None: v.release()
+        return self.result[:self.q_input.nq]
+        scale = values[0]/self.result[self.q_input.nq]
+        background = values[1]
+        #print("scale",scale,background)
+        return scale*self.result[:self.q_input.nq] + background
+        # return self.result[:self.q_input.nq]
     def release(self):

sasmodels/kerneldll.py

-                      rb966a96
+                      r9eb3632
         # int, int, int, int*, double*, double*, double*, double*, double
         argtypes = [c_int32]*3 + [c_void_p]*4 + [fp]
+        self._Iq = self._dll[generate.kernel_name(self.info, "Iq")]
+        self._Iqxy = self._dll[generate.kernel_name(self.info, "Iqxy")]
+        self._Imagnetic = self._dll[generate.kernel_name(self.info, "Imagnetic")]
+        self._Iq.argtypes = argtypes
+        self._Iqxy.argtypes = argtypes
+        self._Imagnetic.argtypes = argtypes
+        names = [generate.kernel_name(self.info, variant)
+                 for variant in ("Iq", "Iqxy", "Imagnetic")]
+        self._kernels = [self._dll[name] for name in names]
+        for k in self._kernels:
+            k.argtypes = argtypes
     def __getstate__(self):
 …
         self._dll = None
     def make_kernel(self, q_vectors, magnetic=False):
+    def make_kernel(self, q_vectors):
         # type: (List[np.ndarray]) -> DllKernel
         q_input = PyInput(q_vectors, self.dtype)
 …
         if self._dll is None:
             self._load_dll()
+        kernel = [self._Iqxy, self._Imagnetic] if q_input.is_2d else [self._Iq]*2
+        is_2d = len(q_vectors) == 2
+        kernel = self._kernels[1:3] if is_2d else [self._kernels[0]]*2
         return DllKernel(kernel, self.info, q_input)
 …
         # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
+        #print("in kerneldll")
+        #print("values", values)
+        start, stop = 0, call_details.pd_prod
+        kernel = self.kernel[1 if magnetic else 0]
         args = [
             self.q_input.nq, # nq
             start, # pd_start
             stop, # pd_stop pd_stride[MAX_PD]
+            None, # pd_start
+            None, # pd_stop pd_stride[MAX_PD]
             call_details.buffer.ctypes.data, # problem
             values.ctypes.data,  #pars
 …
             self.result.ctypes.data,   # results
             self.real(cutoff), # cutoff
+            ]
+        #print("calling DLL")
+        self.kernel[1 if magnetic else 0](*args) # type: ignore
+        return self.result[:-1]
+        ]
+        #print("kerneldll values", values)
+        step = 100
+        for start in range(0, call_details.pd_prod, step):
+            stop = min(start+step, call_details.pd_prod)
+            args[1:3] = [start, stop]
+            #print("calling DLL")
+            kernel(*args) # type: ignore
+        #print("returned",self.q_input.q, self.result)
+        scale = values[0]/self.result[self.q_input.nq]
+        background = values[1]
+        #print("scale",scale,background)
+        return scale*self.result[:self.q_input.nq] + background
     def release(self):

sasmodels/model_test.py

r639c4e3	r9eb3632
51	51
52	52	from .core import list_models, load_model_info, build_model, HAVE_OPENCL
53		from .~~kernel~~ import dispersion_mesh
	53	from .details import dispersion_mesh
54	54	from .direct_model import call_kernel, get_weights
55	55	from .exception import annotate_exception

sasmodels/modelinfo.py

-                      r32e3c9b
+                      r9eb3632
         self.npars = sum(p.length for p in self.kernel_parameters)
+        self.num_magnetic = sum(p.length for p in self.kernel_parameters
+                                if p.type=='sld')
+        self.nmagnetic = sum(p.length for p in self.kernel_parameters
+                             if p.type=='sld')
+        self.nvalues = 2 + self.npars
+        if self.nmagnetic:
+            self.nvalues += 3 + 3*self.nmagnetic
         self.call_parameters = self._get_call_parameters()
 …
         # Add the magnetic parameters to the end of the call parameter list.
         if self.num_magnetic > 0:
+        if self.nmagnetic > 0:
             full_list.extend([
                 Parameter('up:frac_i', '', 0., [0., 1.],

sasmodels/models/hardsphere.py

-                      rd2bb604
+                      r9eb3632
     """
 Iq = """
+Iq = r"""
       double D,A,B,G,X,X2,X4,S,C,FF,HARDSPH;
       // these are c compiler instructions, can also put normal code inside the "if else" structure
 …
       if(fabs(radius_effective) < 1.E-12) {
                HARDSPH=1.0;
+//printf("HS1 %g: %g\n",q,HARDSPH);
                return(HARDSPH);
+      }
 …
       if(X < 5.E-06) {
                  HARDSPH=1./A;
+//printf("HS2 %g: %g\n",q,HARDSPH);
                  return(HARDSPH);
+      }
 …
             //FF = (8 +2.*volfraction + ( volfraction/4. -0.8 +(volfraction/100. -1./35.)*X2 )*X2 )*A  + (3.0 -X2/3. +X4/40.)*2.*B;
             HARDSPH= 1./(1. + volfraction*FF );
+//printf("HS3 %g: %g\n",q,HARDSPH);
             return(HARDSPH);
+      }
 …
 //      FF=A*(S/X3-C/X2) + B*(2.*S/X3 - C/X2 +2.0*(C-1.0)/X4) + G*( (4./X -24./X3)*S -(1.0 -12./X2 +24./X4)*C +24./X4 )/X2;
 //      HARDSPH= 1./(1. + 24.*volfraction*FF );
+//printf("HS4 %g: %g\n",q,HARDSPH);
       return(HARDSPH);
    """

sasmodels/product.py

-                      r0ff62d4
+                      r9eb3632
 from .modelinfo import suffix_parameter, ParameterTable, ModelInfo
+from .kernel import KernelModel, Kernel, dispersion_mesh
+from .kernel import KernelModel, Kernel
+from .details import dispersion_mesh
 try:

sasmodels/sasview_model.py

-                      r6a0d6aa
+                      r9eb3632
 from . import weights
 from . import modelinfo
 from . import kernel
+from .details import build_details, dispersion_mesh
 try:
 …
         parameters = self._model_info.parameters
         pairs = [self._get_weights(p) for p in parameters.call_parameters]
+        call_details, values = kernel.build_details(calculator, pairs)
+        # TODO: should test for 2d?
+        magnetic = any(values[k]!=0 for k in parameters.magnetism_index)
+        call_details, values, is_magnetic = build_details(calculator, pairs)
         result = calculator(call_details, values, cutoff=self.cutoff,
                             magnetic=magnetic)
+                            magnetic=is_magnetic)
         calculator.release()
         return result
 …
                 for p in self._model_info.parameters.call_parameters
                 if p.type == 'volume']
         return kernel.dispersion_mesh(self._model_info, pars)
+        return dispersion_mesh(self._model_info, pars)
     def _get_weights(self, par):

SasView

Changeset 9eb3632 in sasmodels

Legend:

sasmodels/details.py

sasmodels/direct_model.py

sasmodels/generate.py

sasmodels/kernel.py

sasmodels/kernel_iq.c

sasmodels/kernel_iq.cl

sasmodels/kernelcl.py

sasmodels/kerneldll.py

sasmodels/model_test.py

sasmodels/modelinfo.py

sasmodels/models/hardsphere.py

sasmodels/product.py

sasmodels/sasview_model.py

Download in other formats: