-                      rb966a96
+                      r9eb3632
 typedef struct {
     PARAMETER_TABLE
+    PARAMETER_TABLE;
 } ParameterBlock;
+#endif
+#ifdef MAGNETIC
+const int32_t magnetic[] = { MAGNETIC_PARS };
+#endif
+#endif // _PAR_BLOCK_
 #ifdef MAGNETIC
 …
+}
+// Convert polar (r, theta, phi) to rectangular (x, y, z); inputs are by value, so outputs may overwrite the caller's input storage.
+static void polrec(double r, double theta, double phi,
+  double *x, double *y, double *z)
+{
+  double cos_theta, sin_theta, cos_phi, sin_phi;
+  SINCOS(theta*M_PI_180, sin_theta, cos_theta);  // angles scaled by M_PI_180 (presumably degrees -> radians; macro defined elsewhere)
+  SINCOS(phi*M_PI_180, sin_phi, cos_phi);  // SINCOS presumably fills both sin and cos outputs -- confirm against its definition
+  *x = r * cos_theta * cos_phi;
+  *y = r * sin_theta;  // y depends on theta only
+  *z = -r * cos_theta * sin_phi;  // z carries a negative sign
+}
+#endif
+#endif // MAGNETIC
 kernel
 …
     const int32_t pd_stop,      // where we are stopping in the polydispersity loop
     global const ProblemDetails *details,
+   // global const  // TODO: make it const again!
+    double *values,
+    global const double *values,
     global const double *q, // nq q values, with padding to boundary
     global double *result,  // nq+3 return values, again with padding
+    global double *result,  // nq+1 return values, again with padding
     const double cutoff     // cutoff in the polydispersity weight product
+    )
+{
   // Storage for the current parameter values.  These will be updated as we
+  // walk the polydispersity cube.
+  ParameterBlock local_values;  // current parameter values
+  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
+  // walk the polydispersity cube.  local_values will be aliased to pvec.
+  ParameterBlock local_values;
+  double *pvec = (double *)&local_values;
+#ifdef MAGNETIC
+  // Location of the sld parameters in the parameter pvec.
+  // These parameters are updated with the effective sld due to magnetism.
+  const int32_t slds[] = { MAGNETIC_PARS };
+  const double up_frac_i = values[NPARS+2];
+  const double up_frac_f = values[NPARS+3];
+  const double up_angle = values[NPARS+4];
+  #define MX(_k) (values[NPARS+5+3*_k])
+  #define MY(_k) (values[NPARS+6+3*_k])
+  #define MZ(_k) (values[NPARS+7+3*_k])
+  // TODO: could precompute these outside of the kernel.
+  // Interpret polarization cross section.
+  double uu, dd, ud, du;
+  double cos_mspin, sin_mspin;
+  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du);
+  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin);
+#endif // MAGNETIC
   // Fill in the initial variables
 …
   #pragma omp parallel for
   #endif
+  for (int k=0; k < NPARS; k++) {
+    pvec[k] = values[k+2];
+  }
+#ifdef MAGNETIC
+  const double up_frac_i = values[NPARS+2];
+  const double up_frac_f = values[NPARS+3];
+  const double up_angle = values[NPARS+4];
+  #define MX(_k) (values[NPARS+5+3*_k])
+  #define MY(_k) (values[NPARS+6+3*_k])
+  #define MZ(_k) (values[NPARS+7+3*_k])
+  // TODO: precompute this on the python side
+  // Convert polar to rectangular coordinates in place.
+  if (pd_start == 0) {  // Update in place; only do this for the first hunk!
+//printf("spin: %g %g %g\n", up_frac_i, up_frac_f, up_angle);
+    for (int mag=0; mag < NUM_MAGNETIC; mag++) {
+//printf("mag %d: %g %g %g\n", mag, MX(mag), MY(mag), MZ(mag));
+        polrec(MX(mag), MY(mag), MZ(mag), &MX(mag), &MY(mag), &MZ(mag));
+//printf("   ==>: %g %g %g\n", MX(mag), MY(mag), MZ(mag));
+    }
+  }
+  // Interpret polarization cross section.
+  double uu, dd, ud, du;
+  double cos_mspin, sin_mspin;
+  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du);
+  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin);
+#endif
+  // Monodisperse computation
+  if (details->num_active == 0) {
+    double norm, scale, background;
+    #ifdef INVALID
+    if (INVALID(local_values)) { return; }
+    #endif
+    norm = CALL_VOLUME(local_values);
+    scale = values[0];
+    background = values[1];
+  for (int i=0; i < NPARS; i++) {
+    pvec[i] = values[2+i];
+//printf("p%d = %g\n",i, pvec[i]);
+  }
+  double pd_norm;
+//printf("start: %d %d\n",pd_start, pd_stop);
+  if (pd_start == 0) {
+    pd_norm = 0.0;
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
+    for (int q_index=0; q_index < nq; q_index++) {
+    for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
+//printf("initializing %d\n", nq);
+  } else {
+    pd_norm = result[nq];
+  }
+//printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
+  global const double *pd_value = values + NUM_VALUES + 2;
+  global const double *pd_weight = pd_value + details->pd_sum;
+  // Jump into the middle of the polydispersity loop
+#if MAX_PD>4
+  int n4=details->pd_length[4];
+  int i4=(pd_start/details->pd_stride[4])%n4;
+  const int p4=details->pd_par[4];
+  global const double *v4 = pd_value + details->pd_offset[4];
+  global const double *w4 = pd_weight + details->pd_offset[4];
+#endif
+#if MAX_PD>3
+  int n3=details->pd_length[3];
+  int i3=(pd_start/details->pd_stride[3])%n3;
+  const int p3=details->pd_par[3];
+  global const double *v3 = pd_value + details->pd_offset[3];
+  global const double *w3 = pd_weight + details->pd_offset[3];
+//printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES);
+#endif
+#if MAX_PD>2
+  int n2=details->pd_length[2];
+  int i2=(pd_start/details->pd_stride[2])%n2;
+  const int p2=details->pd_par[2];
+  global const double *v2 = pd_value + details->pd_offset[2];
+  global const double *w2 = pd_weight + details->pd_offset[2];
+#endif
+#if MAX_PD>1
+  int n1=details->pd_length[1];
+  int i1=(pd_start/details->pd_stride[1])%n1;
+  const int p1=details->pd_par[1];
+  global const double *v1 = pd_value + details->pd_offset[1];
+  global const double *w1 = pd_weight + details->pd_offset[1];
+#endif
+#if MAX_PD>0
+  int n0=details->pd_length[0];
+  int i0=(pd_start/details->pd_stride[0])%n0;
+  const int p0=details->pd_par[0];
+  global const double *v0 = pd_value + details->pd_offset[0];
+  global const double *w0 = pd_weight + details->pd_offset[0];
+//printf("w0:%p, values:%p, diff:%d, %d\n",w0,values,(w0-values),NUM_VALUES);
+#endif
+  double spherical_correction=1.0;
+  const int theta_par = details->theta_par;
+#if MAX_PD>0
+  const int fast_theta = (theta_par == p0);
+  const int slow_theta = (theta_par >= 0 && !fast_theta);
+#else
+  const int slow_theta = (theta_par >= 0);
+#endif
+  int step = pd_start;
+#if MAX_PD>4
+  const double weight5 = 1.0;
+  while (i4 < n4) {
+    pvec[p4] = v4[i4];
+    double weight4 = w4[i4] * weight5;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, pvec[p4], weight4);
+#elif MAX_PD>3
+    const double weight4 = 1.0;
+#endif
+#if MAX_PD>3
+  while (i3 < n3) {
+    pvec[p3] = v3[i3];
+    double weight3 = w3[i3] * weight4;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, pvec[p3], weight3);
+#elif MAX_PD>2
+    const double weight3 = 1.0;
+#endif
+#if MAX_PD>2
+  while (i2 < n2) {
+    pvec[p2] = v2[i2];
+    double weight2 = w2[i2] * weight3;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, pvec[p2], weight2);
+#elif MAX_PD>1
+    const double weight2 = 1.0;
+#endif
+#if MAX_PD>1
+  while (i1 < n1) {
+    pvec[p1] = v1[i1];
+    double weight1 = w1[i1] * weight2;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, pvec[p1], weight1);
+#elif MAX_PD>0
+    const double weight1 = 1.0;
+#endif
+    if (slow_theta) { // Theta is not in inner loop
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[theta_par])), 1.e-6);
+    }
+#if MAX_PD>0
+  while(i0 < n0) {
+    pvec[p0] = v0[i0];
+    double weight0 = w0[i0] * weight1;
+//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, pvec[p0], weight0);
+    if (fast_theta) { // Theta is in inner loop
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0])), 1.e-6);
+    }
+#else
+    const double weight0 = 1.0;
+#endif
+//printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NPARS; i++) printf("p%d=%g ",i, pvec[i]); printf("\n");
+//printf("sphcor: %g\n", spherical_correction);
+    #ifdef INVALID
+    if (!INVALID(local_values))
+    #endif
+    {
+      // Accumulate I(q)
+      // Note: weight==0 must always be excluded
+      if (weight0 > cutoff) {
+        // spherical correction has some nasty effects when theta is +90 or -90
+        // where it becomes zero.
+        const double weight = weight0 * spherical_correction;
+        pd_norm += weight * CALL_VOLUME(local_values);
+        #ifdef USE_OPENMP
+        #pragma omp parallel for
+        #endif
+        for (int q_index=0; q_index<nq; q_index++) {
 #ifdef MAGNETIC
+      const double qx = q[2*q_index];
+      const double qy = q[2*q_index+1];
+      const double qsq = qx*qx + qy*qy;
+      // Constant across orientation, polydispersity for given qx, qy
+      double px, py, pz;
+      if (qsq > 1e-16) {
+        px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+        py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+        pz = 1.0;
+      } else {
+        px = py = pz = 0.0;
+      }
+      double scattering = 0.0;
+      if (uu > 1e-8) {
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = (values[magnetic[mag]+2] - perp*px)*uu;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+      if (dd > 1e-8){
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = (values[magnetic[mag]+2] + perp*px)*dd;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+      if (ud > 1e-8){
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = perp*py*ud;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            pvec[magnetic[mag]] = MZ(mag)*pz*ud;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+      if (du > 1e-8) {
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            const double perp = (qy*MX(mag) - qx*MY(mag));
+            pvec[magnetic[mag]] = perp*py*du;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+        for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+            pvec[magnetic[mag]] = -MZ(mag)*pz*du;
+        }
+        scattering += CALL_IQ(q, q_index, local_values);
+      }
+#else
+      double scattering = CALL_IQ(q, q_index, local_values);
+#endif
+      result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
+    }
+    return;
+  }
+#if MAX_PD > 0
+#if MAGNETIC
+  const double *pd_value = values+2+NPARS+3+3*NUM_MAGNETIC;
+#else
+  const double *pd_value = values+2+NPARS;
+#endif
+  const double *pd_weight = pd_value+details->pd_sum;
+  // need product of weights at every Iq calc, so keep product of
+  // weights from the outer loops so that weight = partial_weight * fast_weight
+  double pd_norm;
+  double partial_weight; // product of weight w4*w3*w2 but not w1
+  double spherical_correction; // cosine correction for latitude variation
+  double weight; // product of partial_weight*w1*spherical_correction
+  // Number of elements in the longest polydispersity loop
+  const int p0_par = details->pd_par[0];
+  const int p0_length = details->pd_length[0];
+  const int p0_offset = details->pd_offset[0];
+  const int p0_is_theta = (p0_par == details->theta_par);
+  int p0_index;
+  // Trigger the reset behaviour that happens at the end of the fast loop
+  // by setting the initial index >= weight vector length.
+  p0_index = p0_length;
+  // Default the spherical correction to 1.0 in case it is not otherwise set
+  spherical_correction = 1.0;
+  // Since we are no longer looping over the entire polydispersity hypercube
+  // for each q, we need to track the result and normalization values between
+  // calls.  This means initializing them to 0 at the start and accumulating
+  // them between calls.
+  pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
+  if (pd_start == 0) {
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    for (int q_index=0; q_index < nq; q_index++) {
+      result[q_index] = 0.0;
+    }
+  }
+  // Loop over the weights then loop over q, accumulating values
+  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
+    // check if fast loop needs to be reset
+    if (p0_index == p0_length) {
+      // Compute position in polydispersity hypercube and partial weight
+      partial_weight = 1.0;
+      for (int k=1; k < details->num_active; k++) {
+        int pk = details->pd_par[k];
+        int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
+        pvec[pk] = pd_value[index];
+        partial_weight *= pd_weight[index];
+        if (pk == details->theta_par) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
+          const double qx = q[2*q_index];
+          const double qy = q[2*q_index+1];
+          const double qsq = qx*qx + qy*qy;
+          // Constant across orientation, polydispersity for given qx, qy
+          double px, py, pz;
+          if (qsq > 1.e-16) {
+            px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+            py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+            pz = 1.0;
+          } else {
+            px = py = pz = 0.0;
+          }
+          double scattering = 0.0;
+          if (uu > 1.e-8) {
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = (values[slds[sk]+2] - perp*px)*uu;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (dd > 1.e-8){
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = (values[slds[sk]+2] + perp*px)*dd;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (ud > 1.e-8){
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = perp*py*ud;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                pvec[slds[sk]] = MZ(sk)*pz*ud;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+          if (du > 1.e-8) {
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                const double perp = (qy*MX(sk) - qx*MY(sk));
+                pvec[slds[sk]] = perp*py*du;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+            for (int sk=0; sk<NUM_MAGNETIC; sk++) {
+                pvec[slds[sk]] = -MZ(sk)*pz*du;
+            }
+            scattering += CALL_IQ(q, q_index, local_values);
+          }
+#else  // !MAGNETIC
+          const double scattering = CALL_IQ(q, q_index, local_values);
+#endif // !MAGNETIC
+//printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0);
+          result[q_index] += weight * scattering;
+        }
+      }
-      p0_index = loop_index%p0_length;
+    }
+    // Update parameter p0
+    weight = partial_weight*pd_weight[p0_offset + p0_index];
+    pvec[p0_par] = pd_value[p0_offset + p0_index];
+    if (p0_is_theta) {
+      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
+    }
+    p0_index++;
+    #ifdef INVALID
+    if (INVALID(local_values)) continue;
+    #endif
+    // Accumulate I(q)
+    // Note: weight==0 must always be excluded
+    if (weight > cutoff) {
+      // spherical correction has some nasty effects when theta is +90 or -90
+      // where it becomes zero.
+      weight *= spherical_correction;
+      pd_norm += weight * CALL_VOLUME(local_values);
+      #ifdef USE_OPENMP
+      #pragma omp parallel for
+      #endif
+      for (int q_index=0; q_index < nq; q_index++) {
+#ifdef MAGNETIC
+        const double qx = q[2*q_index];
+        const double qy = q[2*q_index+1];
+        const double qsq = qx*qx + qy*qy;
+        // Constant across orientation, polydispersity for given qx, qy
+        double px, py, pz;
+        if (qsq > 1e-16) {
+          px = (qy*cos_mspin + qx*sin_mspin)/qsq;
+          py = (qy*sin_mspin - qx*cos_mspin)/qsq;
+          pz = 1.0;
+        } else {
+          px = py = pz = 0.0;
+        }
+        double scattering = 0.0;
+        if (uu > 1e-8) {
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = (values[magnetic[mag]+2] - perp*px)*uu;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+        if (dd > 1e-8){
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = (values[magnetic[mag]+2] + perp*px)*dd;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+        if (ud > 1e-8){
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = perp*py*ud;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              pvec[magnetic[mag]] = MZ(mag)*pz*ud;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+        if (du > 1e-8) {
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              const double perp = (qy*MX(mag) - qx*MY(mag));
+              pvec[magnetic[mag]] = perp*py*du;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+          for (int mag=0; mag<NUM_MAGNETIC; mag++) {
+              pvec[magnetic[mag]] = -MZ(mag)*pz*du;
+          }
+          scattering += CALL_IQ(q, q_index, local_values);
+        }
+#else
+        double scattering = CALL_IQ(q, q_index, local_values);
+#endif
+        result[q_index] += weight*scattering;
+      }
+    }
+  }
+  if (pd_stop >= details->pd_prod) {
+    // End of the PD loop we can normalize
+    double scale, background;
+    scale = values[0];
+    background = values[1];
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    for (int q_index=0; q_index < nq; q_index++) {
+      result[q_index] = (pd_norm>0. ? scale*result[q_index]/pd_norm + background : background);
+    }
+  }
+    ++step;
+#if MAX_PD>0
+    if (step >= pd_stop) break;
+    ++i0;
+  }
+  i0 = 0;
+#endif
+#if MAX_PD>1
+    if (step >= pd_stop) break;
+    ++i1;
+  }
+  i1 = 0;
+#endif
+#if MAX_PD>2
+    if (step >= pd_stop) break;
+    ++i2;
+  }
+  i2 = 0;
+#endif
+#if MAX_PD>3
+    if (step >= pd_stop) break;
+    ++i3;
+  }
+  i3 = 0;
+#endif
+#if MAX_PD>4
+    if (step >= pd_stop) break;
+    ++i4;
+  }
+  i4 = 0;
+#endif
+//printf("res: %g/%g\n", result[0], pd_norm);
   // Remember the updated norm.
   result[nq] = pd_norm;
-#endif // MAX_PD > 0
+}

Note: See TracChangeset for help on using the changeset viewer.

SasView

Changeset 9eb3632 in sasmodels for sasmodels/kernel_iq.c

Legend:

sasmodels/kernel_iq.c

Download in other formats: