Changeset 9eb3632 in sasmodels


Ignore:
Timestamp:
Jul 23, 2016 12:54:17 AM (8 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
7b7da6b
Parents:
6a0d6aa
Message:

restructure kernels using fixed PD loops

Location:
sasmodels
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/details.py

    r32e3c9b r9eb3632  
    22 
    33import numpy as np  # type: ignore 
     4from numpy import pi, cos, sin 
     5 
     6try: 
     7    np.meshgrid([]) 
     8    meshgrid = np.meshgrid 
     9except ValueError: 
     10    # CRUFT: np.meshgrid requires multiple vectors 
     11    def meshgrid(*args): 
     12        if len(args) > 1: 
     13            return np.meshgrid(*args) 
     14        else: 
     15            return [np.asarray(v) for v in args] 
    416 
    517try: 
     
    8092        print("pd_stride", self.pd_stride) 
    8193 
     94 
    8295def mono_details(model_info): 
    8396    call_details = CallDetails(model_info) 
    8497    call_details.pd_prod = 1 
     98    call_details.pd_sum = model_info.parameters.nvalues 
     99    call_details.pd_par[:] = np.arange(0, model_info.parameters.max_pd) 
     100    call_details.pd_length[:] = 1 
     101    call_details.pd_offset[:] = np.arange(0, model_info.parameters.max_pd) 
     102    call_details.pd_stride[:] = 1 
    85103    return call_details 
     104 
    86105 
    87106def poly_details(model_info, weights): 
     
    90109 
    91110    # Decreasing list of polydispersity lengths 
    92     pd_length = np.array([len(w) for w in weights]) 
     111    #print([p.id for p in model_info.parameters.call_parameters]) 
     112    pd_length = np.array([len(w) for w in weights[2:2+model_info.parameters.npars]]) 
    93113    num_active = np.sum(pd_length>1) 
    94     if num_active > model_info.parameters.max_pd: 
     114    max_pd = model_info.parameters.max_pd 
     115    if num_active > max_pd: 
    95116        raise ValueError("Too many polydisperse parameters") 
    96117 
     
    100121    #print("off:",pd_offset) 
    101122    # Note: the reversing view, x[::-1], does not require a copy 
    102     idx = np.argsort(pd_length)[::-1][:num_active] 
    103     par_length = np.array([len(w) for w in weights]) 
    104     pd_stride = np.cumprod(np.hstack((1, par_length[idx]))) 
     123    idx = np.argsort(pd_length)[::-1][:max_pd] 
     124    pd_stride = np.cumprod(np.hstack((1, pd_length[idx]))) 
    105125 
    106126    call_details = CallDetails(model_info) 
    107     call_details.pd_par[:num_active] = idx - 2  # skip background & scale 
    108     call_details.pd_length[:num_active] = pd_length[idx] 
    109     call_details.pd_offset[:num_active] = pd_offset[idx] 
    110     call_details.pd_stride[:num_active] = pd_stride[:-1] 
     127    call_details.pd_par[:max_pd] = idx 
     128    call_details.pd_length[:max_pd] = pd_length[idx] 
     129    call_details.pd_offset[:max_pd] = pd_offset[idx] 
     130    call_details.pd_stride[:max_pd] = pd_stride[:-1] 
    111131    call_details.pd_prod = pd_stride[-1] 
    112     call_details.pd_sum = np.sum(par_length) 
     132    call_details.pd_sum = sum(len(w) for w in weights) 
    113133    call_details.num_active = num_active 
    114134    #call_details.show() 
    115135    return call_details 
     136 
     137 
     138def dispersion_mesh(model_info, pars): 
     139    """ 
     140    Create a mesh grid of dispersion parameters and weights. 
     141 
     142    Returns [p1,p2,...],w where pj is a vector of values for parameter j 
     143    and w is a vector containing the products for weights for each 
     144    parameter set in the vector. 
     145    """ 
     146    value, weight = zip(*pars) 
     147    weight = [w if w else [1.] for w in weight] 
     148    weight = np.vstack([v.flatten() for v in meshgrid(*weight)]) 
     149    weight = np.prod(weight, axis=0) 
     150    value = [v.flatten() for v in meshgrid(*value)] 
     151    lengths = [par.length for par in model_info.parameters.kernel_parameters 
     152               if par.type == 'volume'] 
     153    if any(n > 1 for n in lengths): 
     154        pars = [] 
     155        offset = 0 
     156        for n in lengths: 
     157            pars.append(np.vstack(value[offset:offset+n]) if n > 1 else value[offset]) 
     158            offset += n 
     159        value = pars 
     160    return value, weight 
     161 
     162 
     163 
     164def build_details(kernel, pairs): 
     165    # type: (Kernel, Tuple[List[np.ndarray], List[np.ndarray]]) -> Tuple[CallDetails, np.ndarray, bool] 
     166    """ 
     167    Converts (value, weight) pairs into parameters for the kernel call. 
     168 
     169    Returns a CallDetails object indicating the polydispersity, a data object 
     170    containing the different values, and the magnetic flag indicating whether 
     171    any magnetic magnitudes are non-zero. Magnetic vectors (M0, phi, theta) are 
     172    converted to rectangular coordinates (mx, my, mz). 
     173    """ 
     174    values, weights = zip(*pairs) 
     175    scalars = [v[0] for v in values] 
     176    if all(len(w)==1 for w in weights): 
     177        call_details = mono_details(kernel.info) 
     178        data = np.array(scalars+scalars+[1]*len(scalars), dtype=kernel.dtype) 
     179    else: 
     180        call_details = poly_details(kernel.info, weights) 
     181        data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype) 
     182    is_magnetic = convert_magnetism(kernel.info.parameters, data) 
     183    #call_details.show() 
     184    return call_details, data, is_magnetic 
     185 
     186def convert_magnetism(parameters, values): 
     187    """ 
     188    Convert magnetism in value table from polar to rectangular coordinates. 
     189 
     190    Returns True if any magnetism is present. 
     191    """ 
     192    mag = values[parameters.nvalues-3*parameters.nmagnetic:parameters.nvalues] 
     193    mag = mag.reshape(-1, 3) 
     194    M0 = mag[:,0] 
     195    if np.any(M0): 
     196        theta, phi = mag[:,1]*pi/180., mag[:,2]*pi/180. 
     197        cos_theta = cos(theta) 
     198        mx = M0*cos_theta*cos(phi) 
     199        my = M0*sin(theta) 
     200        mz = -M0*cos_theta*sin(phi) 
     201        mag[:,0], mag[:,1], mag[:,2] = mx, my, mz 
     202        return True 
     203    else: 
     204        return False 
  • sasmodels/direct_model.py

    r32e3c9b r9eb3632  
    3030from . import resolution 
    3131from . import resolution2d 
    32 from . import kernel 
     32from .details import build_details 
    3333 
    3434try: 
     
    6565        active = lambda name: True 
    6666 
     67    #print("pars",[p.id for p in parameters.call_parameters]) 
    6768    vw_pairs = [(get_weights(p, pars) if active(p.name) 
    6869                 else ([pars.get(p.name, p.default)], [1.0])) 
    6970                for p in parameters.call_parameters] 
    7071 
    71     call_details, values = kernel.build_details(calculator, vw_pairs) 
    72     magnetic = any(values[k]!=0 for k in parameters.magnetism_index) 
     72    call_details, values, is_magnetic = build_details(calculator, vw_pairs) 
    7373    #print("values:", values) 
    74     return calculator(call_details, values, cutoff, magnetic) 
     74    return calculator(call_details, values, cutoff, is_magnetic) 
    7575 
    7676def get_weights(parameter, values): 
  • sasmodels/generate.py

    rb966a96 r9eb3632  
    576576    source.append("#define MAX_PD %s"%partable.max_pd) 
    577577    source.append("#define NPARS %d"%partable.npars) 
    578     source.append("#define NUM_MAGNETIC %d" % len(magpars)) 
     578    source.append("#define NUM_MAGNETIC %d" % partable.nmagnetic) 
     579    source.append("#define NUM_VALUES %d" % partable.nvalues) 
    579580    source.append("#define MAGNETIC_PARS %s"%",".join(str(k) for k in magpars)) 
    580581 
     
    593594    code = kernel[0] 
    594595    path = kernel[1].replace('\\', '\\\\') 
    595     source = [ 
     596    iq = [ 
    596597        # define the Iq kernel 
    597598        "#define KERNEL_NAME %s_Iq" % name, 
     
    601602        "#undef CALL_IQ", 
    602603        "#undef KERNEL_NAME", 
    603  
     604        ] 
     605 
     606    iqxy = [ 
    604607        # define the Iqxy kernel from the same source with different #defines 
    605608        "#define KERNEL_NAME %s_Iqxy" % name, 
     
    609612        "#undef CALL_IQ", 
    610613        "#undef KERNEL_NAME", 
    611  
     614         ] 
     615 
     616    imagnetic = [ 
    612617        # define the Imagnetic kernel 
    613618        "#define KERNEL_NAME %s_Imagnetic" % name, 
     
    620625        "#undef KERNEL_NAME", 
    621626    ] 
    622     return source 
     627    return iq+iqxy+imagnetic 
    623628 
    624629 
  • sasmodels/kernel.py

    r32e3c9b r9eb3632  
    1313 
    1414import numpy as np 
    15 from .details import mono_details, poly_details 
    1615 
    1716try: 
     
    4847        # type: () -> None 
    4948        pass 
    50  
    51 try: 
    52     np.meshgrid([]) 
    53     meshgrid = np.meshgrid 
    54 except ValueError: 
    55     # CRUFT: np.meshgrid requires multiple vectors 
    56     def meshgrid(*args): 
    57         if len(args) > 1: 
    58             return np.meshgrid(*args) 
    59         else: 
    60             return [np.asarray(v) for v in args] 
    61  
    62 def dispersion_mesh(model_info, pars): 
    63     """ 
    64     Create a mesh grid of dispersion parameters and weights. 
    65  
    66     Returns [p1,p2,...],w where pj is a vector of values for parameter j 
    67     and w is a vector containing the products for weights for each 
    68     parameter set in the vector. 
    69     """ 
    70     value, weight = zip(*pars) 
    71     weight = [w if w else [1.] for w in weight] 
    72     weight = np.vstack([v.flatten() for v in meshgrid(*weight)]) 
    73     weight = np.prod(weight, axis=0) 
    74     value = [v.flatten() for v in meshgrid(*value)] 
    75     lengths = [par.length for par in model_info.parameters.kernel_parameters 
    76                if par.type == 'volume'] 
    77     if any(n > 1 for n in lengths): 
    78         pars = [] 
    79         offset = 0 
    80         for n in lengths: 
    81             pars.append(np.vstack(value[offset:offset+n]) if n > 1 else value[offset]) 
    82             offset += n 
    83         value = pars 
    84     return value, weight 
    85  
    86  
    87  
    88 def build_details(kernel, pairs): 
    89     # type: (Kernel, Tuple[List[np.ndarray], List[np.ndarray]]) -> Tuple[CallDetails, np.ndarray, np.ndarray] 
    90     """ 
    91     Construct the kernel call details object for calling the particular kernel. 
    92     """ 
    93     values, weights = zip(*pairs) 
    94     scalars = [v[0] for v in values] 
    95     if all(len(w)==1 for w in weights): 
    96         call_details = mono_details(kernel.info) 
    97         data = np.array(scalars, dtype=kernel.dtype) 
    98     else: 
    99         call_details = poly_details(kernel.info, weights) 
    100         data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype) 
    101     return call_details, data 
  • sasmodels/kernel_iq.c

    rb966a96 r9eb3632  
    2929 
    3030typedef struct { 
    31     PARAMETER_TABLE 
     31    PARAMETER_TABLE; 
    3232} ParameterBlock; 
    33 #endif 
    34  
    35 #ifdef MAGNETIC 
    36 const int32_t magnetic[] = { MAGNETIC_PARS }; 
    37 #endif 
     33#endif // _PAR_BLOCK_ 
     34 
    3835 
    3936#ifdef MAGNETIC 
     
    6158} 
    6259 
    63 // Convert polar to rectangular coordinates. 
    64 static void polrec(double r, double theta, double phi, 
    65   double *x, double *y, double *z) 
    66 { 
    67   double cos_theta, sin_theta, cos_phi, sin_phi; 
    68   SINCOS(theta*M_PI_180, sin_theta, cos_theta); 
    69   SINCOS(phi*M_PI_180, sin_phi, cos_phi); 
    70   *x = r * cos_theta * cos_phi; 
    71   *y = r * sin_theta; 
    72   *z = -r * cos_theta * sin_phi; 
    73 } 
    74 #endif 
     60#endif // MAGNETIC 
    7561 
    7662kernel 
     
    8066    const int32_t pd_stop,      // where we are stopping in the polydispersity loop 
    8167    global const ProblemDetails *details, 
    82    // global const  // TODO: make it const again! 
    83     double *values, 
     68    global const double *values, 
    8469    global const double *q, // nq q values, with padding to boundary 
    85     global double *result,  // nq+3 return values, again with padding 
     70    global double *result,  // nq+1 return values, again with padding 
    8671    const double cutoff     // cutoff in the polydispersity weight product 
    8772    ) 
    8873{ 
     74 
    8975  // Storage for the current parameter values.  These will be updated as we 
    90   // walk the polydispersity cube. 
    91   ParameterBlock local_values;  // current parameter values 
    92   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector 
     76  // walk the polydispersity cube.  local_values will be aliased to pvec. 
     77  ParameterBlock local_values; 
     78  double *pvec = (double *)&local_values; 
     79 
     80#ifdef MAGNETIC 
     81  // Location of the sld parameters in the parameter pvec. 
     82  // These parameters are updated with the effective sld due to magnetism. 
     83  const int32_t slds[] = { MAGNETIC_PARS }; 
     84 
     85  const double up_frac_i = values[NPARS+2]; 
     86  const double up_frac_f = values[NPARS+3]; 
     87  const double up_angle = values[NPARS+4]; 
     88  #define MX(_k) (values[NPARS+5+3*_k]) 
     89  #define MY(_k) (values[NPARS+6+3*_k]) 
     90  #define MZ(_k) (values[NPARS+7+3*_k]) 
     91 
     92  // TODO: could precompute these outside of the kernel. 
     93  // Interpret polarization cross section. 
     94  double uu, dd, ud, du; 
     95  double cos_mspin, sin_mspin; 
     96  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du); 
     97  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin); 
     98#endif // MAGNETIC 
    9399 
    94100  // Fill in the initial variables 
     
    98104  #pragma omp parallel for 
    99105  #endif 
    100   for (int k=0; k < NPARS; k++) { 
    101     pvec[k] = values[k+2]; 
    102   } 
    103 #ifdef MAGNETIC 
    104   const double up_frac_i = values[NPARS+2]; 
    105   const double up_frac_f = values[NPARS+3]; 
    106   const double up_angle = values[NPARS+4]; 
    107   #define MX(_k) (values[NPARS+5+3*_k]) 
    108   #define MY(_k) (values[NPARS+6+3*_k]) 
    109   #define MZ(_k) (values[NPARS+7+3*_k]) 
    110  
    111   // TODO: precompute this on the python side 
    112   // Convert polar to rectangular coordinates in place. 
    113   if (pd_start == 0) {  // Update in place; only do this for the first hunk! 
    114 //printf("spin: %g %g %g\n", up_frac_i, up_frac_f, up_angle); 
    115     for (int mag=0; mag < NUM_MAGNETIC; mag++) { 
    116 //printf("mag %d: %g %g %g\n", mag, MX(mag), MY(mag), MZ(mag)); 
    117         polrec(MX(mag), MY(mag), MZ(mag), &MX(mag), &MY(mag), &MZ(mag)); 
    118 //printf("   ==>: %g %g %g\n", MX(mag), MY(mag), MZ(mag)); 
    119     } 
    120   } 
    121   // Interpret polarization cross section. 
    122   double uu, dd, ud, du; 
    123   double cos_mspin, sin_mspin; 
    124   spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du); 
    125   SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin); 
    126 #endif 
    127  
    128   // Monodisperse computation 
    129   if (details->num_active == 0) { 
    130     double norm, scale, background; 
    131     #ifdef INVALID 
    132     if (INVALID(local_values)) { return; } 
    133     #endif 
    134  
    135     norm = CALL_VOLUME(local_values); 
    136     scale = values[0]; 
    137     background = values[1]; 
    138  
     106  for (int i=0; i < NPARS; i++) { 
     107    pvec[i] = values[2+i]; 
     108//printf("p%d = %g\n",i, pvec[i]); 
     109  } 
     110 
     111  double pd_norm; 
     112//printf("start: %d %d\n",pd_start, pd_stop); 
     113  if (pd_start == 0) { 
     114    pd_norm = 0.0; 
    139115    #ifdef USE_OPENMP 
    140116    #pragma omp parallel for 
    141117    #endif 
    142     for (int q_index=0; q_index < nq; q_index++) { 
     118    for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     119//printf("initializing %d\n", nq); 
     120  } else { 
     121    pd_norm = result[nq]; 
     122  } 
     123//printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
     124 
     125  global const double *pd_value = values + NUM_VALUES + 2; 
     126  global const double *pd_weight = pd_value + details->pd_sum; 
     127 
     128  // Jump into the middle of the polydispersity loop 
     129#if MAX_PD>4 
     130  int n4=details->pd_length[4]; 
     131  int i4=(pd_start/details->pd_stride[4])%n4; 
     132  const int p4=details->pd_par[4]; 
     133  global const double *v4 = pd_value + details->pd_offset[4]; 
     134  global const double *w4 = pd_weight + details->pd_offset[4]; 
     135#endif 
     136#if MAX_PD>3 
     137  int n3=details->pd_length[3]; 
     138  int i3=(pd_start/details->pd_stride[3])%n3; 
     139  const int p3=details->pd_par[3]; 
     140  global const double *v3 = pd_value + details->pd_offset[3]; 
     141  global const double *w3 = pd_weight + details->pd_offset[3]; 
     142//printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES); 
     143#endif 
     144#if MAX_PD>2 
     145  int n2=details->pd_length[2]; 
     146  int i2=(pd_start/details->pd_stride[2])%n2; 
     147  const int p2=details->pd_par[2]; 
     148  global const double *v2 = pd_value + details->pd_offset[2]; 
     149  global const double *w2 = pd_weight + details->pd_offset[2]; 
     150#endif 
     151#if MAX_PD>1 
     152  int n1=details->pd_length[1]; 
     153  int i1=(pd_start/details->pd_stride[1])%n1; 
     154  const int p1=details->pd_par[1]; 
     155  global const double *v1 = pd_value + details->pd_offset[1]; 
     156  global const double *w1 = pd_weight + details->pd_offset[1]; 
     157#endif 
     158#if MAX_PD>0 
     159  int n0=details->pd_length[0]; 
     160  int i0=(pd_start/details->pd_stride[0])%n0; 
     161  const int p0=details->pd_par[0]; 
     162  global const double *v0 = pd_value + details->pd_offset[0]; 
     163  global const double *w0 = pd_weight + details->pd_offset[0]; 
     164//printf("w0:%p, values:%p, diff:%d, %d\n",w0,values,(w0-values),NUM_VALUES); 
     165#endif 
     166 
     167 
     168  double spherical_correction=1.0; 
     169  const int theta_par = details->theta_par; 
     170#if MAX_PD>0 
     171  const int fast_theta = (theta_par == p0); 
     172  const int slow_theta = (theta_par >= 0 && !fast_theta); 
     173#else 
     174  const int slow_theta = (theta_par >= 0); 
     175#endif 
     176 
     177  int step = pd_start; 
     178 
     179 
     180#if MAX_PD>4 
     181  const double weight5 = 1.0; 
     182  while (i4 < n4) { 
     183    pvec[p4] = v4[i4]; 
     184    double weight4 = w4[i4] * weight5; 
     185//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, pvec[p4], weight4); 
     186#elif MAX_PD>3 
     187    const double weight4 = 1.0; 
     188#endif 
     189#if MAX_PD>3 
     190  while (i3 < n3) { 
     191    pvec[p3] = v3[i3]; 
     192    double weight3 = w3[i3] * weight4; 
     193//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, pvec[p3], weight3); 
     194#elif MAX_PD>2 
     195    const double weight3 = 1.0; 
     196#endif 
     197#if MAX_PD>2 
     198  while (i2 < n2) { 
     199    pvec[p2] = v2[i2]; 
     200    double weight2 = w2[i2] * weight3; 
     201//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, pvec[p2], weight2); 
     202#elif MAX_PD>1 
     203    const double weight2 = 1.0; 
     204#endif 
     205#if MAX_PD>1 
     206  while (i1 < n1) { 
     207    pvec[p1] = v1[i1]; 
     208    double weight1 = w1[i1] * weight2; 
     209//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, pvec[p1], weight1); 
     210#elif MAX_PD>0 
     211    const double weight1 = 1.0; 
     212#endif 
     213    if (slow_theta) { // Theta is not in inner loop 
     214      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[theta_par])), 1.e-6); 
     215    } 
     216#if MAX_PD>0 
     217  while(i0 < n0) { 
     218    pvec[p0] = v0[i0]; 
     219    double weight0 = w0[i0] * weight1; 
     220//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, pvec[p0], weight0); 
     221    if (fast_theta) { // Theta is in inner loop 
     222      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0])), 1.e-6); 
     223    } 
     224#else 
     225    const double weight0 = 1.0; 
     226#endif 
     227 
     228//printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NPARS; i++) printf("p%d=%g ",i, pvec[i]); printf("\n"); 
     229//printf("sphcor: %g\n", spherical_correction); 
     230 
     231    #ifdef INVALID 
     232    if (!INVALID(local_values)) 
     233    #endif 
     234    { 
     235      // Accumulate I(q) 
     236      // Note: weight==0 must always be excluded 
     237      if (weight0 > cutoff) { 
     238        // spherical correction has some nasty effects when theta is +90 or -90 
     239        // where it becomes zero. 
     240        const double weight = weight0 * spherical_correction; 
     241        pd_norm += weight * CALL_VOLUME(local_values); 
     242 
     243        #ifdef USE_OPENMP 
     244        #pragma omp parallel for 
     245        #endif 
     246        for (int q_index=0; q_index<nq; q_index++) { 
    143247#ifdef MAGNETIC 
    144       const double qx = q[2*q_index]; 
    145       const double qy = q[2*q_index+1]; 
    146       const double qsq = qx*qx + qy*qy; 
    147  
    148       // Constant across orientation, polydispersity for given qx, qy 
    149       double px, py, pz; 
    150       if (qsq > 1e-16) { 
    151         px = (qy*cos_mspin + qx*sin_mspin)/qsq; 
    152         py = (qy*sin_mspin - qx*cos_mspin)/qsq; 
    153         pz = 1.0; 
    154       } else { 
    155         px = py = pz = 0.0; 
    156       } 
    157  
    158       double scattering = 0.0; 
    159       if (uu > 1e-8) { 
    160         for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    161             const double perp = (qy*MX(mag) - qx*MY(mag)); 
    162             pvec[magnetic[mag]] = (values[magnetic[mag]+2] - perp*px)*uu; 
    163         } 
    164         scattering += CALL_IQ(q, q_index, local_values); 
    165       } 
    166       if (dd > 1e-8){ 
    167         for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    168             const double perp = (qy*MX(mag) - qx*MY(mag)); 
    169             pvec[magnetic[mag]] = (values[magnetic[mag]+2] + perp*px)*dd; 
    170         } 
    171         scattering += CALL_IQ(q, q_index, local_values); 
    172       } 
    173       if (ud > 1e-8){ 
    174         for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    175             const double perp = (qy*MX(mag) - qx*MY(mag)); 
    176             pvec[magnetic[mag]] = perp*py*ud; 
    177         } 
    178         scattering += CALL_IQ(q, q_index, local_values); 
    179         for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    180             pvec[magnetic[mag]] = MZ(mag)*pz*ud; 
    181         } 
    182         scattering += CALL_IQ(q, q_index, local_values); 
    183       } 
    184       if (du > 1e-8) { 
    185         for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    186             const double perp = (qy*MX(mag) - qx*MY(mag)); 
    187             pvec[magnetic[mag]] = perp*py*du; 
    188         } 
    189         scattering += CALL_IQ(q, q_index, local_values); 
    190         for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    191             pvec[magnetic[mag]] = -MZ(mag)*pz*du; 
    192         } 
    193         scattering += CALL_IQ(q, q_index, local_values); 
    194       } 
    195 #else 
    196       double scattering = CALL_IQ(q, q_index, local_values); 
    197 #endif 
    198       result[q_index] = (norm>0. ? scale*scattering/norm + background : background); 
    199     } 
    200     return; 
    201   } 
    202  
    203 #if MAX_PD > 0 
    204  
    205 #if MAGNETIC 
    206   const double *pd_value = values+2+NPARS+3+3*NUM_MAGNETIC; 
    207 #else 
    208   const double *pd_value = values+2+NPARS; 
    209 #endif 
    210   const double *pd_weight = pd_value+details->pd_sum; 
    211  
    212   // need product of weights at every Iq calc, so keep product of 
    213   // weights from the outer loops so that weight = partial_weight * fast_weight 
    214   double pd_norm; 
    215   double partial_weight; // product of weight w4*w3*w2 but not w1 
    216   double spherical_correction; // cosine correction for latitude variation 
    217   double weight; // product of partial_weight*w1*spherical_correction 
    218  
    219   // Number of elements in the longest polydispersity loop 
    220   const int p0_par = details->pd_par[0]; 
    221   const int p0_length = details->pd_length[0]; 
    222   const int p0_offset = details->pd_offset[0]; 
    223   const int p0_is_theta = (p0_par == details->theta_par); 
    224   int p0_index; 
    225  
    226   // Trigger the reset behaviour that happens at the end the fast loop 
    227   // by setting the initial index >= weight vector length. 
    228   p0_index = p0_length; 
    229  
    230   // Default the spherical correction to 1.0 in case it is not otherwise set 
    231   spherical_correction = 1.0; 
    232  
    233   // Since we are no longer looping over the entire polydispersity hypercube 
    234   // for each q, we need to track the result and normalization values between 
    235   // calls.  This means initializing them to 0 at the start and accumulating 
    236   // them between calls. 
    237   pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    238  
    239   if (pd_start == 0) { 
    240     #ifdef USE_OPENMP 
    241     #pragma omp parallel for 
    242     #endif 
    243     for (int q_index=0; q_index < nq; q_index++) { 
    244       result[q_index] = 0.0; 
    245     } 
    246   } 
    247  
    248   // Loop over the weights then loop over q, accumulating values 
    249   for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { 
    250     // check if fast loop needs to be reset 
    251     if (p0_index == p0_length) { 
    252  
    253       // Compute position in polydispersity hypercube and partial weight 
    254       partial_weight = 1.0; 
    255       for (int k=1; k < details->num_active; k++) { 
    256         int pk = details->pd_par[k]; 
    257         int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k]; 
    258         pvec[pk] = pd_value[index]; 
    259         partial_weight *= pd_weight[index]; 
    260         if (pk == details->theta_par) { 
    261           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6); 
     248          const double qx = q[2*q_index]; 
     249          const double qy = q[2*q_index+1]; 
     250          const double qsq = qx*qx + qy*qy; 
     251 
     252          // Constant across orientation, polydispersity for given qx, qy 
     253          double px, py, pz; 
     254          if (qsq > 1.e-16) { 
     255            px = (qy*cos_mspin + qx*sin_mspin)/qsq; 
     256            py = (qy*sin_mspin - qx*cos_mspin)/qsq; 
     257            pz = 1.0; 
     258          } else { 
     259            px = py = pz = 0.0; 
     260          } 
     261 
     262          double scattering = 0.0; 
     263          if (uu > 1.e-8) { 
     264            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     265                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     266                pvec[slds[sk]] = (values[slds[sk]+2] - perp*px)*uu; 
     267            } 
     268            scattering += CALL_IQ(q, q_index, local_values); 
     269          } 
     270          if (dd > 1.e-8){ 
     271            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     272                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     273                pvec[slds[sk]] = (values[slds[sk]+2] + perp*px)*dd; 
     274            } 
     275            scattering += CALL_IQ(q, q_index, local_values); 
     276          } 
     277          if (ud > 1.e-8){ 
     278            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     279                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     280                pvec[slds[sk]] = perp*py*ud; 
     281            } 
     282            scattering += CALL_IQ(q, q_index, local_values); 
     283            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     284                pvec[slds[sk]] = MZ(sk)*pz*ud; 
     285            } 
     286            scattering += CALL_IQ(q, q_index, local_values); 
     287          } 
     288          if (du > 1.e-8) { 
     289            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     290                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     291                pvec[slds[sk]] = perp*py*du; 
     292            } 
     293            scattering += CALL_IQ(q, q_index, local_values); 
     294            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     295                pvec[slds[sk]] = -MZ(sk)*pz*du; 
     296            } 
     297            scattering += CALL_IQ(q, q_index, local_values); 
     298          } 
     299#else  // !MAGNETIC 
     300          const double scattering = CALL_IQ(q, q_index, local_values); 
     301#endif // !MAGNETIC 
     302//printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0); 
     303          result[q_index] += weight * scattering; 
    262304        } 
    263305      } 
    264       p0_index = loop_index%p0_length; 
    265306    } 
    266  
    267     // Update parameter p0 
    268     weight = partial_weight*pd_weight[p0_offset + p0_index]; 
    269     pvec[p0_par] = pd_value[p0_offset + p0_index]; 
    270     if (p0_is_theta) { 
    271       spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6); 
    272     } 
    273     p0_index++; 
    274  
    275     #ifdef INVALID 
    276     if (INVALID(local_values)) continue; 
    277     #endif 
    278  
    279     // Accumulate I(q) 
    280     // Note: weight==0 must always be excluded 
    281     if (weight > cutoff) { 
    282       // spherical correction has some nasty effects when theta is +90 or -90 
    283       // where it becomes zero.  If the entirety of the correction 
    284       weight *= spherical_correction; 
    285       pd_norm += weight * CALL_VOLUME(local_values); 
    286  
    287       #ifdef USE_OPENMP 
    288       #pragma omp parallel for 
    289       #endif 
    290       for (int q_index=0; q_index < nq; q_index++) { 
    291 #ifdef MAGNETIC 
    292         const double qx = q[2*q_index]; 
    293         const double qy = q[2*q_index+1]; 
    294         const double qsq = qx*qx + qy*qy; 
    295  
    296         // Constant across orientation, polydispersity for given qx, qy 
    297         double px, py, pz; 
    298         if (qsq > 1e-16) { 
    299           px = (qy*cos_mspin + qx*sin_mspin)/qsq; 
    300           py = (qy*sin_mspin - qx*cos_mspin)/qsq; 
    301           pz = 1.0; 
    302         } else { 
    303           px = py = pz = 0.0; 
    304         } 
    305  
    306         double scattering = 0.0; 
    307         if (uu > 1e-8) { 
    308           for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    309               const double perp = (qy*MX(mag) - qx*MY(mag)); 
    310               pvec[magnetic[mag]] = (values[magnetic[mag]+2] - perp*px)*uu; 
    311           } 
    312           scattering += CALL_IQ(q, q_index, local_values); 
    313         } 
    314         if (dd > 1e-8){ 
    315           for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    316               const double perp = (qy*MX(mag) - qx*MY(mag)); 
    317               pvec[magnetic[mag]] = (values[magnetic[mag]+2] + perp*px)*dd; 
    318           } 
    319           scattering += CALL_IQ(q, q_index, local_values); 
    320         } 
    321         if (ud > 1e-8){ 
    322           for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    323               const double perp = (qy*MX(mag) - qx*MY(mag)); 
    324               pvec[magnetic[mag]] = perp*py*ud; 
    325           } 
    326           scattering += CALL_IQ(q, q_index, local_values); 
    327           for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    328               pvec[magnetic[mag]] = MZ(mag)*pz*ud; 
    329           } 
    330           scattering += CALL_IQ(q, q_index, local_values); 
    331         } 
    332         if (du > 1e-8) { 
    333           for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    334               const double perp = (qy*MX(mag) - qx*MY(mag)); 
    335               pvec[magnetic[mag]] = perp*py*du; 
    336           } 
    337           scattering += CALL_IQ(q, q_index, local_values); 
    338           for (int mag=0; mag<NUM_MAGNETIC; mag++) { 
    339               pvec[magnetic[mag]] = -MZ(mag)*pz*du; 
    340           } 
    341           scattering += CALL_IQ(q, q_index, local_values); 
    342         } 
    343 #else 
    344         double scattering = CALL_IQ(q, q_index, local_values); 
    345 #endif 
    346         result[q_index] += weight*scattering; 
    347       } 
    348     } 
    349   } 
    350  
    351   if (pd_stop >= details->pd_prod) { 
    352     // End of the PD loop we can normalize 
    353     double scale, background; 
    354     scale = values[0]; 
    355     background = values[1]; 
    356     #ifdef USE_OPENMP 
    357     #pragma omp parallel for 
    358     #endif 
    359     for (int q_index=0; q_index < nq; q_index++) { 
    360       result[q_index] = (pd_norm>0. ? scale*result[q_index]/pd_norm + background : background); 
    361     } 
    362   } 
    363  
     307    ++step; 
     308#if MAX_PD>0 
     309    if (step >= pd_stop) break; 
     310    ++i0; 
     311  } 
     312  i0 = 0; 
     313#endif 
     314#if MAX_PD>1 
     315    if (step >= pd_stop) break; 
     316    ++i1; 
     317  } 
     318  i1 = 0; 
     319#endif 
     320#if MAX_PD>2 
     321    if (step >= pd_stop) break; 
     322    ++i2; 
     323  } 
     324  i2 = 0; 
     325#endif 
     326#if MAX_PD>3 
     327    if (step >= pd_stop) break; 
     328    ++i3; 
     329  } 
     330  i3 = 0; 
     331#endif 
     332#if MAX_PD>4 
     333    if (step >= pd_stop) break; 
     334    ++i4; 
     335  } 
     336  i4 = 0; 
     337#endif 
     338 
     339//printf("res: %g/%g\n", result[0], pd_norm); 
    364340  // Remember the updated norm. 
    365341  result[nq] = pd_norm; 
    366 #endif // MAX_PD > 0 
    367342} 
  • sasmodels/kernel_iq.cl

    rb9c12fe5 r9eb3632  
    3131    PARAMETER_TABLE; 
    3232} ParameterBlock; 
    33 #endif 
    34  
     33#endif // _PAR_BLOCK_ 
     34 
     35 
     36#ifdef MAGNETIC 
     37 
     38// Return value restricted between low and high 
     39static double clip(double value, double low, double high) 
     40{ 
     41  return (value < low ? low : (value > high ? high : value)); 
     42} 
     43 
     44// Compute spin cross sections given in_spin and out_spin 
     45// To convert spin cross sections to sld b: 
     46//     uu * (sld - m_sigma_x); 
     47//     dd * (sld + m_sigma_x); 
     48//     ud * (m_sigma_y + 1j*m_sigma_z); 
     49//     du * (m_sigma_y - 1j*m_sigma_z); 
     50static void spins(double in_spin, double out_spin, 
     51    double *uu, double *dd, double *ud, double *du) 
     52{ 
     53  in_spin = clip(in_spin, 0.0, 1.0); 
     54  out_spin = clip(out_spin, 0.0, 1.0); 
     55  *uu = sqrt(sqrt(in_spin * out_spin)); 
     56  *dd = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); 
     57  *ud = sqrt(sqrt(in_spin * (1.0-out_spin))); 
     58  *du = sqrt(sqrt((1.0-in_spin) * out_spin)); 
     59} 
     60 
     61#endif // MAGNETIC 
    3562 
    3663kernel 
     
    4673    ) 
    4774{ 
     75 
     76  // who we are and what element we are working with 
     77  const int q_index = get_global_id(0); 
     78  if (q_index >= nq) return; 
     79 
    4880  // Storage for the current parameter values.  These will be updated as we 
    4981  // walk the polydispersity cube.  local_values will be aliased to pvec. 
     
    5183  double *pvec = (double *)&local_values; 
    5284 
    53   // who we are and what element we are working with 
    54   const int q_index = get_global_id(0); 
    55  
    5685  // Fill in the initial variables 
    5786  for (int i=0; i < NPARS; i++) { 
    5887    pvec[i] = values[2+i]; 
    59   } 
    60  
    61   // Monodisperse computation 
    62   if (details->num_active == 0) { 
    63     double norm, scale, background; 
    64     // TODO: only needs to be done by one process... 
     88//if (q_index==0) printf("p%d = %g\n",i, pvec[i]); 
     89  } 
     90 
     91#ifdef MAGNETIC 
     92  // Location of the sld parameters in the parameter pvec. 
     93  // These parameters are updated with the effective sld due to magnetism. 
     94  const int32_t slds[] = { MAGNETIC_PARS }; 
     95 
     96  const double up_frac_i = values[NPARS+2]; 
     97  const double up_frac_f = values[NPARS+3]; 
     98  const double up_angle = values[NPARS+4]; 
     99  #define MX(_k) (values[NPARS+5+3*_k]) 
     100  #define MY(_k) (values[NPARS+6+3*_k]) 
     101  #define MZ(_k) (values[NPARS+7+3*_k]) 
     102 
     103  // TODO: could precompute these outside of the kernel. 
     104  // Interpret polarization cross section. 
     105  double uu, dd, ud, du; 
     106  double cos_mspin, sin_mspin; 
     107  spins(up_frac_i, up_frac_f, &uu, &dd, &ud, &du); 
     108  SINCOS(-up_angle*M_PI_180, sin_mspin, cos_mspin); 
     109#endif // MAGNETIC 
     110 
     111  double pd_norm, this_result; 
     112  if (pd_start == 0) { 
     113    pd_norm = this_result = 0.0; 
     114  } else { 
     115    pd_norm = result[nq]; 
     116    this_result = result[q_index]; 
     117  } 
     118//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, this_result); 
     119 
     120  global const double *pd_value = values + NUM_VALUES + 2; 
     121  global const double *pd_weight = pd_value + details->pd_sum; 
     122 
     123  // Jump into the middle of the polydispersity loop 
     124#if MAX_PD>4 
     125  int n4=details->pd_length[4]; 
     126  int i4=(pd_start/details->pd_stride[4])%n4; 
     127  const int p4=details->pd_par[4]; 
     128  global const double *v4 = pd_value + details->pd_offset[4]; 
     129  global const double *w4 = pd_weight + details->pd_offset[4]; 
     130#endif 
     131#if MAX_PD>3 
     132  int n3=details->pd_length[3]; 
     133  int i3=(pd_start/details->pd_stride[3])%n3; 
     134  const int p3=details->pd_par[3]; 
     135  global const double *v3 = pd_value + details->pd_offset[3]; 
     136  global const double *w3 = pd_weight + details->pd_offset[3]; 
     137//if (q_index==0) printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES); 
     138#endif 
     139#if MAX_PD>2 
     140  int n2=details->pd_length[2]; 
     141  int i2=(pd_start/details->pd_stride[2])%n2; 
     142  const int p2=details->pd_par[2]; 
     143  global const double *v2 = pd_value + details->pd_offset[2]; 
     144  global const double *w2 = pd_weight + details->pd_offset[2]; 
     145#endif 
     146#if MAX_PD>1 
     147  int n1=details->pd_length[1]; 
     148  int i1=(pd_start/details->pd_stride[1])%n1; 
     149  const int p1=details->pd_par[1]; 
     150  global const double *v1 = pd_value + details->pd_offset[1]; 
     151  global const double *w1 = pd_weight + details->pd_offset[1]; 
     152#endif 
     153#if MAX_PD>0 
     154  int n0=details->pd_length[0]; 
     155  int i0=(pd_start/details->pd_stride[0])%n0; 
     156  const int p0=details->pd_par[0]; 
     157  global const double *v0 = pd_value + details->pd_offset[0]; 
     158  global const double *w0 = pd_weight + details->pd_offset[0]; 
     159#endif 
     160 
     161 
     162  double spherical_correction=1.0; 
     163  const int theta_par = details->theta_par; 
     164#if MAX_PD>0 
     165  const int fast_theta = (theta_par == p0); 
     166  const int slow_theta = (theta_par >= 0 && !fast_theta); 
     167#else 
     168  const int slow_theta = (theta_par >= 0); 
     169#endif 
     170 
     171  int step = pd_start; 
     172 
     173 
     174#if MAX_PD>4 
     175  const double weight5 = 1.0; 
     176  while (i4 < n4) { 
     177    pvec[p4] = v4[i4]; 
     178    double weight4 = w4[i4] * weight5; 
     179//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, pvec[p4], weight4); 
     180#elif MAX_PD>3 
     181    const double weight4 = 1.0; 
     182#endif 
     183#if MAX_PD>3 
     184  while (i3 < n3) { 
     185    pvec[p3] = v3[i3]; 
     186    double weight3 = w3[i3] * weight4; 
     187//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, pvec[p3], weight3); 
     188#elif MAX_PD>2 
     189    const double weight3 = 1.0; 
     190#endif 
     191#if MAX_PD>2 
     192  while (i2 < n2) { 
     193    pvec[p2] = v2[i2]; 
     194    double weight2 = w2[i2] * weight3; 
     195//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, pvec[p2], weight2); 
     196#elif MAX_PD>1 
     197    const double weight2 = 1.0; 
     198#endif 
     199#if MAX_PD>1 
     200  while (i1 < n1) { 
     201    pvec[p1] = v1[i1]; 
     202    double weight1 = w1[i1] * weight2; 
     203//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, pvec[p1], weight1); 
     204#elif MAX_PD>0 
     205    const double weight1 = 1.0; 
     206#endif 
     207    if (slow_theta) { // Theta is not in inner loop 
     208      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[theta_par])), 1.e-6); 
     209    } 
     210#if MAX_PD>0 
     211  while(i0 < n0) { 
     212    pvec[p0] = v0[i0]; 
     213    double weight0 = w0[i0] * weight1; 
     214//if (q_index == 0) printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, pvec[p0], weight0); 
     215    if (fast_theta) { // Theta is in inner loop 
     216      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0])), 1.e-6); 
     217    } 
     218#else 
     219    const double weight0 = 1.0; 
     220#endif 
     221 
     222//if (q_index == 0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NPARS; i++) printf("p%d=%g ",i, pvec[i]); printf("\n"); } 
     223//if (q_index == 0) printf("sphcor: %g\n", spherical_correction); 
     224 
    65225    #ifdef INVALID 
    66     if (INVALID(local_values)) { return; } 
     226    if (!INVALID(local_values)) 
    67227    #endif 
    68  
    69     norm = CALL_VOLUME(local_values); 
    70     scale = values[0]; 
    71     background = values[1]; 
    72  
    73     if (q_index < nq) { 
    74       double scattering = CALL_IQ(q, q_index, local_values); 
    75       result[q_index] = (norm>0. ? scale*scattering/norm + background : background); 
     228    { 
     229      // Accumulate I(q) 
     230      // Note: weight==0 must always be excluded 
     231      if (weight0 > cutoff) { 
     232        // spherical correction has some nasty effects when theta is +90 or -90 
     233        // where it becomes zero. 
     234        const double weight = weight0 * spherical_correction; 
     235        pd_norm += weight * CALL_VOLUME(local_values); 
     236 
     237#ifdef MAGNETIC 
     238          const double qx = q[2*q_index]; 
     239          const double qy = q[2*q_index+1]; 
     240          const double qsq = qx*qx + qy*qy; 
     241 
     242          // Constant across orientation, polydispersity for given qx, qy 
     243          double px, py, pz; 
     244          if (qsq > 1.e-16) { 
     245            px = (qy*cos_mspin + qx*sin_mspin)/qsq; 
     246            py = (qy*sin_mspin - qx*cos_mspin)/qsq; 
     247            pz = 1.0; 
     248          } else { 
     249            px = py = pz = 0.0; 
     250          } 
     251 
     252          double scattering = 0.0; 
     253          if (uu > 1.e-8) { 
     254            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     255                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     256                pvec[slds[sk]] = (values[slds[sk]+2] - perp*px)*uu; 
     257            } 
     258            scattering += CALL_IQ(q, q_index, local_values); 
     259          } 
     260 
     261          if (dd > 1.e-8){ 
     262            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     263                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     264                pvec[slds[sk]] = (values[slds[sk]+2] + perp*px)*dd; 
     265            } 
     266            scattering += CALL_IQ(q, q_index, local_values); 
     267          } 
     268          if (ud > 1.e-8){ 
     269            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     270                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     271                pvec[slds[sk]] = perp*py*ud; 
     272            } 
     273            scattering += CALL_IQ(q, q_index, local_values); 
     274            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     275                pvec[slds[sk]] = MZ(sk)*pz*ud; 
     276            } 
     277            scattering += CALL_IQ(q, q_index, local_values); 
     278          } 
     279          if (du > 1.e-8) { 
     280            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     281                const double perp = (qy*MX(sk) - qx*MY(sk)); 
     282                pvec[slds[sk]] = perp*py*du; 
     283            } 
     284            scattering += CALL_IQ(q, q_index, local_values); 
     285            for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     286                pvec[slds[sk]] = -MZ(sk)*pz*du; 
     287            } 
     288            scattering += CALL_IQ(q, q_index, local_values); 
     289          } 
     290#else  // !MAGNETIC 
     291        const double scattering = CALL_IQ(q, q_index, local_values); 
     292#endif // !MAGNETIC 
     293        this_result += weight * scattering; 
     294      } 
    76295    } 
    77     return; 
    78   } 
    79  
    80 #if MAX_PD > 0 
    81  
    82   double this_result; 
    83  
    84   //printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop); 
    85  
    86   global const double *pd_value = values+2+NPARS; 
    87   global const double *pd_weight = pd_value+details->pd_sum; 
    88  
    89   // need product of weights at every Iq calc, so keep product of 
    90   // weights from the outer loops so that weight = partial_weight * fast_weight 
    91   double pd_norm; 
    92   double partial_weight; // product of weight w4*w3*w2 but not w1 
    93   double spherical_correction; // cosine correction for latitude variation 
    94   double weight; // product of partial_weight*w1*spherical_correction 
    95   int p0_par; 
    96   int p0_length; 
    97   int p0_offset; 
    98   int p0_is_theta; 
    99   int p0_index; 
    100  
    101   // Number of elements in the longest polydispersity loop 
    102   p0_par = details->pd_par[0]; 
    103   p0_length = details->pd_length[0]; 
    104   p0_offset = details->pd_offset[0]; 
    105   p0_is_theta = (p0_par == details->theta_par); 
    106  
    107   // Trigger the reset behaviour that happens at the end the fast loop 
    108   // by setting the initial index >= weight vector length. 
    109   p0_index = p0_length; 
    110  
    111   // Default the spherical correction to 1.0 in case it is not otherwise set 
    112   spherical_correction = 1.0; 
    113   weight=1.0; 
    114  
    115   // Since we are no longer looping over the entire polydispersity hypercube 
    116   // for each q, we need to track the result and normalization values between 
    117   // calls.  This means initializing them to 0 at the start and accumulating 
    118   // them between calls. 
    119   pd_norm = pd_start == 0 ? 0.0 : result[nq]; 
    120  
    121   if (q_index < nq) { 
    122     this_result = pd_start == 0 ? 0.0 : result[q_index]; 
    123   } 
    124  
    125   // Loop over the weights then loop over q, accumulating values 
    126   for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { 
    127     // check if fast loop needs to be reset 
    128     if (p0_index == p0_length) { 
    129       //printf("should be here with %d active\n", num_active); 
    130  
    131       // Compute position in polydispersity hypercube and partial weight 
    132       partial_weight = 1.0; 
    133       for (int k=1; k < details->num_active; k++) { 
    134         int pk = details->pd_par[k]; 
    135         int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k]; 
    136         pvec[pk] = pd_value[index]; 
    137         partial_weight *= pd_weight[index]; 
    138         //printf("index[%d] = %d\n",k,index); 
    139         if (pk == details->theta_par) { 
    140           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6); 
    141         } 
    142       } 
    143       p0_index = loop_index%p0_length; 
    144       //printf("\n"); 
    145     } 
    146  
    147     // Update parameter p0 
    148     weight = partial_weight*pd_weight[p0_offset + p0_index]; 
    149     pvec[p0_par] = pd_value[p0_offset + p0_index]; 
    150     if (p0_is_theta) { 
    151       spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6); 
    152     } 
    153     p0_index++; 
    154     //printf("\n"); 
    155  
    156     // Increment fast index 
    157  
    158     #ifdef INVALID 
    159     if (INVALID(local_values)) continue; 
    160     #endif 
    161  
    162     // Accumulate I(q) 
    163     // Note: weight==0 must always be excluded 
    164     if (weight > cutoff) { 
    165       // spherical correction has some nasty effects when theta is +90 or -90 
    166       // where it becomes zero.  If the entirety of the correction 
    167       weight *= spherical_correction; 
    168       pd_norm += weight * CALL_VOLUME(local_values); 
    169  
    170       const double scattering = CALL_IQ(q, q_index, local_values); 
    171       this_result += weight*scattering; 
    172     } 
    173   } 
    174  
    175   if (q_index < nq) { 
    176     if (pd_stop >= details->pd_prod) { 
    177       // End of the PD loop we can normalize 
    178       double scale, background; 
    179       scale = values[0]; 
    180       background = values[1]; 
    181       result[q_index] = (pd_norm>0. ? scale*this_result/pd_norm + background : background); 
    182     } else { 
    183       // Partial result, so remember it but don't normalize it. 
    184       result[q_index] = this_result; 
    185     } 
    186  
    187     // Remember the updated norm. 
    188     if (q_index == 0) result[nq] = pd_norm; 
    189   } 
    190  
    191 #endif // MAX_PD > 0 
     296    ++step; 
     297#if MAX_PD>0 
     298    if (step >= pd_stop) break; 
     299    ++i0; 
     300  } 
     301  i0 = 0; 
     302#endif 
     303#if MAX_PD>1 
     304    if (step >= pd_stop) break; 
     305    ++i1; 
     306  } 
     307  i1 = 0; 
     308#endif 
     309#if MAX_PD>2 
     310    if (step >= pd_stop) break; 
     311    ++i2; 
     312  } 
     313  i2 = 0; 
     314#endif 
     315#if MAX_PD>3 
     316    if (step >= pd_stop) break; 
     317    ++i3; 
     318  } 
     319  i3 = 0; 
     320#endif 
     321#if MAX_PD>4 
     322    if (step >= pd_stop) break; 
     323    ++i4; 
     324  } 
     325  i4 = 0; 
     326#endif 
     327 
     328//if (q_index==0) printf("res: %g/%g\n", this_result, pd_norm); 
     329  // Remember the current result and the updated norm. 
     330  result[q_index] = this_result; 
     331  if (q_index == 0) result[nq] = pd_norm; 
    192332} 
  • sasmodels/kernelcl.py

    r32e3c9b r9eb3632  
    385385        self.program = None 
    386386 
    387     def make_kernel(self, q_vectors, magnetic=False): 
     387    def make_kernel(self, q_vectors): 
    388388        # type: (List[np.ndarray]) -> "GpuKernel" 
    389389        if self.program is None: 
     
    391391            self.program = compiler(self.info.name, self.source, 
    392392                                    self.dtype, self.fast) 
     393            names = [generate.kernel_name(self.info, variant) 
     394                     for variant in ("Iq", "Iqxy", "Imagnetic")] 
     395            self._kernels = [getattr(self.program, name) for name in names] 
    393396        is_2d = len(q_vectors) == 2 
    394         variant = "Imagnetic" if magnetic else "Iqxy" if is_2d else "Iq" 
    395         kernel_name = generate.kernel_name(self.info, variant) 
    396         kernel = getattr(self.program, kernel_name) 
     397        kernel = self._kernels[1:3] if is_2d else [self._kernels[0]]*2 
    397398        return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
    398399 
     
    529530                             hostbuf=values) 
    530531 
     532        kernel = self.kernel[1 if magnetic else 0] 
     533        args = [ 
     534            np.uint32(self.q_input.nq), None, None, 
     535            details_b, values_b, self.q_input.q_b, self.result_b, 
     536            self.real(cutoff), 
     537        ] 
     538        #print("Calling OpenCL") 
     539        #print("values",values) 
    531540        # Call kernel and retrieve results 
     541        last_call = None 
    532542        step = 100 
    533         #print("calling OpenCL") 
    534543        for start in range(0, call_details.pd_prod, step): 
    535544            stop = min(start+step, call_details.pd_prod) 
    536             args = [ 
    537                 np.uint32(self.q_input.nq), np.int32(start), np.int32(stop), 
    538                 details_b, values_b, self.q_input.q_b, self.result_b, 
    539                 self.real(cutoff), 
    540             ] 
    541             self.kernel(self.queue, self.q_input.global_size, None, *args) 
     545            #print("queuing",start,stop) 
     546            args[1:3] = [np.int32(start), np.int32(stop)] 
     547            last_call = [kernel(self.queue, self.q_input.global_size, 
     548                                None, *args, wait_for=last_call)] 
    542549        cl.enqueue_copy(self.queue, self.result, self.result_b) 
    543550 
     
    546553            if v is not None: v.release() 
    547554 
    548         return self.result[:self.q_input.nq] 
     555        scale = values[0]/self.result[self.q_input.nq] 
     556        background = values[1] 
     557        #print("scale",scale,background) 
     558        return scale*self.result[:self.q_input.nq] + background 
     559        # return self.result[:self.q_input.nq] 
    549560 
    550561    def release(self): 
  • sasmodels/kerneldll.py

    rb966a96 r9eb3632  
    270270        # int, int, int, int*, double*, double*, double*, double*, double 
    271271        argtypes = [c_int32]*3 + [c_void_p]*4 + [fp] 
    272         self._Iq = self._dll[generate.kernel_name(self.info, "Iq")] 
    273         self._Iqxy = self._dll[generate.kernel_name(self.info, "Iqxy")] 
    274         self._Imagnetic = self._dll[generate.kernel_name(self.info, "Imagnetic")] 
    275         self._Iq.argtypes = argtypes 
    276         self._Iqxy.argtypes = argtypes 
    277         self._Imagnetic.argtypes = argtypes 
     272        names = [generate.kernel_name(self.info, variant) 
     273                 for variant in ("Iq", "Iqxy", "Imagnetic")] 
     274        self._kernels = [self._dll[name] for name in names] 
     275        for k in self._kernels: 
     276            k.argtypes = argtypes 
    278277 
    279278    def __getstate__(self): 
     
    286285        self._dll = None 
    287286 
    288     def make_kernel(self, q_vectors, magnetic=False): 
     287    def make_kernel(self, q_vectors): 
    289288        # type: (List[np.ndarray]) -> DllKernel 
    290289        q_input = PyInput(q_vectors, self.dtype) 
     
    292291        if self._dll is None: 
    293292            self._load_dll() 
    294         kernel = [self._Iqxy, self._Imagnetic] if q_input.is_2d else [self._Iq]*2 
     293        is_2d = len(q_vectors) == 2 
     294        kernel = self._kernels[1:3] if is_2d else [self._kernels[0]]*2 
    295295        return DllKernel(kernel, self.info, q_input) 
    296296 
     
    346346        # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 
    347347 
    348         #print("in kerneldll") 
    349         #print("values", values) 
    350         start, stop = 0, call_details.pd_prod 
     348        kernel = self.kernel[1 if magnetic else 0] 
    351349        args = [ 
    352350            self.q_input.nq, # nq 
    353             start, # pd_start 
    354             stop, # pd_stop pd_stride[MAX_PD] 
     351            None, # pd_start 
     352            None, # pd_stop pd_stride[MAX_PD] 
    355353            call_details.buffer.ctypes.data, # problem 
    356354            values.ctypes.data,  #pars 
     
    358356            self.result.ctypes.data,   # results 
    359357            self.real(cutoff), # cutoff 
    360             ] 
    361         #print("calling DLL") 
    362         self.kernel[1 if magnetic else 0](*args) # type: ignore 
    363         return self.result[:-1] 
     358        ] 
     359        #print("kerneldll values", values) 
     360        step = 100 
     361        for start in range(0, call_details.pd_prod, step): 
     362            stop = min(start+step, call_details.pd_prod) 
     363            args[1:3] = [start, stop] 
     364            #print("calling DLL") 
     365            kernel(*args) # type: ignore 
     366 
     367        #print("returned",self.q_input.q, self.result) 
     368        scale = values[0]/self.result[self.q_input.nq] 
     369        background = values[1] 
     370        #print("scale",scale,background) 
     371        return scale*self.result[:self.q_input.nq] + background 
    364372 
    365373    def release(self): 
  • sasmodels/model_test.py

    r639c4e3 r9eb3632  
    5151 
    5252from .core import list_models, load_model_info, build_model, HAVE_OPENCL 
    53 from .kernel import dispersion_mesh 
     53from .details import dispersion_mesh 
    5454from .direct_model import call_kernel, get_weights 
    5555from .exception import annotate_exception 
  • sasmodels/modelinfo.py

    r32e3c9b r9eb3632  
    416416 
    417417        self.npars = sum(p.length for p in self.kernel_parameters) 
    418         self.num_magnetic = sum(p.length for p in self.kernel_parameters 
    419                                 if p.type=='sld') 
     418        self.nmagnetic = sum(p.length for p in self.kernel_parameters 
     419                             if p.type=='sld') 
     420        self.nvalues = 2 + self.npars 
     421        if self.nmagnetic: 
     422            self.nvalues += 3 + 3*self.nmagnetic 
    420423 
    421424        self.call_parameters = self._get_call_parameters() 
     
    522525 
    523526        # Add the magnetic parameters to the end of the call parameter list. 
    524         if self.num_magnetic > 0: 
     527        if self.nmagnetic > 0: 
    525528            full_list.extend([ 
    526529                Parameter('up:frac_i', '', 0., [0., 1.], 
  • sasmodels/models/hardsphere.py

    rd2bb604 r9eb3632  
    7373    """ 
    7474 
    75 Iq = """ 
     75Iq = r""" 
    7676      double D,A,B,G,X,X2,X4,S,C,FF,HARDSPH; 
    7777      // these are c compiler instructions, can also put normal code inside the "if else" structure  
     
    8787      if(fabs(radius_effective) < 1.E-12) { 
    8888               HARDSPH=1.0; 
     89//printf("HS1 %g: %g\n",q,HARDSPH); 
    8990               return(HARDSPH); 
    9091      } 
     
    9899      if(X < 5.E-06) { 
    99100                 HARDSPH=1./A; 
     101//printf("HS2 %g: %g\n",q,HARDSPH); 
    100102                 return(HARDSPH); 
    101103      } 
     
    122124            //FF = (8 +2.*volfraction + ( volfraction/4. -0.8 +(volfraction/100. -1./35.)*X2 )*X2 )*A  + (3.0 -X2/3. +X4/40.)*2.*B; 
    123125            HARDSPH= 1./(1. + volfraction*FF ); 
     126//printf("HS3 %g: %g\n",q,HARDSPH); 
    124127            return(HARDSPH); 
    125128      } 
     
    146149//      FF=A*(S/X3-C/X2) + B*(2.*S/X3 - C/X2 +2.0*(C-1.0)/X4) + G*( (4./X -24./X3)*S -(1.0 -12./X2 +24./X4)*C +24./X4 )/X2; 
    147150//      HARDSPH= 1./(1. + 24.*volfraction*FF ); 
     151//printf("HS4 %g: %g\n",q,HARDSPH); 
    148152      return(HARDSPH); 
    149153   """ 
  • sasmodels/product.py

    r0ff62d4 r9eb3632  
    1414 
    1515from .modelinfo import suffix_parameter, ParameterTable, ModelInfo 
    16 from .kernel import KernelModel, Kernel, dispersion_mesh 
     16from .kernel import KernelModel, Kernel 
     17from .details import dispersion_mesh 
    1718 
    1819try: 
  • sasmodels/sasview_model.py

    r6a0d6aa r9eb3632  
    2323from . import weights 
    2424from . import modelinfo 
    25 from . import kernel 
     25from .details import build_details, dispersion_mesh 
    2626 
    2727try: 
     
    513513        parameters = self._model_info.parameters 
    514514        pairs = [self._get_weights(p) for p in parameters.call_parameters] 
    515         call_details, values = kernel.build_details(calculator, pairs) 
    516         # TODO: should test for 2d? 
    517         magnetic = any(values[k]!=0 for k in parameters.magnetism_index) 
     515        call_details, values, is_magnetic = build_details(calculator, pairs) 
    518516        result = calculator(call_details, values, cutoff=self.cutoff, 
    519                             magnetic=magnetic) 
     517                            magnetic=is_magnetic) 
    520518        calculator.release() 
    521519        return result 
     
    591589                for p in self._model_info.parameters.call_parameters 
    592590                if p.type == 'volume'] 
    593         return kernel.dispersion_mesh(self._model_info, pars) 
     591        return dispersion_mesh(self._model_info, pars) 
    594592 
    595593    def _get_weights(self, par): 
Note: See TracChangeset for help on using the changeset viewer.