Changeset ee72c70 in sasmodels


Ignore:
Timestamp:
Apr 18, 2016 7:43:26 PM (9 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
51973e1
Parents:
ae2b6b5 (diff), 1bf66d9 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'polydisp' of github.com:sasview/sasmodels into polydisp

Location:
sasmodels
Files:
1 added
9 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/model_test.py

    r0ff62d4 r38a9b07  
    259259                    self.assertTrue(np.isfinite(actual_yi), 
    260260                                    'invalid f(%s): %s' % (xi, actual_yi)) 
     261                elif np.isnan(yi): 
     262                    self.assertTrue(np.isnan(actual_yi), 
     263                                    'f(%s): expected:%s; actual:%s' 
     264                                    % (xi, yi, actual_yi)) 
    261265                else: 
    262                     self.assertTrue(is_near(yi, actual_yi, 5), 
     266                    # is_near does not work for infinite values, so also test 
     267                    # for exact values.  Note that this will not 
     268                    self.assertTrue(yi==actual_yi or is_near(yi, actual_yi, 5), 
    263269                                    'f(%s); expected:%s; actual:%s' 
    264270                                    % (xi, yi, actual_yi)) 
  • sasmodels/models/__init__.py

    r32c160a r1ca1fd9  
     1""" 
     2    1D Modeling for SAS 
     3""" 
     4#from sas.models import * 
     5 
     6import os 
     7from distutils.filelist import findall 
     8 
     9__version__ = "2.1.0" 
     10 
     11def get_data_path(media): 
     12    """ 
     13    """ 
     14    # Check for data path in the package 
     15    path = os.path.join(os.path.dirname(__file__), media) 
     16    if os.path.isdir(path): 
     17        return path 
     18 
     19    # Check for data path next to exe/zip file. 
     20    # If we are inside a py2exe zip file, we need to go up 
     21    # to get to the directory containing  
     22    # the media for this module 
     23    path = os.path.dirname(__file__) 
     24    #Look for maximum n_dir up of the current dir to find media 
     25    n_dir = 12 
     26    for i in range(n_dir): 
     27        path, _ = os.path.split(path) 
     28        media_path = os.path.join(path, media) 
     29        if os.path.isdir(media_path): 
     30            module_media_path = os.path.join(media_path, 'models_media') 
     31            if os.path.isdir(module_media_path): 
     32                return module_media_path 
     33            return media_path 
     34    
     35    raise RuntimeError('Could not find models media files') 
     36 
     37def data_files(): 
     38    """ 
     39    Return the data files associated with media. 
     40     
     41    The format is a list of (directory, [files...]) pairs which can be 
     42    used directly in setup(...,data_files=...) for setup.py. 
     43 
     44    """ 
     45    data_files = [] 
     46    path_img = get_data_path(media=os.path.join("sasmodels","sasmodels","models","img")) 
     47    #path_img = get_data_path(media="img") 
     48    im_list = findall(path_img) 
     49    #for f in findall(path): 
     50    #    if os.path.isfile(f) and f not in im_list: 
     51    #        data_files.append(('media/models_media', [f])) 
     52     
     53    for f in im_list: 
     54        data_files.append(('media/models_media/img', [f])) 
     55    return data_files 
     56 
  • sasmodels/models/porod.py

    rec45c4f r82923a6  
    2626""" 
    2727 
    28 from numpy import sqrt, power 
     28from numpy import sqrt, power, inf, errstate 
    2929 
    3030name = "porod" 
     
    4242    @param q: Input q-value 
    4343    """ 
    44     return 1.0/power(q, 4) 
     44    with errstate(divide='ignore'): 
     45        return power(q, -4) 
    4546 
    4647Iq.vectorized = True  # Iq accepts an array of q values 
     
    5859demo = dict(scale=1.5, background=0.5) 
    5960 
    60 tests = [[{'scale': 0.00001, 'background':0.01}, 0.04, 3.916250]] 
     61tests = [ 
     62    [{'scale': 0.00001, 'background':0.01}, 0.04, 3.916250], 
     63    [{}, 0.0, inf], 
     64] 
  • sasmodels/models/spherical_sld.c

    r299dcce r1bf66d9  
    1 //Headers 
    2 static double form_volume(double thick_inter[], 
    3     double thick_flat_[], 
    4     double core_radius, 
    5     int n_shells); 
    6  
    7 double Iq(double q, 
     1static double form_volume( 
    82    int n_shells, 
    9     double thick_inter[], 
    10     double func_inter[], 
    11     double sld_core, 
    12     double sld_solvent, 
    13     double sld_flat[], 
     3    double radius_core, 
    144    double thick_flat[], 
    15     double nu_inter[], 
    16     int npts_inter, 
    17     double core_radius); 
    18  
    19 double Iqxy(double qx, double qy, 
    20     int n_shells, 
    21     double thick_inter[], 
    22     double func_inter[], 
    23     double sld_core, 
    24     double sld_solvent, 
    25     double sld_flat[], 
    26     double thick_flat[], 
    27     double nu_inter[], 
    28     int npts_inter, 
    29     double core_radius); 
    30  
    31 //Main code 
    32 static double form_volume(double thick_inter[], 
    33     double thick_flat_[], 
    34     double core_radius, 
    35     int n) 
     5    double thick_inter[]) 
    366{ 
    37     double radius = 0.0; 
    387    int i; 
    39     double r = core_radius; 
    40     for (i=0; i < n; i++) { 
     8    double r = radius_core; 
     9    for (i=0; i < n_shells; i++) { 
    4110        r += thick_inter[i]; 
    4211        r += thick_flat[i]; 
     
    5625  double background = dp[6]; 
    5726  double npts = dp[57]; //number of sub_layers in each interface 
    58   double nsl=npts;//21.0; //nsl = Num_sub_layer:  MUST ODD number in double //no other number works now 
     27  double nsl=npts;//21.0; //nsl = Num_sub_layer:  must be ODD double number 
    5928  int n_s; 
    6029 
     
    6332  double pi; 
    6433 
    65   //int* fun_type; 
    66   //double* sld; 
    67   //double* thick_inter; 
    68   //double* thick; 
    69   //double* fun_coef; 
    70  
    7134  double total_thick=0.0; 
    7235 
    73   //fun_type = (int*)malloc((n+2)*sizeof(int)); 
    74   //sld = (double*)malloc((n+2)*sizeof(double)); 
    75   //thick_inter = (double*)malloc((n+2)*sizeof(double)); 
    76   //thick = (double*)malloc((n+2)*sizeof(double)); 
    77   //fun_coef = (double*)malloc((n+2)*sizeof(double)); 
    78  
    79   //TODO: Solution to avoid mallocs but probablyu can be done better 
    8036  int fun_type[12]; 
    8137  double sld[12]; 
     
    175131            if (fabs(slope) > 0.0 ){ 
    176132              //fun = sign * 3.0 * r * (2.0*qr*sin(qr)-((qr*qr)-2.0)*cos(qr))/(qr * qr * qr * qr); 
    177               fun = sign * r * sph_j1c(qr)  +  sign * 3.0 * sin(qr)/(qr * qr * q ) + sign * 6.0 * cos(qr)/(qr * qr * qr * q); 
     133              fun = sign * r * sph_j1c(qr) + sign * 3.0 * sin(qr)/(qr * qr * q ) 
     134                + sign * 6.0 * cos(qr)/(qr * qr * qr * q); 
    178135            } 
    179136          } 
     
    203160    } 
    204161  } 
    205   //vol += vol_sub; 
    206162  f2 = f * f / vol; 
    207   //f2 *= scale; 
    208   //f2 += background; 
    209   //free(fun_type); 
    210   //free(sld); 
    211   //free(thick_inter); 
    212   //free(thick); 
    213   //free(fun_coef); 
    214163 
    215164  return (f2); 
     
    222171 * @return: function value 
    223172 */ 
    224 double Iq(double q, 
     173static double Iq(double q, 
    225174    int n_shells, 
    226     double thick_inter[], 
    227     double func_inter[], 
     175    int npts_inter, 
     176    double radius_core, 
    228177    double sld_core, 
    229178    double sld_solvent, 
    230179    double sld_flat[], 
    231180    double thick_flat[], 
    232     double nu_inter[], 
    233     int npts_inter, 
    234     double core_radius 
    235     ) { 
     181    double func_inter[], 
     182    double thick_inter[], 
     183    double nu_inter[] ) { 
    236184 
    237185    //printf("Number of points %d\n",npts_inter); 
    238186    double intensity; 
    239     //TODO: Remove this container at later stage. It is only kept to minimize stupid errors now 
     187    //TODO: Remove this container at later stage. 
    240188    double dp[60]; 
    241189    dp[0] = n_shells; 
    242190    //This is scale will also have to be removed at some stage 
    243191    dp[1] = 1.0; 
    244     dp[2] = thick_inter_0; 
    245     dp[3] = func_inter_0; 
     192    dp[2] = thick_inter[0]; 
     193    dp[3] = func_inter[0]; 
    246194    dp[4] = sld_core; 
    247195    dp[5] = sld_solvent; 
    248196    dp[6] = 0.0; 
    249  
    250     for (i=0; i<n; i++){ 
     197    dp[7] = sld_flat[0]; 
     198    //TODO: Something is messed up with this data strcucture! 
     199    dp[17] = thick_inter[0]; 
     200    dp[27] = thick_flat[0]; 
     201    dp[37] = func_inter[0]; 
     202    dp[47] = nu_inter[0]; 
     203 
     204    for (int i=1; i<=n_shells; i++){ 
    251205        dp[i+7] = sld_flat[i]; 
    252206        dp[i+17] = thick_inter[i]; 
     
    257211 
    258212    dp[57] = npts_inter; 
    259     dp[58] = nu_inter_0; 
    260     dp[59] = rad_core_0; 
     213    dp[58] = nu_inter[0]; 
     214    dp[59] = radius_core; 
    261215 
    262216    intensity = 1.0e-4*sphere_sld_kernel(dp,q); 
     
    271225 * @return: function value 
    272226 */ 
    273 double Iqxy(double qx, double qy, 
     227 
     228/*static double Iqxy(double qx, double qy, 
    274229    int n_shells, 
    275     double thick_inter[], 
    276     double func_inter[], 
     230    int npts_inter, 
     231    double radius_core 
    277232    double sld_core, 
    278233    double sld_solvent, 
    279234    double sld_flat[], 
    280235    double thick_flat[], 
     236    double func_inter[], 
     237    double thick_inter[], 
    281238    double nu_inter[], 
    282     int npts_inter, 
    283     double core_radius 
    284239    ) { 
    285240 
    286241    double q = sqrt(qx*qx + qy*qy); 
    287     return Iq(q, n_shells, thick_inter_0, func_inter_0, core0_sld, solvent_sld, 
    288     flat1_sld, flat2_sld, flat3_sld, flat4_sld, flat5_sld, 
    289     flat6_sld, flat7_sld, flat8_sld, flat9_sld, flat10_sld, 
    290     thick_inter_1, thick_inter_2, thick_inter_3, thick_inter_4, thick_inter_5, 
    291     thick_inter_6, thick_inter_7, thick_inter_8, thick_inter_9, thick_inter_10, 
    292     thick_flat_1, thick_flat_2, thick_flat_3, thick_flat_4, thick_flat_5, 
    293     thick_flat_6, thick_flat_7, thick_flat_8, thick_flat_9, thick_flat_10, 
    294     func_inter_1, func_inter_2, func_inter_3, func_inter_4, func_inter_5, 
    295     func_inter_6, func_inter_7, func_inter_8, func_inter_9, func_inter_10, 
    296     nu_inter_1, nu_inter_2, nu_inter_3, nu_inter_4, nu_inter_5, 
    297     nu_inter_6, nu_inter_7, nu_inter_8, nu_inter_9, nu_inter_10, 
    298     npts_inter, nu_inter_0, rad_core_0); 
    299  
    300     //TODO: Check if evalute rphi is not needed? 
    301  
    302 } 
    303  
     242    return Iq(q, n_shells, npts_inter, radius_core, sld_core, sld_solvent, 
     243    sld_flat[], thick_flat[], func_inter[], thick_inter[], nu_inter[]) 
     244}*/ 
     245 
  • sasmodels/models/spherical_sld.py

    rd2bb604 r1bf66d9  
    170170# pylint: disable=bad-whitespace, line-too-long 
    171171#            ["name", "units", default, [lower, upper], "type", "description"], 
    172 parameters = [["n",                "",               1,      [0, 9],         "", "number of shells"], 
     172parameters = [["n_shells",                "",               1,      [0, 9],         "", "number of shells"], 
     173              ["npts_inter",       "",               35,     [0, 35],        "", "number of points in each sublayer Must be odd number"], 
    173174              ["radius_core",      "Ang",            50.0,   [0, inf],       "", "intern layer thickness"], 
    174175              ["sld_core",         "1e-6/Ang^2",     2.07,   [-inf, inf],    "", "sld function flat"], 
    175               ["sld_flat[n]",      "1e-6/Ang^2",     4.06,   [-inf, inf],    "", "sld function flat"], 
    176               ["thick_flat[n]",    "Ang",            100.0,  [0, inf],       "", "flat layer_thickness"], 
    177               ["func_inter[n]",    "",               0,      [0, 4],         "", "Erf:0, RPower:1, LPower:2, RExp:3, LExp:4"], 
    178               ["thick_inter[n]",   "Ang",            50.0,   [0, inf],       "", "intern layer thickness"], 
    179               ["inter_nu[n]",      "",               2.5,    [-inf, inf],    "", "steepness parameter"], 
    180               ["npts_inter",       "",               35,     [0, 35],        "", "number of points in each sublayer Must be odd number"], 
    181176              ["sld_solvent",      "1e-6/Ang^2",     1.0,    [-inf, inf],    "", "sld function solvent"], 
     177              ["sld_flat[n_shells]",      "1e-6/Ang^2",     4.06,   [-inf, inf],    "", "sld function flat"], 
     178              ["thick_flat[n_shells]",    "Ang",            100.0,  [0, inf],       "", "flat layer_thickness"], 
     179              ["func_inter[n_shells]",    "",               0,      [0, 4],         "", "Erf:0, RPower:1, LPower:2, RExp:3, LExp:4"], 
     180              ["thick_inter[n_shells]",   "Ang",            50.0,   [0, inf],       "", "intern layer thickness"], 
     181              ["nu_inter[n_shells]",      "",               2.5,    [-inf, inf],    "", "steepness parameter"], 
    182182              ] 
    183183# pylint: enable=bad-whitespace, line-too-long 
    184 #source = ["lib/librefl.c",  "lib/sph_j1c.c", "spherical_sld.c"] 
    185  
    186 def Iq(q, *args, **kw): 
    187     return q 
    188  
    189 def Iqxy(qx, *args, **kw): 
    190     return qx 
    191  
    192  
    193 demo = dict( 
    194     n=4, 
    195     scale=1.0, 
    196     solvent_sld=1.0, 
    197     background=0.0, 
    198     npts_inter=35.0, 
    199     ) 
     184source = ["lib/librefl.c",  "lib/sph_j1c.c", "spherical_sld.c"] 
     185 
     186#def Iq(q, *args, **kw): 
     187#    return q 
     188 
     189#def Iqxy(qx, *args, **kw): 
     190#    return qx 
     191 
     192def profile(n_shells, radius_core,  sld_core,  sld_solvent, sld_flat, 
     193            thick_flat, func_inter, thick_inter, nu_inter, npts_inter): 
     194    """ 
     195    Returns shape profile with x=radius, y=SLD. 
     196    """ 
     197 
     198    z = [] 
     199    beta = [] 
     200    z0 = 0 
     201    # two sld points for core 
     202    z.append(0) 
     203    beta.append(sld_core) 
     204    z.append(radius_core) 
     205    beta.append(sld_core) 
     206    z0 += radius_core 
     207 
     208    for i in range(1, n_shells+2): 
     209        dz = thick_inter[i-1]/npts_inter 
     210        # j=0 for interface, j=1 for flat layer 
     211        for j in range(0, 2): 
     212            # interation for sub-layers 
     213            for n_s in range(0, npts_inter+1): 
     214                if j == 1: 
     215                    if i == n_shells+1: 
     216                        break 
     217                    # shift half sub thickness for the first point 
     218                    z0 -= dz#/2.0 
     219                    z.append(z0) 
     220                    #z0 -= dz/2.0 
     221                    z0 += thick_flat[i] 
     222                    sld_i = sld_flat[i] 
     223                    beta.append(sld_flat[i]) 
     224                    dz = 0 
     225                else: 
     226                    nu = nu_inter[i-1] 
     227                    # decide which sld is which, sld_r or sld_l 
     228                    if i == 1: 
     229                        sld_l = sld_core 
     230                    else: 
     231                        sld_l = sld_flat[i-1] 
     232                    if i == n_shells+1: 
     233                        sld_r = sld_solvent 
     234                    else: 
     235                        sld_r = sld_flat[i] 
     236                    # get function type 
     237                    func_idx = func_inter[i-1] 
     238                    # calculate the sld 
     239                    sld_i = intersldfunc(func_idx, npts_inter, n_s, nu, 
     240                                            sld_l, sld_r) 
     241                # append to the list 
     242                z.append(z0) 
     243                beta.append(sld_i) 
     244                z0 += dz 
     245                if j == 1: 
     246                    break 
     247    z.append(z0) 
     248    beta.append(sld_solvent) 
     249    z_ext = z0/5.0 
     250    z.append(z0+z_ext) 
     251    beta.append(sld_solvent) 
     252    # return sld profile (r, beta) 
     253    return np.asarray(z), np.asarray(beta)*1e-6 
     254 
     255def ER(core_radius, n, thickness): 
     256    return np.sum(thickness[:n[0]], axis=0) + core_radius 
     257 
     258def VR(core_radius, n, thickness): 
     259    return 1.0, 1.0 
     260 
     261 
     262demo = { 
     263    "n_shells":4, 
     264    "npts_inter":35.0, 
     265    "radius_core":50.0, 
     266    "sld_core":2.07, 
     267    "sld_solvent": 1.0, 
     268    "sld_flat":[4.0,3.5,4.0,3.5,4.0], 
     269    "thick_flat":[100.0,100.0,100.0,100.0,100.0], 
     270    "func_inter":[0,0,0,0,0], 
     271    "thick_inter":[50.0,50.0,50.0,50.0,50.0], 
     272    "nu_inter":[2.5,2.5,2.5,2.5,2.5] 
     273    } 
    200274 
    201275#TODO: Not working yet 
  • sasmodels/generate.py

    rf2f67a6 rae2b6b5  
    473473    dll_code = load_template('kernel_iq.c') 
    474474    ocl_code = load_template('kernel_iq.cl') 
     475    #ocl_code = load_template('kernel_iq_local.cl') 
    475476    user_code = [open(f).read() for f in model_sources(model_info)] 
    476477 
  • sasmodels/kernel_iq.c

    rf2f67a6 rae2b6b5  
    5252  // Storage for the current parameter values.  These will be updated as we 
    5353  // walk the polydispersity cube. 
    54   local ParameterBlock local_values;  // current parameter values 
     54  ParameterBlock local_values;  // current parameter values 
    5555  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector 
    5656  double norm; 
     
    7474    norm = CALL_VOLUME(local_values); 
    7575 
    76     const double scale = values[0]; 
    77     const double background = values[1]; 
    78     // result[nq] = norm; // Total volume normalization 
     76    double scale, background; 
     77    scale = values[0]; 
     78    background = values[1]; 
    7979 
    8080    #ifdef USE_OPENMP 
    8181    #pragma omp parallel for 
    8282    #endif 
    83     for (int i=0; i < nq; i++) { 
    84       double scattering = CALL_IQ(q, i, local_values); 
    85       result[i] = (norm>0. ? scale*scattering/norm + background : background); 
     83    for (int q_index=0; q_index < nq; q_index++) { 
     84      double scattering = CALL_IQ(q, q_index, local_values); 
     85      result[q_index] = (norm>0. ? scale*scattering/norm + background : background); 
    8686    } 
    8787    return; 
     
    8989 
    9090#if MAX_PD > 0 
    91   // If it is the first round initialize the result to zero, otherwise 
    92   // assume that the previous result has been passed back. 
    93   // Note: doing this even in the monodisperse case in order to handle the 
    94   // rare case where the model parameters are invalid and zero is returned. 
    95   // So slightly increased cost for slightly smaller code size. 
     91 
     92  // need product of weights at every Iq calc, so keep product of 
     93  // weights from the outer loops so that weight = partial_weight * fast_weight 
     94  double partial_weight; // product of weight w4*w3*w2 but not w1 
     95  double spherical_correction; // cosine correction for latitude variation 
     96  double weight; // product of partial_weight*w1*spherical_correction 
     97 
     98  // Location in the polydispersity hypercube, one index per dimension. 
     99  int pd_index[MAX_PD]; 
     100 
     101  // Location of the coordinated parameters in their own sub-cubes. 
     102  int offset[NPARS]; 
     103 
     104  // Number of coordinated indices 
     105  const int num_coord = details->num_coord; 
     106 
     107  // Number of elements in the longest polydispersity loop 
     108  const int fast_length = details->pd_length[0]; 
     109 
     110  // Trigger the reset behaviour that happens at the end the fast loop 
     111  // by setting the initial index >= weight vector length. 
     112  pd_index[0] = fast_length; 
     113 
     114  // Default the spherical correction to 1.0 in case it is not otherwise set 
     115  spherical_correction = 1.0; 
     116 
     117  // Since we are no longer looping over the entire polydispersity hypercube 
     118  // for each q, we need to track the result and normalization values between 
     119  // calls.  This means initializing them to 0 at the start and accumulating 
     120  // them between calls. 
     121  norm = pd_start == 0 ? 0.0 : result[nq]; 
    96122  if (pd_start == 0) { 
    97123    #ifdef USE_OPENMP 
    98124    #pragma omp parallel for 
    99125    #endif 
    100     for (int i=0; i < nq+1; i++) { 
    101       result[i] = 0.0; 
    102     } 
    103     norm = 0.0; 
    104   } else { 
    105     norm = result[nq]; 
    106   } 
    107  
    108   // need product of weights at every Iq calc, so keep product of 
    109   // weights from the outer loops so that weight = partial_weight * fast_weight 
    110   double partial_weight = NAN; // product of weight w4*w3*w2 but not w1 
    111   double spherical_correction = 1.0;  // cosine correction for latitude variation 
    112  
    113   // Location in the polydispersity hypercube, one index per dimension. 
    114   local int pd_index[MAX_PD]; 
    115  
    116   // Location of the coordinated parameters in their own sub-cubes. 
    117   local int offset[NPARS]; 
    118  
    119   // Trigger the reset behaviour that happens at the end the fast loop 
    120   // by setting the initial index >= weight vector length. 
    121   const int fast_length = details->pd_length[0]; 
    122   pd_index[0] = fast_length; 
    123  
    124   // Number of coordinated indices 
    125   const int num_coord = details->num_coord; 
     126    for (int q_index=0; q_index < nq; q_index++) { 
     127      result[q_index] = 0.0; 
     128    } 
     129  } 
    126130 
    127131  // Loop over the weights then loop over q, accumulating values 
     
    172176    } 
    173177 
    174     // Increment fast index 
    175     const double wi = weights[details->pd_offset[0] + pd_index[0]++]; 
    176     double weight = partial_weight*wi; 
     178    // Update fast parameters 
    177179    //printf("fast %d: ", loop_index); 
    178180    for (int k=0; k < num_coord; k++) { 
     
    189191    //printf("\n"); 
    190192 
     193    // Increment fast index 
     194    const double wi = weights[details->pd_offset[0] + pd_index[0]]; 
     195    weight = partial_weight*wi; 
     196    pd_index[0]++; 
     197 
    191198    #ifdef INVALID 
    192199    if (INVALID(local_values)) continue; 
     
    204211      #pragma omp parallel for 
    205212      #endif 
    206       for (int i=0; i < nq; i++) { 
    207         const double scattering = CALL_IQ(q, i, local_values); 
    208         result[i] += weight*scattering; 
    209       } 
    210     } 
    211   } 
    212  
    213   // End of the PD loop we can normalize 
     213      for (int q_index=0; q_index < nq; q_index++) { 
     214        const double scattering = CALL_IQ(q, q_index, local_values); 
     215        result[q_index] += weight*scattering; 
     216      } 
     217    } 
     218  } 
     219 
    214220  if (pd_stop >= details->total_pd) { 
    215     const double scale = values[0]; 
    216     const double background = values[1]; 
     221    // End of the PD loop we can normalize 
     222    double scale, background; 
     223    scale = values[0]; 
     224    background = values[1]; 
    217225    #ifdef USE_OPENMP 
    218226    #pragma omp parallel for 
    219227    #endif 
    220     for (int i=0; i < nq; i++) { 
    221       result[i] = (norm>0. ? scale*result[i]/norm + background : background); 
     228    for (int q_index=0; q_index < nq; q_index++) { 
     229      result[q_index] = (norm>0. ? scale*result[q_index]/norm + background : background); 
    222230    } 
    223231  } 
  • sasmodels/kernel_iq.cl

    rf2f67a6 rae2b6b5  
    5050    ) 
    5151{ 
    52   double norm; 
    53  
    54   // who we are and what element we are working with 
    55   const int q_index = get_global_id(0); 
    56  
    57   // number of active loops 
    58   const int num_active = details->num_active; 
    59  
    6052  // Storage for the current parameter values.  These will be updated as we 
    6153  // walk the polydispersity cube. 
    6254  ParameterBlock local_values;  // current parameter values 
    6355  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector 
     56  double norm; 
     57 
     58  // who we are and what element we are working with 
     59  const int q_index = get_global_id(0); 
     60 
     61  // number of active loops 
     62  const int num_active = details->num_active; 
     63 
    6464  // Fill in the initial variables 
    65   for (int k = 0; k < NPARS; k++) { 
     65  for (int k=0; k < NPARS; k++) { 
    6666    pvec[k] = values[details->par_offset[k]]; 
    6767  } 
     
    7272    if (INVALID(local_values)) { return; } 
    7373    #endif 
     74    norm = CALL_VOLUME(local_values); 
    7475 
    7576    double scale, background; 
    76     norm = CALL_VOLUME(local_values); 
    7777    scale = values[0]; 
    7878    background = values[1]; 
    79  
    80     // if (i==0) result[nq] = norm; // Total volume normalization 
    8179 
    8280    if (q_index < nq) { 
     
    8987#if MAX_PD > 0 
    9088 
    91   // If it is the first round initialize the result to zero, otherwise 
    92   // assume that the previous result has been passed back. 
    93   // Note: doing this even in the monodisperse case in order to handle the 
    94   // rare case where the model parameters are invalid and zero is returned. 
    95   // So slightly increased cost for slightly smaller code size. 
    9689  double this_result; 
    9790 
     
    10295  // weights from the outer loops so that weight = partial_weight * fast_weight 
    10396  double partial_weight; // product of weight w4*w3*w2 but not w1 
    104   double spherical_correction;  // cosine correction for latitude variation 
     97  double spherical_correction; // cosine correction for latitude variation 
     98  double weight; // product of partial_weight*w1*spherical_correction 
    10599 
    106100  // Location in the polydispersity hypercube, one index per dimension. 
     
    110104  int offset[NPARS]; 
    111105 
     106  // Number of coordinated indices 
     107  const int num_coord = details->num_coord; 
     108 
    112109  // Number of elements in the longest polydispersity loop 
    113110  const int fast_length = details->pd_length[0]; 
    114111 
    115   // Number of coordinated indices 
    116   const int num_coord = details->num_coord; 
    117  
    118   // We could in theory spread this work across different threads, but 
    119   // lets keep it simple; 
    120   norm = pd_start == 0 ? 0.0 : result[nq]; 
    121   spherical_correction = 1.0;  // the usual case. 
    122   // partial_weight = NAN; 
    123112  // Trigger the reset behaviour that happens at the end the fast loop 
    124113  // by setting the initial index >= weight vector length. 
    125114  pd_index[0] = fast_length; 
     115 
     116  // Default the spherical correction to 1.0 in case it is not otherwise set 
     117  spherical_correction = 1.0; 
    126118 
    127119  // Since we are no longer looping over the entire polydispersity hypercube 
     
    129121  // calls.  This means initializing them to 0 at the start and accumulating 
    130122  // them between calls. 
     123  norm = pd_start == 0 ? 0.0 : result[nq]; 
    131124  if (q_index < nq) { 
    132125    this_result = pd_start == 0 ? 0.0 : result[q_index]; 
     
    141134      // Compute position in polydispersity hypercube 
    142135      for (int k=0; k < num_active; k++) { 
    143           pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k]; 
    144           //printf("pd_index[%d] = %d\n",k,pd_index[k]); 
     136        pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k]; 
     137        //printf("pd_index[%d] = %d\n",k,pd_index[k]); 
    145138      } 
    146139 
     
    163156      //printf("slow %d: ", loop_index); 
    164157      for (int k=0; k < num_coord; k++) { 
    165         if (k < num_coord) { 
    166           int par = details->par_coord[k]; 
    167           int coord = details->pd_coord[k]; 
    168           int this_offset = details->par_offset[par]; 
    169           int block_size = 1; 
    170           for (int bit=0; coord != 0; bit++) { 
    171             if (coord&1) { 
    172                 this_offset += block_size * pd_index[bit]; 
    173                 block_size *= details->pd_length[bit]; 
    174             } 
    175             coord >>= 1; 
     158        int par = details->par_coord[k]; 
     159        int coord = details->pd_coord[k]; 
     160        int this_offset = details->par_offset[par]; 
     161        int block_size = 1; 
     162        for (int bit=0; coord != 0; bit++) { 
     163          if (coord&1) { 
     164              this_offset += block_size * pd_index[bit]; 
     165              block_size *= details->pd_length[bit]; 
    176166          } 
    177           offset[par] = this_offset; 
    178           pvec[par] = values[this_offset]; 
    179           //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]); 
    180           // if theta is not coordinated with fast index, precompute spherical correction 
    181           if (par == details->theta_par && !(details->par_coord[k]&1)) { 
    182             spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    183           } 
     167          coord >>= 1; 
    184168        } 
     169        offset[par] = this_offset; 
     170        pvec[par] = values[this_offset]; 
     171        //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]); 
     172        // if theta is not coordinated with fast index, precompute spherical correction 
     173        if (par == details->theta_par && !(details->par_coord[k]&1)) { 
     174          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
     175        } 
    185176      } 
    186177      //printf("\n"); 
    187178    } 
    188179 
    189     double weight; 
     180    // Update fast parameters 
     181    //printf("fast %d: ", loop_index); 
     182    for (int k=0; k < num_coord; k++) { 
     183      if (details->pd_coord[k]&1) { 
     184        const int par = details->par_coord[k]; 
     185        pvec[par] = values[offset[par]++]; 
     186        //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]); 
     187        // if theta is coordinated with fast index, compute spherical correction each time 
     188        if (par == details->theta_par) { 
     189          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
     190        } 
     191      } 
     192    } 
     193    //printf("\n"); 
     194 
     195    // Increment fast index 
    190196    const double wi = weights[details->pd_offset[0] + pd_index[0]]; 
    191197    weight = partial_weight*wi; 
    192198    pd_index[0]++; 
    193  
    194     // Increment fast index 
    195     //printf("fast %d: ", loop_index); 
    196     for (int k=0; k < num_coord; k++) { 
    197       if (k < num_coord) { 
    198         if (details->pd_coord[k]&1) { 
    199           const int par = details->par_coord[k]; 
    200           pvec[par] = values[offset[par]++]; 
    201           //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]); 
    202           // if theta is coordinated with fast index, compute spherical correction each time 
    203           if (par == details->theta_par) { 
    204             spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    205           } 
    206         } 
    207       } 
    208     } 
    209     //printf("\n"); 
    210199 
    211200    #ifdef INVALID 
     
    229218    if (pd_stop >= details->total_pd) { 
    230219      // End of the PD loop we can normalize 
    231       const double scale = values[0]; 
    232       const double background = values[1]; 
     220      double scale, background; 
     221      scale = values[0]; 
     222      background = values[1]; 
    233223      result[q_index] = (norm>0. ? scale*this_result/norm + background : background); 
    234224    } else { 
     
    236226      result[q_index] = this_result; 
    237227    } 
    238     // Accumulate norm. 
     228 
     229    // Remember the updated norm. 
    239230    if (q_index == 0) result[nq] = norm; 
    240231  } 
  • sasmodels/kernelcl.py

    rf2f67a6 rae2b6b5  
    511511                             hostbuf=values) 
    512512 
    513         start, stop = 0, call_details.total_pd 
    514         args = [ 
    515             np.uint32(self.q_input.nq), np.int32(start), np.int32(stop), 
    516             details_b, weights_b, values_b, self.q_input.q_b, self.result_b, 
    517             self.real(cutoff), 
    518         ] 
    519         self.kernel(self.queue, self.q_input.global_size, None, *args) 
     513        # Call kernel and retrieve results 
     514        step = 100 
     515        for start in range(0, call_details.total_pd, step): 
     516            stop = min(start+step, call_details.total_pd) 
     517            args = [ 
     518                np.uint32(self.q_input.nq), np.int32(start), np.int32(stop), 
     519                details_b, weights_b, values_b, self.q_input.q_b, self.result_b, 
     520                self.real(cutoff), 
     521            ] 
     522            self.kernel(self.queue, self.q_input.global_size, None, *args) 
    520523        cl.enqueue_copy(self.queue, self.result, self.result_b) 
     524 
     525        # Free buffers 
    521526        for v in (details_b, weights_b, values_b): 
    522527            if v is not None: v.release() 
Note: See TracChangeset for help on using the changeset viewer.