source: sasmodels/sasmodels/compare_many.py @ 7ae2b7f

Last change on this file was 7ae2b7f, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

still more linting; ignore numpy types

#!/usr/bin/env python
"""
Program to compare results from many random parameter sets for a given model.

The result is a comma separated value (CSV) table that can be redirected
from standard output into a file and loaded into a spreadsheet.

The models are compared for each parameter set and if the difference is
greater than expected for that precision, the parameter set is labeled
as bad and written to the output, along with the random seed used to
generate that parameter set.  This seed can be used with :mod:`compare`
to reload and display the details of the model.
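
For example, one way to run a quick check on a single model and capture the
CSV output (an illustrative command; it assumes the sasmodels package is on
the Python path and uses "cylinder" purely as an example model)::

    python -m sasmodels.compare_many cylinder 100 1d100 mono single double > cylinder.csv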
13"""
14from __future__ import print_function
15
16import sys
17import traceback
18
19import numpy as np  # type: ignore
20
21from . import core
22from .compare import (MODELS, randomize_pars, suppress_pd, make_data,
23                      make_engine, get_pars, columnize,
24                      constrain_pars, constrain_new_to_old)
25
def calc_stats(target, value, index):
    """
    Calculate statistics between the target value and the computed value.

    *target* and *value* are the vectors being compared, with the
    difference normalized by *target* to get relative error.  Only
    the elements listed in *index* are used, though *index* may be
    an empty slice defined by *slice(None, None)*.

    Returns:

        *maxrel* the maximum relative difference

        *rel95* the relative difference with the 5% biggest differences ignored

        *maxabs* the maximum absolute difference for the 5% biggest differences

        *maxval* the maximum value for the 5% biggest differences
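
    For example, with 100 points and *index* = *slice(None, None)*, the code
    takes p95 = 95, so *rel95* is the 95th smallest relative error and
    *maxabs* and *maxval* are taken over the 5 points with the largest
    relative error.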
    """
    resid = abs(value-target)[index]
    relerr = resid/target[index]
    sorted_rel_index = np.argsort(relerr)
    #p90 = int(len(relerr)*0.90)
    p95 = int(len(relerr)*0.95)
    maxrel = np.max(relerr)
    rel95 = relerr[sorted_rel_index[p95]]
    maxabs = np.max(resid[sorted_rel_index[p95:]])
    maxval = np.max(value[sorted_rel_index[p95:]])
    return maxrel, rel95, maxabs, maxval

def print_column_headers(pars, parts):
    """
    Generate column headers for the differences and for the parameters,
    and print them to standard output.
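
    For a single engine pair and a model whose parameters are *radius* and
    *sld* (illustrative names), the two emitted rows would look like::

        "","single vs. double","","","","Parameters"
        "Seed","Max rel err","95% rel err","Max abs err above 95% rel","Max value above 95% rel","radius","sld"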
    """
    stats = list('Max rel err|95% rel err|Max abs err above 95% rel|Max value above 95% rel'.split('|'))
    groups = ['']
    for p in parts:
        groups.append(p)
        groups.extend(['']*(len(stats)-1))
    groups.append("Parameters")
    columns = ['Seed'] + stats*len(parts) + list(sorted(pars.keys()))
    print(','.join('"%s"'%c for c in groups))
    print(','.join('"%s"'%c for c in columns))

# Target 'good' value for various precision levels.
PRECISION = {
    'fast': 1e-3,
    'half': 1e-3,
    'single': 5e-5,
    'double': 5e-14,
    'single!': 5e-5,
    'double!': 5e-14,
    'quad!': 5e-18,
    'sasview': 5e-14,
}
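
# The pass/fail tolerance for a comparison is the looser of the two engines'
# targets (expected = max(PRECISION[base], PRECISION[comp]) below); comparing
# 'single' against 'double', for example, is judged at 5e-5.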
def compare_instance(name, data, index, N=1, mono=True, cutoff=1e-5,
                     base='sasview', comp='double'):
    r"""
    Compare the model under different calculation engines.

    *name* is the name of the model.

    *data* is the data object giving $q, \Delta q$ calculation points.

    *index* is the active set of points.

    *N* is the number of comparisons to make.

    *mono* is True if polydispersity should be suppressed, so only the
    monodisperse model is compared.

    *cutoff* is the polydispersity weight cutoff to make the calculation
    a little bit faster.

    *base* and *comp* are the names of the calculation engines to compare.
    """

    is_2d = hasattr(data, 'qx_data')
    model_info = core.load_model_info(name)
    pars = get_pars(model_info, use_demo=True)
    header = ('\n"Model","%s","Count","%d","Dimension","%s"'
              % (name, N, "2D" if is_2d else "1D"))
    if not mono: header += ',"Cutoff",%g'%(cutoff,)
    print(header)

    if is_2d:
        if not model_info['parameters'].has_2d:
            print(',"1-D only"')
            return

    # Some not very clean macros for evaluating the models and checking the
    # results.  They freely use variables from the current scope, even some
    # which have not yet been defined, and they abuse mutable lists to update
    # values in the enclosing scope, since nonlocal declarations are not
    # available in Python 2.7.
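    # A Python 3-only alternative (a sketch, not used here) would replace the
    # one-element lists with nonlocal declarations inside check_model:
    #     nonlocal max_diff, good
    #     max_diff = max(max_diff, stats[0])
    #     good = good and (stats[0] < expected)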
    def try_model(fn, pars):
        """
        Return the model evaluated at *pars*.  If there is an exception,
        print it and return NaN of the right shape.
        """
        try:
            result = fn(**pars)
        except Exception:
            traceback.print_exc()
            print("when comparing %s for %d"%(name, seed))
            if hasattr(data, 'qx_data'):
                result = np.NaN*data.data
            else:
                result = np.NaN*data.x
        return result
    def check_model(pars):
        """
        Run the two calculators against *pars*, returning statistics
        on the differences.  See :func:`calc_stats` for the list of stats.
        """
        base_value = try_model(calc_base, pars)
        comp_value = try_model(calc_comp, pars)
        stats = calc_stats(base_value, comp_value, index)
        max_diff[0] = max(max_diff[0], stats[0])
        good[0] = good[0] and (stats[0] < expected)
        return list(stats)


    calc_base = make_engine(model_info, data, base, cutoff)
    calc_comp = make_engine(model_info, data, comp, cutoff)
    expected = max(PRECISION[base], PRECISION[comp])

    num_good = 0
    first = True
    max_diff = [0]
    for k in range(N):
        print("%s %d"%(name, k), file=sys.stderr)
        seed = np.random.randint(1e6)
        pars_i = randomize_pars(pars, seed)
        constrain_pars(model_info, pars_i)
        constrain_new_to_old(model_info, pars_i)
        if mono:
            pars_i = suppress_pd(pars_i)

        good = [True]
        columns = check_model(pars_i)
        columns += [v for _, v in sorted(pars_i.items())]
        if first:
            labels = [" vs. ".join((calc_base.engine, calc_comp.engine))]
            print_column_headers(pars_i, labels)
            first = False
        if good[0]:
            num_good += 1
        else:
            print(("%d,"%seed)+','.join("%s"%v for v in columns))
    print('"good","%d/%d","max diff",%g'%(num_good, N, max_diff[0]))


def print_usage():
    """
    Print the command usage string.
    """
    print("usage: compare_many.py MODEL COUNT (1dNQ|2dNQ) (CUTOFF|mono) (single|double|quad)")


def print_models():
    """
    Print the list of available models in columns.
    """
    print(columnize(MODELS, indent="  "))


def print_help():
    """
    Print the usage string, the option descriptions and the list of available models.
    """
    print_usage()
    print("""\

MODEL is the name of the model, or "all" for all the models
in alphabetical order.

COUNT is the number of randomly generated parameter sets to try. A value
of "10000" is a reasonable check for monodisperse models, or "100" for
polydisperse models.  For a quick check, use "100" and "5" respectively.

NQ is the number of Q values to calculate.  If it starts with "1d", then
it is a 1-dimensional problem, with log spaced Q points from 1e-3 to 1.0.
If it starts with "2d" then it is a 2-dimensional problem, with linearly
spaced Q points from -1.0 to 1.0 in each dimension.  The usual
values are "1d100" for 1-D and "2d32" for 2-D.

CUTOFF is the cutoff value to use for the polydisperse distribution. Weights
below the cutoff will be ignored.  Use "mono" for monodisperse models.  The
choice of polydisperse parameters, and the number of points in the distribution,
is set in compare.py defaults for each model.

PRECISION is the floating point precision to use for comparisons.  If two
precisions are given, then compare one to the other, ignoring sasview.

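For example, a quick check of one model comparing single against double
precision (illustrative values):

    compare_many.py sphere 5 1d100 mono single double
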
Available models:
""")
    print_models()

def main():
    """
    Main program.
    """
    if len(sys.argv) not in (6, 7):
        print_help()
        sys.exit(1)

    model = sys.argv[1]
    if model not in MODELS and model != "all":
        print('Bad model %s.  Use "all" or one of:'%model)
        print_models()
        sys.exit(1)
    try:
        count = int(sys.argv[2])
        is2D = sys.argv[3].startswith('2d')
        assert sys.argv[3][1] == 'd'
        Nq = int(sys.argv[3][2:])
        mono = sys.argv[4] == 'mono'
        cutoff = float(sys.argv[4]) if not mono else 0
        base = sys.argv[5]
        comp = sys.argv[6] if len(sys.argv) > 6 else "sasview"
    except Exception:
        traceback.print_exc()
        print_usage()
        sys.exit(1)

    data, index = make_data({'qmax':1.0, 'is2d':is2D, 'nq':Nq, 'res':0.,
                             'accuracy': 'Low', 'view':'log'})
    model_list = [model] if model != "all" else MODELS
    for model in model_list:
        compare_instance(model, data, index, N=count, mono=mono,
                         cutoff=cutoff, base=base, comp=comp)

if __name__ == "__main__":
    #from .compare import push_seed
    #with push_seed(1): main()
    main()