source: sasmodels/sasmodels/compare_many.py @ ed048b2

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since ed048b2 was ed048b2, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

fix multi-compare

  • Property mode set to 100755
File size: 8.7 KB
Line 
1#!/usr/bin/env python
2"""
3Program to compare results from many random parameter sets for a given model.
4
5The result is a comma separated value (CSV) table that can be redirected
6from standard output into a file and loaded into a spreadsheet.
7
8The models are compared for each parameter set and if the difference is
9greater than expected for that precision, the parameter set is labeled
10as bad and written to the output, along with the random seed used to
11generate that parameter value.  This seed can be used with :mod:`compare`
12to reload and display the details of the model.
13"""
14from __future__ import print_function
15
16import sys
17import traceback
18
19import numpy as np
20
21from . import core
22from . import generate
23from .compare import (MODELS, randomize_pars, suppress_pd, make_data,
24                      make_engine, get_demo_pars, columnize,
25                      constrain_pars, constrain_new_to_old)
26
27def calc_stats(target, value, index):
28    """
29    Calculate statistics between the target value and the computed value.
30
31    *target* and *value* are the vectors being compared, with the
32    difference normalized by *target* to get relative error.  Only
33    the elements listed in *index* are used, though index may be
34    and empty slice defined by *slice(None, None)*.
35
36    Returns:
37
38        *maxrel* the maximum relative difference
39
40        *rel95* the relative difference with the 5% biggest differences ignored
41
42        *maxabs* the maximum absolute difference for the 5% biggest differences
43
44        *maxval* the maximum value for the 5% biggest differences
45    """
46    resid = abs(value-target)[index]
47    relerr = resid/target[index]
48    sorted_rel_index = np.argsort(relerr)
49    #p90 = int(len(relerr)*0.90)
50    p95 = int(len(relerr)*0.95)
51    maxrel = np.max(relerr)
52    rel95 = relerr[sorted_rel_index[p95]]
53    maxabs = np.max(resid[sorted_rel_index[p95:]])
54    maxval = np.max(value[sorted_rel_index[p95:]])
55    return maxrel, rel95, maxabs, maxval
56
57def print_column_headers(pars, parts):
58    """
59    Generate column headers for the differences and for the parameters,
60    and print them to standard output.
61    """
62    stats = list('Max rel err|95% rel err|Max abs err above 90% rel|Max value above 90% rel'.split('|'))
63    groups = ['']
64    for p in parts:
65        groups.append(p)
66        groups.extend(['']*(len(stats)-1))
67    groups.append("Parameters")
68    columns = ['Seed'] + stats*len(parts) +  list(sorted(pars.keys()))
69    print(','.join('"%s"'%c for c in groups))
70    print(','.join('"%s"'%c for c in columns))
71
72# Target 'good' value for various precision levels.
73PRECISION = {
74    'fast': 1e-3,
75    'half': 1e-3,
76    'single': 5e-5,
77    'double': 5e-14,
78    'single!': 5e-5,
79    'double!': 5e-14,
80    'quad!': 5e-18,
81    'sasview': 5e-14,
82}
83def compare_instance(name, data, index, N=1, mono=True, cutoff=1e-5,
84                     base='sasview', comp='double'):
85    r"""
86    Compare the model under different calculation engines.
87
88    *name* is the name of the model.
89
90    *data* is the data object giving $q, \Delta q$ calculation points.
91
92    *index* is the active set of points.
93
94    *N* is the number of comparisons to make.
95
96    *cutoff* is the polydispersity weight cutoff to make the calculation
97    a little bit faster.
98
99    *base* and *comp* are the names of the calculation engines to compare.
100    """
101
102    is_2d = hasattr(data, 'qx_data')
103    model_info = core.load_model_info(name)
104    pars = get_demo_pars(model_info)
105    header = ('\n"Model","%s","Count","%d","Dimension","%s"'
106              % (name, N, "2D" if is_2d else "1D"))
107    if not mono: header += ',"Cutoff",%g'%(cutoff,)
108    print(header)
109
110    if is_2d:
111        if not model_info['has_2d']:
112            print(',"1-D only"')
113            return
114
115    # Some not very clean macros for evaluating the models and checking the
116    # results.  They freely use variables from the current scope, even some
117    # which have not been defined yet, complete with abuse of mutable lists
118    # to allow them to update values in the current scope since nonlocal
119    # declarations are not available in python 2.7.
120    def try_model(fn, pars):
121        """
122        Return the model evaluated at *pars*.  If there is an exception,
123        print it and return NaN of the right shape.
124        """
125        try:
126            result = fn(**pars)
127        except KeyboardInterrupt:
128            raise
129        except:
130            traceback.print_exc()
131            print("when comparing %s for %d"%(name, seed))
132            if hasattr(data, 'qx_data'):
133                result = np.NaN*data.data
134            else:
135                result = np.NaN*data.x
136        return result
137    def check_model(pars):
138        """
139        Run the two calculators against *pars*, returning statistics
140        on the differences.  See :func:`calc_stats` for the list of stats.
141        """
142        base_value = try_model(calc_base, pars)
143        comp_value = try_model(calc_comp, pars)
144        stats = calc_stats(base_value, comp_value, index)
145        max_diff[0] = max(max_diff[0], stats[0])
146        good[0] = good[0] and (stats[0] < expected)
147        return list(stats)
148
149
150    calc_base = make_engine(model_info, data, base, cutoff)
151    calc_comp = make_engine(model_info, data, comp, cutoff)
152    expected = max(PRECISION[base], PRECISION[comp])
153
154    num_good = 0
155    first = True
156    max_diff = [0]
157    for k in range(N):
158        print("%s %d"%(name, k), file=sys.stderr)
159        seed = np.random.randint(1e6)
160        pars_i = randomize_pars(pars, seed)
161        constrain_pars(model_info, pars_i)
162        constrain_new_to_old(model_info, pars_i)
163        if mono:
164            pars_i = suppress_pd(pars_i)
165
166        good = [True]
167        columns = check_model(pars_i)
168        columns += [v for _, v in sorted(pars_i.items())]
169        if first:
170            labels = [" vs. ".join((calc_base.engine, calc_comp.engine))]
171            print_column_headers(pars_i, labels)
172            first = False
173        if good[0]:
174            num_good += 1
175        else:
176            print(("%d,"%seed)+','.join("%s"%v for v in columns))
177    print('"good","%d/%d","max diff",%g'%(num_good, N, max_diff[0]))
178
179
180def print_usage():
181    """
182    Print the command usage string.
183    """
184    print("usage: compare_many.py MODEL COUNT (1dNQ|2dNQ) (CUTOFF|mono) (single|double|quad)")
185
186
187def print_models():
188    """
189    Print the list of available models in columns.
190    """
191    print(columnize(MODELS, indent="  "))
192
193
194def print_help():
195    """
196    Print usage string, the option description and the list of available models.
197    """
198    print_usage()
199    print("""\
200
201MODEL is the model name of the model or "all" for all the models
202in alphabetical order.
203
204COUNT is the number of randomly generated parameter sets to try. A value
205of "10000" is a reasonable check for monodisperse models, or "100" for
206polydisperse models.   For a quick check, use "100" and "5" respectively.
207
208NQ is the number of Q values to calculate.  If it starts with "1d", then
209it is a 1-dimensional problem, with log spaced Q points from 1e-3 to 1.0.
210If it starts with "2d" then it is a 2-dimensional problem, with linearly
211spaced points Q points from -1.0 to 1.0 in each dimension. The usual
212values are "1d100" for 1-D and "2d32" for 2-D.
213
214CUTOFF is the cutoff value to use for the polydisperse distribution. Weights
215below the cutoff will be ignored.  Use "mono" for monodisperse models.  The
216choice of polydisperse parameters, and the number of points in the distribution
217is set in compare.py defaults for each model.
218
219PRECISION is the floating point precision to use for comparisons.  If two
220precisions are given, then compare one to the other, ignoring sasview.
221
222Available models:
223""")
224    print_models()
225
226def main():
227    """
228    Main program.
229    """
230    if len(sys.argv) not in (6, 7):
231        print_help()
232        sys.exit(1)
233
234    model = sys.argv[1]
235    if not (model in MODELS) and (model != "all"):
236        print('Bad model %s.  Use "all" or one of:'%model)
237        print_models()
238        sys.exit(1)
239    try:
240        count = int(sys.argv[2])
241        is2D = sys.argv[3].startswith('2d')
242        assert sys.argv[3][1] == 'd'
243        Nq = int(sys.argv[3][2:])
244        mono = sys.argv[4] == 'mono'
245        cutoff = float(sys.argv[4]) if not mono else 0
246        base = sys.argv[5]
247        comp = sys.argv[6] if len(sys.argv) > 6 else "sasview"
248    except:
249        traceback.print_exc()
250        print_usage()
251        sys.exit(1)
252
253    data, index = make_data({'qmax':1.0, 'is2d':is2D, 'nq':Nq, 'res':0.,
254                             'accuracy': 'Low', 'view':'log'})
255    model_list = [model] if model != "all" else MODELS
256    for model in model_list:
257        compare_instance(model, data, index, N=count, mono=mono,
258                         cutoff=cutoff, base=base, comp=comp)
259
260if __name__ == "__main__":
261    #from .compare import push_seed
262    #with push_seed(1): main()
263    main()
Note: See TracBrowser for help on using the repository browser.