1 | #!/usr/bin/env python |
---|
2 | """ |
---|
3 | Program to compare results from many random parameter sets for a given model. |
---|
4 | |
---|
5 | The result is a comma separated value (CSV) table that can be redirected |
---|
6 | from standard output into a file and loaded into a spreadsheet. |
---|
7 | |
---|
8 | The models are compared for each parameter set and if the difference is |
---|
9 | greater than expected for that precision, the parameter set is labeled |
---|
10 | as bad and written to the output, along with the random seed used to |
---|
11 | generate that parameter set. This seed can be used with :mod:`compare` |
---|
12 | to reload and display the details of the model. |
---|
13 | """ |
---|
14 | from __future__ import print_function |
---|
15 | |
---|
16 | import sys |
---|
17 | import traceback |
---|
18 | |
---|
19 | import numpy as np |
---|
20 | |
---|
21 | from . import core |
---|
22 | from . import generate |
---|
23 | from .compare import (MODELS, randomize_pars, suppress_pd, make_data, |
---|
24 | make_engine, get_demo_pars, columnize, |
---|
25 | constrain_pars, constrain_new_to_old) |
---|
26 | |
---|
def calc_stats(target, value, index):
    """
    Calculate statistics between the target value and the computed value.

    *target* and *value* are the vectors being compared, with the
    difference normalized by *target* to get relative error.  Only
    the elements listed in *index* are used, though index may be
    an empty slice defined by *slice(None, None)*.

    Returns:

        *maxrel* the maximum relative difference

        *rel95* the relative difference with the 5% biggest differences ignored

        *maxabs* the maximum absolute difference for the 5% biggest differences

        *maxval* the maximum value for the 5% biggest differences
    """
    resid = abs(value-target)[index]
    relerr = resid/target[index]
    # Sort positions by relative error so the worst 5% can be isolated;
    # sorted_rel_index[p95:] are the indices of the top-5% offenders.
    sorted_rel_index = np.argsort(relerr)
    #p90 = int(len(relerr)*0.90)
    p95 = int(len(relerr)*0.95)
    maxrel = np.max(relerr)
    # 95th-percentile relative error: the largest relative error remaining
    # after the worst 5% are set aside.
    rel95 = relerr[sorted_rel_index[p95]]
    maxabs = np.max(resid[sorted_rel_index[p95:]])
    maxval = np.max(value[sorted_rel_index[p95:]])
    return maxrel, rel95, maxabs, maxval
---|
56 | |
---|
def print_column_headers(pars, parts):
    """
    Generate column headers for the differences and for the parameters,
    and print them to standard output.
    """
    # Column order must match the return order of :func:`calc_stats`.
    # The labels say 95% to match calc_stats, which cuts at p95 (the old
    # 90% cut is commented out there); the previous "90%" labels were stale.
    stats = list('Max rel err|95% rel err|Max abs err above 95% rel|Max value above 95% rel'.split('|'))
    # First header row groups the stats columns under the engine-pair label.
    groups = ['']
    for p in parts:
        groups.append(p)
        groups.extend(['']*(len(stats)-1))
    groups.append("Parameters")
    # Second header row: seed, one stats set per part, then sorted par names.
    columns = ['Seed'] + stats*len(parts) + list(sorted(pars.keys()))
    print(','.join('"%s"'%c for c in groups))
    print(','.join('"%s"'%c for c in columns))
---|
71 | |
---|
# Target 'good' value for various precision levels.
# Maximum relative error accepted when the named engine is one side of a
# comparison; compare_instance() uses the larger of the two engines'
# thresholds as its pass/fail cutoff.
# NOTE(review): the '!' suffix presumably selects a different engine build
# (see make_engine in compare.py) — confirm before relying on it.
PRECISION = {
    'fast': 1e-3,
    'half': 1e-3,
    'single': 5e-5,
    'double': 5e-14,
    'single!': 5e-5,
    'double!': 5e-14,
    'quad!': 5e-18,
    'sasview': 5e-14,
}
---|
def compare_instance(name, data, index, N=1, mono=True, cutoff=1e-5,
                     base='sasview', comp='double'):
    r"""
    Compare the model under different calculation engines.

    *name* is the name of the model.

    *data* is the data object giving $q, \Delta q$ calculation points.

    *index* is the active set of points.

    *N* is the number of comparisons to make.

    *mono* is True to suppress polydispersity in the random parameter sets.

    *cutoff* is the polydispersity weight cutoff to make the calculation
    a little bit faster.

    *base* and *comp* are the names of the calculation engines to compare.
    """

    is_2d = hasattr(data, 'qx_data')
    model_definition = core.load_model_definition(name)
    pars = get_demo_pars(model_definition)
    header = ('\n"Model","%s","Count","%d","Dimension","%s"'
              % (name, N, "2D" if is_2d else "1D"))
    if not mono: header += ',"Cutoff",%g'%(cutoff,)
    print(header)

    if is_2d:
        # 2-D comparisons only make sense for models with orientation or
        # magnetic parameters; otherwise report and skip.
        info = generate.make_info(model_definition)
        partype = info['partype']
        if not partype['orientation'] and not partype['magnetic']:
            print(',"1-D only"')
            return

    # Some not very clean macros for evaluating the models and checking the
    # results.  They freely use variables from the current scope, even some
    # which have not been defined yet, complete with abuse of mutable lists
    # to allow them to update values in the current scope since nonlocal
    # declarations are not available in python 2.7.
    def try_model(fn, pars):
        """
        Return the model evaluated at *pars*.  If there is an exception,
        print it and return NaN of the right shape.
        """
        try:
            result = fn(**pars)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Was a bare "except:", which also swallowed SystemExit and
            # GeneratorExit; only genuine evaluation errors belong here.
            traceback.print_exc()
            print("when comparing %s for %d"%(name, seed))
            # np.NaN was removed in numpy 2.0; np.nan works everywhere.
            if hasattr(data, 'qx_data'):
                result = np.nan*data.data
            else:
                result = np.nan*data.x
        return result
    def check_model(pars):
        """
        Run the two calculators against *pars*, returning statistics
        on the differences.  See :func:`calc_stats` for the list of stats.
        """
        base_value = try_model(calc_base, pars)
        comp_value = try_model(calc_comp, pars)
        stats = calc_stats(base_value, comp_value, index)
        # Mutable-list updates to the enclosing scope (no nonlocal in py2).
        max_diff[0] = max(max_diff[0], stats[0])
        good[0] = good[0] and (stats[0] < expected)
        return list(stats)

    calc_base = make_engine(model_definition, data, base, cutoff)
    calc_comp = make_engine(model_definition, data, comp, cutoff)
    # Pass/fail threshold: the looser of the two engines' precision targets.
    expected = max(PRECISION[base], PRECISION[comp])

    num_good = 0
    first = True
    max_diff = [0]
    for k in range(N):
        print("%s %d"%(name, k), file=sys.stderr)
        # randint(1e6) passed a float bound, which modern numpy rejects.
        seed = np.random.randint(1000000)
        pars_i = randomize_pars(pars, seed)
        constrain_pars(model_definition, pars_i)
        constrain_new_to_old(model_definition, pars_i)
        if mono:
            pars_i = suppress_pd(pars_i)

        good = [True]
        columns = check_model(pars_i)
        columns += [v for _, v in sorted(pars_i.items())]
        if first:
            # Headers are deferred until the parameter names are known.
            labels = [" vs. ".join((calc_base.engine, calc_comp.engine))]
            print_column_headers(pars_i, labels)
            first = False
        if good[0]:
            num_good += 1
        else:
            # Bad parameter set: emit the seed so compare.py can replay it.
            print(("%d,"%seed)+','.join("%s"%v for v in columns))
    print('"good","%d/%d","max diff",%g'%(num_good, N, max_diff[0]))
---|
180 | |
---|
181 | |
---|
def print_usage():
    """
    Print the command usage string.
    """
    usage = ("usage: compare_many.py MODEL COUNT (1dNQ|2dNQ) "
             "(CUTOFF|mono) (single|double|quad)")
    print(usage)
---|
187 | |
---|
188 | |
---|
def print_models():
    """
    Print the list of available models in columns.
    """
    listing = columnize(MODELS, indent="  ")
    print(listing)
---|
194 | |
---|
195 | |
---|
def print_help():
    """
    Print usage string, the option description and the list of available models.
    """
    print_usage()
    # User-facing help text.  The backslash after the opening quotes elides
    # the immediate newline, so the text starts with exactly one blank line.
    print("""\

MODEL is the model name of the model or "all" for all the models
in alphabetical order.

COUNT is the number of randomly generated parameter sets to try. A value
of "10000" is a reasonable check for monodisperse models, or "100" for
polydisperse models. For a quick check, use "100" and "5" respectively.

NQ is the number of Q values to calculate. If it starts with "1d", then
it is a 1-dimensional problem, with log spaced Q points from 1e-3 to 1.0.
If it starts with "2d" then it is a 2-dimensional problem, with linearly
spaced points Q points from -1.0 to 1.0 in each dimension. The usual
values are "1d100" for 1-D and "2d32" for 2-D.

CUTOFF is the cutoff value to use for the polydisperse distribution. Weights
below the cutoff will be ignored. Use "mono" for monodisperse models. The
choice of polydisperse parameters, and the number of points in the distribution
is set in compare.py defaults for each model.

PRECISION is the floating point precision to use for comparisons. If two
precisions are given, then compare one to the other, ignoring sasview.

Available models:
""")
    print_models()
---|
227 | |
---|
def main():
    """
    Main program.

    Parses ``sys.argv`` as described in :func:`print_help`, builds the data
    object, and runs :func:`compare_instance` for the selected model(s).
    Exits with status 1 on bad arguments.
    """
    # Expect 5 or 6 arguments after the program name; comp is optional.
    if len(sys.argv) not in (6, 7):
        print_help()
        sys.exit(1)

    model = sys.argv[1]
    if not (model in MODELS) and (model != "all"):
        print('Bad model %s. Use "all" or one of:'%model)
        print_models()
        sys.exit(1)
    try:
        count = int(sys.argv[2])
        is2D = sys.argv[3].startswith('2d')
        # Explicit check instead of assert: asserts vanish under "python -O",
        # which would let malformed NQ arguments slip through silently.
        if sys.argv[3][1] != 'd':
            raise ValueError("expected '1dNQ' or '2dNQ', got %r" % sys.argv[3])
        Nq = int(sys.argv[3][2:])
        mono = sys.argv[4] == 'mono'
        cutoff = float(sys.argv[4]) if not mono else 0
        base = sys.argv[5]
        comp = sys.argv[6] if len(sys.argv) > 6 else "sasview"
    except Exception:
        # Was a bare "except:", which also intercepted SystemExit and
        # KeyboardInterrupt; only report genuine parse failures.
        traceback.print_exc()
        print_usage()
        sys.exit(1)

    data, index = make_data({'qmax':1.0, 'is2d':is2D, 'nq':Nq, 'res':0.,
                             'accuracy': 'Low', 'view':'log'})
    model_list = [model] if model != "all" else MODELS
    for model in model_list:
        compare_instance(model, data, index, N=count, mono=mono,
                         cutoff=cutoff, base=base, comp=comp)

if __name__ == "__main__":
    main()
---|