[216a9e1] | 1 | #!/usr/bin/env python |
---|
[d15a908] | 2 | """ |
---|
| 3 | Program to compare results from many random parameter sets for a given model. |
---|
| 4 | |
---|
| 5 | The result is a comma separated value (CSV) table that can be redirected |
---|
| 6 | from standard output into a file and loaded into a spreadsheet. |
---|
| 7 | |
---|
| 8 | The models are compared for each parameter set and if the difference is |
---|
| 9 | greater than expected for that precision, the parameter set is labeled |
---|
| 10 | as bad and written to the output, along with the random seed used to |
---|
| 11 | generate that parameter value. This seed can be used with :mod:`compare` |
---|
| 12 | to reload and display the details of the model. |
---|
| 13 | """ |
---|
[a7f909a] | 14 | from __future__ import print_function |
---|
| 15 | |
---|
[216a9e1] | 16 | import sys |
---|
[7cf2cfd] | 17 | import traceback |
---|
[216a9e1] | 18 | |
---|
| 19 | import numpy as np |
---|
| 20 | |
---|
[e922c5d] | 21 | from . import core |
---|
[a7f909a] | 22 | from . import generate |
---|
| 23 | from .compare import (MODELS, randomize_pars, suppress_pd, make_data, |
---|
| 24 | make_engine, get_demo_pars, columnize, |
---|
| 25 | constrain_pars, constrain_new_to_old) |
---|
[216a9e1] | 26 | |
---|
def calc_stats(target, value, index):
    """
    Calculate statistics between the target value and the computed value.

    *target* and *value* are the vectors being compared, with the
    difference normalized by *target* to get relative error.  Only
    the elements listed in *index* are used, though index may be
    an empty slice defined by *slice(None, None)*.

    Returns:

        *maxrel* the maximum relative difference

        *rel95* the relative difference with the 5% biggest differences ignored

        *maxabs* the maximum absolute difference among the points with the
        5% biggest relative differences

        *maxval* the maximum computed value among the points with the
        5% biggest relative differences
    """
    resid = abs(value-target)[index]
    relerr = resid/target[index]
    # The sort order refers to positions within the *index*-selected points,
    # so every array it is applied to must be restricted to *index* first.
    # Indexing the unfiltered *value* with it picks unrelated elements.
    selected_value = value[index]
    order = np.argsort(relerr)
    p95 = int(len(relerr)*0.95)
    worst = order[p95:]  # positions of the largest 5% of relative errors
    maxrel = np.max(relerr)
    rel95 = relerr[order[p95]]
    maxabs = np.max(resid[worst])
    maxval = np.max(selected_value[worst])
    return maxrel, rel95, maxabs, maxval
[216a9e1] | 56 | |
---|
def print_column_headers(pars, parts):
    """
    Write the two CSV header rows to standard output.

    The first row names each comparison in *parts* above its block of
    statistics columns and labels the start of the parameter columns.  The
    second row holds the seed column, the statistic names repeated once per
    comparison, and the sorted parameter names taken from *pars*.
    """
    stat_names = [
        'Max rel err',
        '95% rel err',
        'Max abs err above 90% rel',
        'Max value above 90% rel',
    ]
    # Each comparison label sits over its first statistic; the remaining
    # statistics columns of that comparison get an empty group cell.
    padding = [''] * (len(stat_names) - 1)
    group_row = ['']
    for part in parts:
        group_row += [part] + padding
    group_row += ["Parameters"]
    column_row = ['Seed'] + stat_names*len(parts) + sorted(pars)
    for row in (group_row, column_row):
        print(','.join('"%s"' % cell for cell in row))
| 71 | |
---|
# Target 'good' value for various precision levels, keyed by calculation
# engine name.  compare_instance takes the larger of the two engines'
# entries as the threshold that the maximum relative difference must stay
# below for a parameter set to count as good.
# NOTE(review): the usage string advertises "quad" but only 'quad!' has an
# entry here, so plain "quad" would raise KeyError when the threshold is
# looked up -- confirm which spelling is intended.
# NOTE(review): presumably the '!' suffix selects a different backend at the
# same floating point width -- TODO confirm against compare.make_engine.
PRECISION = {
    'fast': 1e-3,
    'half': 1e-3,
    'single': 5e-5,
    'double': 5e-14,
    'single!': 5e-5,
    'double!': 5e-14,
    'quad!': 5e-18,
    'sasview': 5e-14,
}
def compare_instance(name, data, index, N=1, mono=True, cutoff=1e-5,
                     base='sasview', comp='double'):
    r"""
    Compare the model under different calculation engines.

    *name* is the name of the model.

    *data* is the data object giving $q, \Delta q$ calculation points.

    *index* is the active set of points.

    *N* is the number of comparisons to make.

    *mono* is True if polydispersity should be suppressed in the randomly
    generated parameter sets.  The cutoff is only reported in the output
    header when *mono* is False.

    *cutoff* is the polydispersity weight cutoff to make the calculation
    a little bit faster.

    *base* and *comp* are the names of the calculation engines to compare.
    Both must have an entry in :data:`PRECISION`.

    The CSV report is printed to standard output, with one row for each
    parameter set whose maximum relative difference is not below the
    expected precision.  Progress is printed to standard error.
    """
    is_2d = hasattr(data, 'qx_data')
    model_definition = core.load_model_definition(name)
    pars = get_demo_pars(model_definition)
    header = ('\n"Model","%s","Count","%d","Dimension","%s"'
              % (name, N, "2D" if is_2d else "1D"))
    if not mono:
        header += ',"Cutoff",%g'%(cutoff,)
    print(header)

    if is_2d:
        # Models without orientation or magnetic parameters have nothing
        # extra to check in 2-D, so report that and skip them.
        info = generate.make_info(model_definition)
        partype = info['partype']
        if not partype['orientation'] and not partype['magnetic']:
            print(',"1-D only"')
            return

    def try_model(fn, pars, seed):
        """
        Return the model evaluated at *pars*.  If there is an exception,
        print it and return NaN of the right shape.
        """
        try:
            return fn(**pars)
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses,
            # so they still propagate and stop the run.
            traceback.print_exc()
            print("when comparing %s for %d"%(name, seed))
            # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
            return np.nan*(data.data if is_2d else data.x)

    calc_base = make_engine(model_definition, data, base, cutoff)
    calc_comp = make_engine(model_definition, data, comp, cutoff)
    # The comparison can be no better than the less precise engine.
    expected = max(PRECISION[base], PRECISION[comp])

    num_good = 0
    max_diff = 0
    for k in range(N):
        print("%s %d"%(name, k), file=sys.stderr)
        # The seed is reported with each bad row so the parameter set can be
        # regenerated later; randint needs an integer bound.
        seed = np.random.randint(1000000)
        pars_i = randomize_pars(pars, seed)
        constrain_pars(model_definition, pars_i)
        constrain_new_to_old(model_definition, pars_i)
        if mono:
            pars_i = suppress_pd(pars_i)

        base_value = try_model(calc_base, pars_i, seed)
        comp_value = try_model(calc_comp, pars_i, seed)
        stats = calc_stats(base_value, comp_value, index)
        # A NaN statistic (failed evaluation) compares False on both lines:
        # it leaves max_diff unchanged and marks the parameter set as bad.
        max_diff = max(max_diff, stats[0])
        good = stats[0] < expected

        columns = list(stats) + [v for _, v in sorted(pars_i.items())]
        if k == 0:
            # Headers need the parameter names, which are only known once
            # the first parameter set has been generated.
            labels = [" vs. ".join((calc_base.engine, calc_comp.engine))]
            print_column_headers(pars_i, labels)
        if good:
            num_good += 1
        else:
            print(("%d,"%seed)+','.join("%s"%v for v in columns))
    print('"good","%d/%d","max diff",%g'%(num_good, N, max_diff))
[7cf2cfd] | 180 | |
---|
| 181 | |
---|
def print_usage():
    """
    Write the one-line command synopsis to standard output.
    """
    synopsis = ("usage: compare_many.py MODEL COUNT (1dNQ|2dNQ) "
                "(CUTOFF|mono) (single|double|quad)")
    print(synopsis)
[7cf2cfd] | 187 | |
---|
| 188 | |
---|
def print_models():
    """
    Write the names in MODELS to standard output, laid out in columns.
    """
    listing = columnize(MODELS, indent=" ")
    print(listing)
[216a9e1] | 194 | |
---|
| 195 | |
---|
def print_help():
    """
    Write the full help to standard output: the usage line, a description
    of each command line argument, and then the available models.
    """
    details = """\

MODEL is the model name of the model or "all" for all the models
in alphabetical order.

COUNT is the number of randomly generated parameter sets to try. A value
of "10000" is a reasonable check for monodisperse models, or "100" for
polydisperse models. For a quick check, use "100" and "5" respectively.

NQ is the number of Q values to calculate. If it starts with "1d", then
it is a 1-dimensional problem, with log spaced Q points from 1e-3 to 1.0.
If it starts with "2d" then it is a 2-dimensional problem, with linearly
spaced points Q points from -1.0 to 1.0 in each dimension. The usual
values are "1d100" for 1-D and "2d32" for 2-D.

CUTOFF is the cutoff value to use for the polydisperse distribution. Weights
below the cutoff will be ignored. Use "mono" for monodisperse models. The
choice of polydisperse parameters, and the number of points in the distribution
is set in compare.py defaults for each model.

PRECISION is the floating point precision to use for comparisons. If two
precisions are given, then compare one to the other, ignoring sasview.

Available models:
"""
    print_usage()
    print(details)
    print_models()
| 227 | |
---|
def main():
    """
    Main program.

    Reads MODEL, COUNT, the 1dNQ/2dNQ dimension spec, CUTOFF (or "mono") and
    one or two precision names from the command line, then runs
    :func:`compare_instance` for the requested model, or for every model
    when MODEL is "all".  Exits with status 1 after printing help or usage
    if the arguments are missing or malformed.
    """
    if len(sys.argv) not in (6, 7):
        print_help()
        sys.exit(1)

    model = sys.argv[1]
    if model != "all" and model not in MODELS:
        print('Bad model %s. Use "all" or one of:'%model)
        print_models()
        sys.exit(1)
    try:
        count = int(sys.argv[2])
        dim = sys.argv[3]
        # Validate explicitly rather than with assert, which is stripped
        # under "python -O" and only looked at the second character.
        if dim[:2] not in ('1d', '2d'):
            raise ValueError("expected 1dNQ or 2dNQ but got %r"%dim)
        is2D = dim.startswith('2d')
        Nq = int(dim[2:])
        mono = sys.argv[4] == 'mono'
        cutoff = float(sys.argv[4]) if not mono else 0
        base = sys.argv[5]
        # With a single precision the comparison is made against sasview.
        comp = sys.argv[6] if len(sys.argv) > 6 else "sasview"
    except ValueError:
        # Only the numeric conversions and the dimension check can fail
        # here; the argument count was already verified above.
        traceback.print_exc()
        print_usage()
        sys.exit(1)

    data, index = make_data({'qmax':1.0, 'is2d':is2D, 'nq':Nq, 'res':0.,
                             'accuracy': 'Low', 'view':'log'})
    model_list = [model] if model != "all" else MODELS
    for model_name in model_list:
        compare_instance(model_name, data, index, N=count, mono=mono,
                         cutoff=cutoff, base=base, comp=comp)
[216a9e1] | 261 | |
---|
# Run the comparison when this file is executed as a script.
if __name__ == "__main__":
    # Debugging alternative, left disabled.
    # NOTE(review): presumably push_seed pins the random number generator so
    # that a run can be repeated -- confirm against compare.push_seed.
    #from .compare import push_seed
    #with push_seed(1): main()
    main()