[216a9e1] | 1 | #!/usr/bin/env python |
---|
[d15a908] | 2 | """ |
---|
| 3 | Program to compare results from many random parameter sets for a given model. |
---|
| 4 | |
---|
| 5 | The result is a comma separated value (CSV) table that can be redirected |
---|
| 6 | from standard output into a file and loaded into a spreadsheet. |
---|
| 7 | |
---|
| 8 | The models are compared for each parameter set and if the difference is |
---|
| 9 | greater than expected for that precision, the parameter set is labeled |
---|
| 10 | as bad and written to the output, along with the random seed used to |
---|
| 11 | generate that parameter value. This seed can be used with :mod:`compare` |
---|
| 12 | to reload and display the details of the model. |
---|
| 13 | """ |
---|
[a7f909a] | 14 | from __future__ import print_function |
---|
| 15 | |
---|
[216a9e1] | 16 | import sys |
---|
[7cf2cfd] | 17 | import traceback |
---|
[216a9e1] | 18 | |
---|
| 19 | import numpy as np |
---|
| 20 | |
---|
[e922c5d] | 21 | from . import core |
---|
[a7f909a] | 22 | from .compare import (MODELS, randomize_pars, suppress_pd, make_data, |
---|
[ce346b6] | 23 | make_engine, get_pars, columnize, |
---|
[a7f909a] | 24 | constrain_pars, constrain_new_to_old) |
---|
[216a9e1] | 25 | |
---|
def calc_stats(target, value, index):
    """
    Calculate statistics between the target value and the computed value.

    *target* and *value* are the vectors being compared, with the
    difference normalized by *target* to get relative error.  Only
    the elements listed in *index* are used, though index may be
    an empty slice defined by *slice(None, None)*.

    Returns:

        *maxrel* the maximum relative difference

        *rel95* the relative difference with the 5% biggest differences ignored

        *maxabs* the maximum absolute difference for the 5% biggest differences

        *maxval* the maximum value for the 5% biggest differences

    Raises *ValueError* (from numpy) if *index* selects no points.
    """
    # Restrict every array to the active points before ranking, so that the
    # positions returned by argsort refer to the same element in each of them.
    active_value = value[index]
    resid = abs(value-target)[index]
    relerr = resid/target[index]
    sorted_rel_index = np.argsort(relerr)
    p95 = int(len(relerr)*0.95)
    # Positions (within the active points) of the largest relative errors.
    worst = sorted_rel_index[p95:]
    maxrel = np.max(relerr)
    rel95 = relerr[sorted_rel_index[p95]]
    maxabs = np.max(resid[worst])
    maxval = np.max(active_value[worst])
    return maxrel, rel95, maxabs, maxval
---|
[216a9e1] | 55 | |
---|
def print_column_headers(pars, parts):
    """
    Print the two CSV header rows to standard output.

    The first row holds one group label per entry in *parts*, padded with
    blank cells so it spans that group's statistics columns, followed by
    the label "Parameters".  The second row holds "Seed", the statistic
    names repeated once per entry in *parts*, and then the sorted keys
    of *pars*.  Every cell is wrapped in double quotes.
    """
    stat_names = 'Max rel err|95% rel err|Max abs err above 90% rel|Max value above 90% rel'.split('|')
    # Blank cells that follow each group label so it lines up with its stats.
    padding = [''] * (len(stat_names) - 1)

    group_row = ['']
    for label in parts:
        group_row += [label] + padding
    group_row += ["Parameters"]

    column_row = ['Seed']
    column_row.extend(stat_names * len(parts))
    column_row.extend(sorted(pars.keys()))

    for row in (group_row, column_row):
        print(','.join('"%s"' % cell for cell in row))
---|
| 70 | |
---|
# Largest relative difference still counted as 'good' for each calculation
# engine / precision name.  compare_instance uses the looser (larger) of the
# two values for the engines being compared.
PRECISION = dict((
    ('fast', 1e-3),
    ('half', 1e-3),
    ('single', 5e-5),
    ('double', 5e-14),
    ('single!', 5e-5),
    ('double!', 5e-14),
    ('quad!', 5e-18),
    ('sasview', 5e-14),
))
---|
def compare_instance(name, data, index, N=1, mono=True, cutoff=1e-5,
                     base='sasview', comp='double'):
    r"""
    Compare the model under different calculation engines.

    *name* is the name of the model.

    *data* is the data object giving $q, \Delta q$ calculation points.

    *index* is the active set of points.

    *N* is the number of comparisons to make.

    *mono* is True if each random parameter set should be passed through
    :func:`suppress_pd` before it is evaluated.  When it is False the
    cutoff value is added to the model header row.

    *cutoff* is the polydispersity weight cutoff to make the calculation
    a little bit faster.

    *base* and *comp* are the names of the calculation engines to compare.
    Both must be keys of PRECISION.

    Prints a CSV header row for the model to standard output, then one row
    for each random parameter set whose maximum relative difference is not
    below the larger of the PRECISION targets of the two engines, and
    finally a summary row with the number of good sets and the largest
    difference seen.  Progress is reported on standard error.  If *data*
    is 2-D and the model has no 2-D parameters, only a "1-D only" row is
    printed.
    """

    is_2d = hasattr(data, 'qx_data')
    model_info = core.load_model_info(name)
    pars = get_pars(model_info, use_demo=True)
    header = ('\n"Model","%s","Count","%d","Dimension","%s"'
              % (name, N, "2D" if is_2d else "1D"))
    if not mono:
        header += ',"Cutoff",%g'%(cutoff,)
    print(header)

    if is_2d:
        if not model_info['parameters'].has_2d:
            print(',"1-D only"')
            return

    # Some not very clean macros for evaluating the models and checking the
    # results.  They freely use variables from the current scope, even some
    # which have not been defined yet, complete with abuse of mutable lists
    # to allow them to update values in the current scope since nonlocal
    # declarations are not available in python 2.7.
    def try_model(fn, pars):
        """
        Return the model evaluated at *pars*.  If there is an exception,
        print it and return NaN of the right shape.
        """
        try:
            result = fn(**pars)
        except Exception:
            traceback.print_exc()
            print("when comparing %s for %d"%(name, seed))
            # np.nan rather than np.NaN: the capitalized alias was removed
            # in NumPy 2.0, which would make this fallback itself fail.
            if is_2d:
                result = np.nan*data.data
            else:
                result = np.nan*data.x
        return result

    def check_model(pars):
        """
        Run the two calculators against *pars*, returning statistics
        on the differences.  See :func:`calc_stats` for the list of stats.
        """
        base_value = try_model(calc_base, pars)
        comp_value = try_model(calc_comp, pars)
        stats = calc_stats(base_value, comp_value, index)
        # A NaN maximum leaves max_diff unchanged but fails the comparison
        # below, so parameter sets that raised are reported as bad.
        max_diff[0] = max(max_diff[0], stats[0])
        good[0] = good[0] and (stats[0] < expected)
        return list(stats)


    calc_base = make_engine(model_info, data, base, cutoff)
    calc_comp = make_engine(model_info, data, comp, cutoff)
    # The comparison can be no better than the less precise engine.
    expected = max(PRECISION[base], PRECISION[comp])

    num_good = 0
    first = True
    max_diff = [0]
    for k in range(N):
        print("%s %d"%(name, k), file=sys.stderr)
        # Integer bound; the seed is printed with each bad row so the
        # parameter set can be regenerated.
        seed = np.random.randint(1000000)
        pars_i = randomize_pars(pars, seed)
        constrain_pars(model_info, pars_i)
        constrain_new_to_old(model_info, pars_i)
        if mono:
            pars_i = suppress_pd(pars_i)

        good = [True]
        columns = check_model(pars_i)
        columns += [v for _, v in sorted(pars_i.items())]
        if first:
            # Column headers are emitted once, using the first parameter set.
            labels = [" vs. ".join((calc_base.engine, calc_comp.engine))]
            print_column_headers(pars_i, labels)
            first = False
        if good[0]:
            num_good += 1
        else:
            print(("%d,"%seed)+','.join("%s"%v for v in columns))
    print('"good","%d/%d","max diff",%g'%(num_good, N, max_diff[0]))
---|
[7cf2cfd] | 175 | |
---|
| 176 | |
---|
def print_usage():
    """
    Write the one-line command synopsis to standard output.
    """
    synopsis = "usage: compare_many.py MODEL COUNT (1dNQ|2dNQ) (CUTOFF|mono) (single|double|quad)"
    print(synopsis)
---|
[7cf2cfd] | 182 | |
---|
| 183 | |
---|
def print_models():
    """
    Print the names in MODELS to standard output, formatted by columnize.
    """
    listing = columnize(MODELS, indent=" ")
    print(listing)
---|
[216a9e1] | 189 | |
---|
| 190 | |
---|
def print_help():
    """
    Print the full help: the usage line, a description of each command
    line argument, and then the list of available models.
    """
    option_text = """\

MODEL is the model name of the model or "all" for all the models
in alphabetical order.

COUNT is the number of randomly generated parameter sets to try. A value
of "10000" is a reasonable check for monodisperse models, or "100" for
polydisperse models. For a quick check, use "100" and "5" respectively.

NQ is the number of Q values to calculate. If it starts with "1d", then
it is a 1-dimensional problem, with log spaced Q points from 1e-3 to 1.0.
If it starts with "2d" then it is a 2-dimensional problem, with linearly
spaced points Q points from -1.0 to 1.0 in each dimension. The usual
values are "1d100" for 1-D and "2d32" for 2-D.

CUTOFF is the cutoff value to use for the polydisperse distribution. Weights
below the cutoff will be ignored. Use "mono" for monodisperse models. The
choice of polydisperse parameters, and the number of points in the distribution
is set in compare.py defaults for each model.

PRECISION is the floating point precision to use for comparisons. If two
precisions are given, then compare one to the other, ignoring sasview.

Available models:
"""
    print_usage()
    print(option_text)
    print_models()
---|
| 222 | |
---|
def main():
    """
    Main program.

    Reads MODEL, COUNT, 1dNQ|2dNQ, CUTOFF|mono and one or two engine names
    from ``sys.argv``, builds the data set and runs
    :func:`compare_instance` for the requested model, or for every model
    in MODELS when MODEL is "all".  With a single engine name it is
    compared against "sasview".

    Exits with status 1 after printing help or usage if the argument count
    is wrong, the model is unknown, or an argument cannot be parsed.
    """
    if len(sys.argv) not in (6, 7):
        print_help()
        sys.exit(1)

    model = sys.argv[1]
    if model not in MODELS and model != "all":
        print('Bad model %s. Use "all" or one of:'%model)
        print_models()
        sys.exit(1)
    try:
        count = int(sys.argv[2])
        # Validate the dimension prefix explicitly: an assert would be
        # stripped under "python -O", and checking only the second
        # character would let e.g. "3d10" through as a 1-D request.
        dim, nq_text = sys.argv[3][:2], sys.argv[3][2:]
        if dim not in ('1d', '2d'):
            raise ValueError("expected 1dNQ or 2dNQ but got %r"
                             % (sys.argv[3],))
        is2D = dim == '2d'
        Nq = int(nq_text)
        mono = sys.argv[4] == 'mono'
        cutoff = float(sys.argv[4]) if not mono else 0
        base = sys.argv[5]
        comp = sys.argv[6] if len(sys.argv) > 6 else "sasview"
    except ValueError:
        # int(), float() and the prefix check all report bad input this way.
        traceback.print_exc()
        print_usage()
        sys.exit(1)

    data, index = make_data({'qmax':1.0, 'is2d':is2D, 'nq':Nq, 'res':0.,
                             'accuracy': 'Low', 'view':'log'})
    model_list = [model] if model != "all" else MODELS
    for model in model_list:
        compare_instance(model, data, index, N=count, mono=mono,
                         cutoff=cutoff, base=base, comp=comp)
---|
[216a9e1] | 256 | |
---|
if __name__ == "__main__":
    # Script entry point.  The two commented lines below are an alternative
    # that runs main() inside compare.push_seed(1) -- presumably to pin the
    # random seed for a reproducible run; confirm against compare.push_seed
    # before enabling.
    #from .compare import push_seed
    #with push_seed(1): main()
    main()
---|