Source code for sas.fit.ParkFitting

"""
ParkFitting module contains SasParameter,Model,Data
FitArrange, ParkFit,Parameter classes.All listed classes work together
to perform a simple fit with park optimizer.
"""
#import time
import numpy
import math
from  numpy.linalg.linalg import LinAlgError
#import park
from park import fit
from park import fitresult
from  park.fitresult import FitParameter
import park.simplex
from park.assembly import Assembly
from park.assembly import Part
from park.fitmc import FitSimplex 
import park.fitmc
from park.fit import Fitter
from park.formatnum import format_uncertainty
from sas.fit.AbstractFitEngine import FitEngine
from sas.fit.AbstractFitEngine import FResult

class SasParameter(park.Parameter):
    """
    SAS model parameters for use in the PARK fitting service.
[docs] The parameter attribute value is redirected to the underlying parameter value in the SAS model. """ def __init__(self, name, model, data): """ :param name: the name of the model parameter :param model: the sas model to wrap as a park model """ park.Parameter.__init__(self, name) #self._model, self._name = model, name self.data = data self.model = model #set the value for the parameter of the given name self.set(model.getParam(name)) # TODO: model is missing parameter ranges for dispersion parameters if name not in model.details: #print "setting details for",name model.details[name] = ["", None, None] def _getvalue(self): """ override the _getvalue of park parameter :return value the parameter associates with self.name """ return self.model.getParam(self.name) def _setvalue(self, value): """ override the _setvalue pf park parameter :param value: the value to set on a given parameter """ self.model.setParam(self.name, value) value = property(_getvalue, _setvalue) def _getrange(self): """ Override _getrange of park parameter return the range of parameter """ #if not self.name in self._model.getDispParamList(): lo, hi = self.model.details[self.name][1:3] if lo is None: lo = -numpy.inf if hi is None: hi = numpy.inf if lo > hi: raise ValueError, "wrong fit range for parameters" return lo, hi def get_name(self): """ """
[docs] return self._getname() def _setrange(self, r): """ override _setrange of park parameter
:param r: the value of the range to set """ self.model.details[self.name][1:3] = r range = property(_getrange, _setrange) class ParkModel(park.Model): """ PARK wrapper for SAS models.
[docs] """ def __init__(self, sas_model, sas_data=None, **kw): """ :param sas_model: the sas model to wrap using park interface """ park.Model.__init__(self, **kw) self.model = sas_model self.name = sas_model.name self.data = sas_data #list of parameters names self.sasp = sas_model.getParamList() #list of park parameter self.parkp = [SasParameter(p, sas_model, sas_data) for p in self.sasp] #list of parameter set self.parameterset = park.ParameterSet(sas_model.name, pars=self.parkp) self.pars = [] def get_params(self, fitparams): """ return a list of value of paramter to fit
[docs] :param fitparams: list of paramaters name to fit """ list_params = [] self.pars = fitparams for item in fitparams: for element in self.parkp: if element.name == str(item): list_params.append(element.value) return list_params def set_params(self, paramlist, params): """ Set value for parameters to fit
[docs] :param params: list of value for parameters to fit """ try: for i in range(len(self.parkp)): for j in range(len(paramlist)): if self.parkp[i].name == paramlist[j]: self.parkp[i].value = params[j] self.model.setParam(self.parkp[i].name, params[j]) except: raise def eval(self, x): """ Override eval method of park model.
[docs] :param x: the x value used to compute a function """ try: return self.model.evalDistribution(x) except: raise def eval_derivs(self, x, pars=[]): """ Evaluate the model and derivatives wrt pars at x.
[docs] pars is a list of the names of the parameters for which derivatives are desired. This method needs to be specialized in the model to evaluate the model function. Alternatively, the model can implement is own version of residuals which calculates the residuals directly instead of calling eval. """ return [] class SasFitResult(fitresult.FitResult): def __init__(self, *args, **kwrds): fitresult.FitResult.__init__(self, *args, **kwrds)
[docs] self.theory = None self.inputs = [] class SasFitSimplex(FitSimplex): """ Local minimizer using Nelder-Mead simplex algorithm.
[docs] Simplex is robust and derivative free, though not very efficient. This class wraps the bounds contrained Nelder-Mead simplex implementation for `park.simplex.simplex`. """ radius = 0.05 """Size of the initial simplex; this is a portion between 0 and 1""" xtol = 1 #xtol = 1e-4 """Stop when simplex vertices are within xtol of each other""" ftol = 5e-5 """Stop when vertex values are within ftol of each other""" maxiter = None """Maximum number of iterations before fit terminates""" def __init__(self, ftol=5e-5): self.ftol = ftol def fit(self, fitness, x0): """Run the fit""" self.cancel = False
[docs] pars = fitness.fit_parameters() bounds = numpy.array([p.range for p in pars]).T result = park.simplex.simplex(fitness, x0, bounds=bounds, radius=self.radius, xtol=self.xtol, ftol=self.ftol, maxiter=self.maxiter, abort_test=self._iscancelled) #print "calls:",result.calls #print "simplex returned",result.x,result.fx # Need to make our own copy of the fit results so that the # values don't get stomped on by the next fit iteration. fitpars = [SasFitParameter(pars[i].name,pars[i].range,v, pars[i].model, pars[i].data) for i,v in enumerate(result.x)] res = SasFitResult(fitpars, result.calls, result.fx) res.inputs = [(pars[i].model, pars[i].data) for i,v in enumerate(result.x)] # Compute the parameter uncertainties from the jacobian res.calc_cov(fitness) return res class SasFitter(Fitter): """ """
[docs] def fit(self, fitness, handler): """ Global optimizer.
[docs] This function should return immediately """ # Determine initial value and bounds pars = fitness.fit_parameters() bounds = numpy.array([p.range for p in pars]).T x0 = [p.value for p in pars] # Initialize the monitor and results. # Need to make our own copy of the fit results so that the # values don't get stomped on by the next fit iteration. handler.done = False self.handler = handler fitpars = [SasFitParameter(pars[i].name, pars[i].range, v, pars[i].model, pars[i].data) for i,v in enumerate(x0)] handler.result = fitresult.FitResult(fitpars, 0, numpy.NaN) # Run the fit (fit should perform _progress and _improvement updates) # This function may return before the fit is complete. self._fit(fitness, x0, bounds) class SasFitMC(SasFitter): """ Monte Carlo optimizer.
[docs] This implements `park.fit.Fitter`. """ localfit = SasFitSimplex() start_points = 10 def __init__(self, localfit, start_points=10): self.localfit = localfit self.start_points = start_points def _fit(self, objective, x0, bounds): """ Run a monte carlo fit. This procedure maps a local optimizer across a set of initial points. """ try: park.fitmc.fitmc(objective, x0, bounds, self.localfit, self.start_points, self.handler) except: raise ValueError, "Fit did not converge.\n" class SasPart(Part): """ Part of a fitting assembly. Part holds the model itself and
[docs] associated data. The part can be initialized with a fitness object or with a pair (model,data) for the default fitness function. fitness (Fitness) object implementing the `park.assembly.Fitness` interface. In particular, fitness should provide a parameterset attribute containing a ParameterSet and a residuals method returning a vector of residuals. weight (dimensionless) weight for the model. See comments in assembly.py for details. isfitted (boolean) True if the model residuals should be included in the fit. The model parameters may still be used in parameter expressions, but there will be no comparison to the data. residuals (vector) Residuals for the model if they have been calculated, or None degrees_of_freedom Number of residuals minus number of fitted parameters. Degrees of freedom for individual models does not make sense in the presence of expressions combining models, particularly in the case where a model has many parameters but no data or many computed parameters. The degrees of freedom for the model is set to be at least one. chisq sum(residuals**2); use chisq/degrees_of_freedom to get the reduced chisq value. Get/set the weight on the given model. assembly.weight(3) returns the weight on model 3 (0-origin) assembly.weight(3,0.5) sets the weight on model 3 (0-origin) """ def __init__(self, fitness, weight=1., isfitted=True): Part.__init__(self, fitness=fitness, weight=weight, isfitted=isfitted) self.model, self.data = fitness[0], fitness[1] class SasFitParameter(FitParameter): """ Fit result for an individual parameter.
[docs] """ def __init__(self, name, range, value, model, data): FitParameter.__init__(self, name, range, value) self.model = model self.data = data def summarize(self): """ Return parameter range string.
[docs] E.g., " Gold .....|.... 5.2043 in [2,7]" """ bar = ['.']*10 lo,hi = self.range if numpy.isfinite(lo)and numpy.isfinite(hi): portion = (self.value-lo)/(hi-lo) if portion < 0: portion = 0. elif portion >= 1: portion = 0.99999999 barpos = int(math.floor(portion*len(bar))) bar[barpos] = '|' bar = "".join(bar) lostr = "[%g"%lo if numpy.isfinite(lo) else "(-inf" histr = "%g]"%hi if numpy.isfinite(hi) else "inf)" valstr = format_uncertainty(self.value, self.stderr) model_name = str(None) if self.model is not None: model_name = self.model.name data_name = str(None) if self.data is not None: data_name = self.data.name return "%25s %s %s in %s,%s, %s, %s" % (self.name,bar,valstr,lostr,histr, model_name, data_name) def __repr__(self): #return "FitParameter('%s')"%self.name return str(self.__class__)
class MyAssembly(Assembly): def __init__(self, models, curr_thread=None): """Build an assembly from a list of models."""
[docs] self.parts = [] for m in models: self.parts.append(SasPart(m)) self.curr_thread = curr_thread self.chisq = None self._cancel = False self.theory = None self._reset() def fit_parameters(self): """ Return an alphabetical list of the fitting parameters.
[docs] This function is called once at the beginning of a fit, and serves as a convenient place to precalculate what can be precalculated such as the set of fitting parameters and the parameter expressions evaluator. """ self.parameterset.setprefix() self._fitparameters = self.parameterset.fitted self._restraints = self.parameterset.restrained pars = self.parameterset.flatten() context = self.parameterset.gather_context() self._fitexpression = park.expression.build_eval(pars,context) #print "constraints",self._fitexpression.__doc__ self._fitparameters.sort(lambda a,b: cmp(a.path,b.path)) # Convert to fitparameter a object fitpars = [SasFitParameter(p.path,p.range,p.value, p.model, p.data) for p in self._fitparameters] #print "fitpars", fitpars return fitpars def extend_results_with_calculated_parameters(self, result): """ Extend result from the fit with the calculated parameters.
[docs] """ calcpars = [SasFitParameter(p.path,p.range,p.value, p.model, p.data) for p in self.parameterset.computed] result.parameters += calcpars result.theory = self.theory def eval(self): """ Recalculate the theory functions, and from them, the
[docs] residuals and chisq. :note: Call this after the parameters have been updated. """ # Handle abort from a separate thread. self._cancel = False if self.curr_thread != None: try: self.curr_thread.isquit() except: self._cancel = True # Evaluate the computed parameters try: self._fitexpression() except NameError: pass # Check that the resulting parameters are in a feasible region. if not self.isfeasible(): return numpy.inf resid = [] k = len(self._fitparameters) for m in self.parts: # In order to support abort, need to be able to propagate an # external abort signal from self.abort() into an abort signal # for the particular model. Can't see a way to do this which # doesn't involve setting a state variable. self._current_model = m if self._cancel: return numpy.inf if m.isfitted and m.weight != 0: m.residuals, self.theory = m.fitness.residuals() N = len(m.residuals) m.degrees_of_freedom = N-k if N>k else 1 # dividing residuals by N in order to be consistent with Scipy m.chisq = numpy.sum(m.residuals**2/N) resid.append(m.weight*m.residuals) self.residuals = numpy.hstack(resid) N = len(self.residuals) self.degrees_of_freedom = N-k if N>k else 1 self.chisq = numpy.sum(self.residuals**2) return self.chisq/self.degrees_of_freedom class ParkFit(FitEngine): """ ParkFit performs the Fit.This class can be used as follow:
[docs] #Do the fit Park create an engine: engine = ParkFit() Use data must be of type plottable Use a sas model Add data with a dictionnary of FitArrangeList where Uid is a key and data is saved in FitArrange object. engine.set_data(data,Uid) Set model parameter "M1"= model.name add {model.parameter.name:value}. ..note:: Set_param() if used must always preceded set_model() for the fit to be performed. ``engine.set_param( model,"M1", {'A':2,'B':4})`` Add model with a dictionnary of FitArrangeList{} where Uid is a key and model is save in FitArrange object. engine.set_model(model,Uid) engine.fit return chisqr,[model.parameter 1,2,..],[[err1....][..err2...]] chisqr1, out1, cov1=engine.fit({model.parameter.name:value},qmin,qmax) ..note:: {model.parameter.name:value} is ignored in fit function since the user should make sure to call set_param himself. """ def __init__(self): """ Creates a dictionary (self.fitArrangeList={})of FitArrange elements with Uid as keys """ FitEngine.__init__(self) self.fit_arrange_dict = {} self.param_list = [] def create_assembly(self, curr_thread, reset_flag=False): """ Extract sasmodel and sasdata from
[docs] self.FitArrangelist ={Uid:FitArrange} Create parkmodel and park data ,form a list couple of parkmodel and parkdata create an assembly self.problem= park.Assembly([(parkmodel,parkdata)]) """ mylist = [] #listmodel = [] #i = 0 fitproblems = [] for fproblem in self.fit_arrange_dict.itervalues(): if fproblem.get_to_fit() == 1: fitproblems.append(fproblem) if len(fitproblems) == 0: raise RuntimeError, "No Assembly scheduled for Park fitting." for item in fitproblems: model = item.get_model() parkmodel = ParkModel(model.model, model.data) parkmodel.pars = item.pars if reset_flag: # reset the initial value; useful for batch for name in item.pars: ind = item.pars.index(name) parkmodel.model.setParam(name, item.vals[ind]) # set the constraints into the model for p,v in item.constraints: parkmodel.parameterset[str(p)].set(str(v)) for p in parkmodel.parameterset: ## does not allow status change for constraint parameters if p.status != 'computed': if p.get_name() in item.pars: ## make parameters selected for #fit will be between boundaries p.set(p.range) else: p.status = 'fixed' data_list = item.get_data() parkdata = data_list fitness = (parkmodel, parkdata) mylist.append(fitness) self.problem = MyAssembly(models=mylist, curr_thread=curr_thread) def fit(self, msg_q=None, q=None, handler=None, curr_thread=None, ftol=1.49012e-8, reset_flag=False):
[docs] """ Performs fit with park.fit module.It can perform fit with one model and a set of data, more than two fit of one model and sets of data or fit with more than two model associated with their set of data and constraints :param pars: Dictionary of parameter names for the model and their values. :param qmin: The minimum value of data's range to be fit :param qmax: The maximum value of data's range to be fit :note: all parameter are ignored most of the time.Are just there to keep ScipyFit and ParkFit interface the same. :return: result.fitness Value of the goodness of fit metric :return: result.pvec list of parameter with the best value found during fitting :return: result.cov Covariance matrix """ self.create_assembly(curr_thread=curr_thread, reset_flag=reset_flag) localfit = SasFitSimplex() localfit.ftol = ftol localfit.xtol = 1e-6 # See `park.fitresult.FitHandler` for details. fitter = SasFitMC(localfit=localfit, start_points=1) if handler == None: handler = fitresult.ConsoleUpdate(improvement_delta=0.1) result_list = [] try: result = fit.fit(self.problem, fitter=fitter, handler=handler) self.problem.extend_results_with_calculated_parameters(result) except LinAlgError: raise ValueError, "SVD did not converge" if result is None: raise RuntimeError("park did not return a fit result") for m in self.problem.parts: residuals, theory = m.fitness.residuals() small_result = FResult(model=m.model, data=m.data.sas_data) small_result.fitter_id = self.fitter_id small_result.theory = theory small_result.residuals = residuals small_result.index = m.data.idx small_result.fitness = result.fitness # Extract the parameters that are part of this model; make sure # they match the fitted parameters for this model, and place them # in the same order as they occur in the model. pars = {} for p in result.parameters: #if p.data.name == small_result.data.name and if p.model.name == small_result.model.name: model_name, par_name = p.name.split('.', 1) pars[par_name] = (p.value, p.stderr) #assert len(pars.keys()) == len(m.model.pars) v,dv = zip(*[pars[p] for p in m.model.pars]) small_result.pvec = v small_result.stderr = dv small_result.param_list = m.model.pars # normalize chisq by degrees of freedom dof = len(small_result.residuals)-len(small_result.pvec) small_result.fitness = numpy.sum(residuals**2)/dof result_list.append(small_result) if q != None: q.put(result_list) return q return result_list