1 | # This program is public domain |
---|
2 | """ |
---|
3 | Service to handle serialization and deserialization of fit objects. |
---|
4 | |
---|
5 | Object serialization is useful for long term storage, interlanguage |
---|
6 | communication and network transmission. In all cases, the process |
---|
7 | involves an initial encode() followed by a later decode(). |
---|
8 | |
---|
9 | We need the following properties for serialization/deserialization: |
---|
10 | |
---|
11 | 1. human readable so that disaster recovery is possible |
---|
12 | 2. readable/writable by other languages and environments |
---|
13 | 3. support for numerics: complex, nan, inf, arrays, full precision |
---|
14 | 4. version support: load object into newer versions of the |
---|
15 | program even if the class structure has changed |
---|
16 | 5. refactoring support: load object into newer versions of the |
---|
17 | program even if the classes have been moved or renamed |
---|
18 | |
---|
19 | A complete solution would also support self referential data structures, |
---|
20 | but that is beyond our needs. |
---|
21 | |
---|
22 | Python's builtin serialization, pickle/cPickle, cannot meet these |
---|
23 | needs. It is python specific, and not friendly to human readers |
---|
24 | or readers from other environments such as IDL which may want to |
---|
25 | load or receive data from a python program. Pickle inf/nan doesn't |
---|
26 | work on windows --- some of our models may use inf data, and some of |
---|
27 | our results may be nan. pickle has minimal support for versioning: |
---|
28 | users can write __setstate__ which accepts a dictionary and adjusts |
---|
29 | it accordingly. Beware though that version must be an instance |
---|
30 | variable rather than a class variable, since class variables are not |
---|
31 | seen by pickle. If the class is renamed, then pickle can do nothing |
---|
32 | to recover it. |
---|
33 | |
---|
34 | Instead of pickle, we break the problem into steps: structure and |
---|
35 | encoding. A pair of functions deconstruct() and reconstruct() work |
---|
36 | directly with the structure. Deconstruct extracts the state of the |
---|
37 | python object defined using a limited set of python primitives. |
---|
38 | Reconstruct takes an extracted state and rebuilds the complete python |
---|
39 | object. See documentation on the individual functions for details. |
---|
40 | |
---|
41 | For serial encoding we will use json. The json format is human |
---|
42 | readable and easily parsed. json itself does not define support |
---|
43 | of Inf/NaN, though some json tools support it using the native |
---|
44 | javascript values of Infinity and NaN. Various xml encodings are |
---|
45 | also possible, though somewhat more difficult to work with. |
---|
46 | |
---|
47 | Object persistence for long term storage places particular burdens |
---|
48 | on the serialization protocol. In particular, the class may have |
---|
49 | changed since the instance was serialized. To aid the process of |
---|
50 | maintaining classes over the long term, the class definition can |
---|
51 | contain the following magic names: |
---|
52 | |
---|
53 | __version__ |
---|
54 | Strict version number of the class. See isnewer() for |
---|
55 | details, or distutils.version.StrictVersion. |
---|
56 | __factory__ |
---|
57 | Name of a factory function to return a new instance of |
---|
58 | the class. This will be stored as the class name, and |
---|
59 | should include the complete path so that it can be |
---|
60 | imported by python. |
---|
61 | __reconstruct__ |
---|
62 | Method which takes a structure tree and rebuilds the object. |
---|
63 | This is different from __setstate__ in that __setstate__ |
---|
64 | assumes its children have already been reconstructed. This |
---|
65 | is the difference between top-down and bottom-up |
---|
66 | interpretation. Bottom-up is usually easier and sufficient, |
---|
67 | but top-down is required for radical restructuring of the |
---|
68 | object representation. |
---|
69 | |
---|
70 | |
---|
71 | Example |
---|
72 | ======= |
---|
73 | |
---|
74 | The following example shows how to use reconstruct and factory to get |
---|
75 | maximum flexibility when restoring an object:: |
---|
76 | |
---|
77 | from danse.common.serial import isnewer, reconstruct, setstate |
---|
78 | def data(): |
---|
79 | from data import Data |
---|
80 | return Data() |
---|
81 | class Data(object): |
---|
82 | __version__ = '1.2' |
---|
83 | __factory__ = 'danse.builder.data' |
---|
84 | def __reconstruct__(self,instance): |
---|
85 | ''' |
---|
86 | Reconstruct the state from a stored instance structure. |
---|
87 | ''' |
---|
88 | if isnewer('1.0',instance['version']): |
---|
89 | raise RuntimeError('pre-1.0 data objects no longer supported') |
---|
90 | if isnewer('1.1',instance['version']): |
---|
91 | # Version 1.1 added uncertainty; default it to zero |
---|
92 | instance['state']['uncertainty'] = 0 |
---|
93 | setstate(self,reconstruct(instance['state'])) |
---|
94 | """ |
---|
95 | |
---|
96 | import types |
---|
97 | import sys |
---|
98 | import demjson |
---|
99 | |
---|
def encode(obj):
    """
    Serialize a python object hierarchy to a json string.

    The object is first reduced to a tree of primitives by deconstruct(),
    and that tree is then handed to demjson for encoding.

    Basic python types (list, string, dictionary, numbers, boolean, None)
    pass through deconstruct() unchanged.  The json layer is expected to
    write tuples and sets as lists and str as unicode (behaviour of
    demjson, not of this module).

    Class instances are represented by::

        {
          '.class': 'module.classname',
          '.version': 'versionstring',
          '.state': { object state }
        }

    where the state comes from the object __getstate__, the object
    __slots__ or the object __dict__, tried in that order.  See the
    pickle documentation for details.

    Functions are represented by::

        {
          '.function': 'module.functionname'
        }

    """
    tree = deconstruct(obj)
    return demjson.encode(tree)
---|
127 | |
---|
def decode(string):
    """
    Rebuild a python object hierarchy from a json string.

    The string is parsed by demjson and the resulting tree of primitives
    is passed to reconstruct(), which recreates class instances and
    functions.  reconstruct() raises RuntimeError when the version of a
    class stored in the string is newer than the version of that class
    found on the python path.
    """
    tree = demjson.decode(string)
    return reconstruct(tree)
---|
136 | |
---|
137 | |
---|
def deconstruct(obj):
    """
    Convert an object hierarchy into python primitives.

    The primitives used are int, float, str, unicode, bool, None,
    list, tuple, set and dict.  Only exact types are recognised: an
    instance of a subclass of one of these is treated as a class instance.

    Class instances are encoded as a dict with keys '.class', '.version'
    and '.state'.  Version is copied from the attribute __version__ if it
    exists, and the state is itself deconstructed recursively.

    Functions are encoded as a dict with key '.function' holding
    'module.functionname'.

    Raises RuntimeError if the object cannot be deconstructed.  For example,
    deconstruct on deconstruct will cause problems since '.class' will
    be in the dictionary of a deconstructed object.  Raises ValueError if
    the __version__ of an object is not a valid version string.
    """
    kind = type(obj)
    # type(u'') is unicode on Python 2 and str on Python 3, so the check
    # does not rely on the Python 2-only builtin name.
    if obj is None or kind in (int, float, str, type(u''), bool):
        return obj
    if kind in (list, tuple, set):
        # Rebuild the same kind of container around the converted elements.
        return kind(deconstruct(el) for el in obj)
    if kind is dict:
        # The marker keys are reserved for encoded classes and functions.
        for name in ('.class', '.function'):
            if name in obj:
                raise RuntimeError("Cannot deconstruct dict containing "+name)
        return dict((k, deconstruct(v)) for k, v in obj.items())
    if kind is types.FunctionType:
        return {'.function': obj.__module__ + '.' + obj.__name__}
    # Anything else is treated as a class instance.  Each helper is called
    # exactly once, in the order class, version, state.
    return {
        '.class': _getclass(obj),
        '.version': _getversion(obj),
        '.state': deconstruct(_getstate(obj)),
    }
---|
176 | |
---|
def reconstruct(tree):
    """
    Reconstruct an object hierarchy from a tree of primitives.

    The tree is generated by deconstruct from python primitives
    (list, dict, string, number, boolean, None) with classes and
    functions encoded as particular kinds of dict.

    Unlike pickle, we do not make an exact copy of the original
    object.  In particular, the serialization format may not
    distinguish between list and tuples, or str and unicode.  We
    also have no support for self-referential structures.

    For a dict carrying '.class', the object is created empty and then
    filled in: if it defines __reconstruct__, that method is given the
    stored '.state' exactly as found in the tree (its children are not
    reconstructed first); otherwise the state is reconstructed and
    applied with _setstate.

    Raises RuntimeError if the tree contains a value of an unsupported
    type, or if the stored version of a class is newer than the version
    of the class available in the running program.
    """
    kind = type(tree)
    # type(u'') is unicode on Python 2 and str on Python 3.
    if tree is None or kind in (int, float, str, type(u''), bool):
        return tree
    if kind in (list, tuple, set):
        return kind(reconstruct(el) for el in tree)
    if kind is not dict:
        # Report the type of the offending value.  The original referred to
        # an unbound name here and so raised NameError instead.
        raise RuntimeError('Could not reconstruct '+kind.__name__)

    if '.class' in tree:
        # Chain if program version is newer than stored version (too cold)
        fn = _lookup_refactor(tree['.class'], tree['.version'])
        if fn is not None:
            return fn(tree)

        # Fail if program version is older than stored version (too hot)
        obj = _createobj(tree['.class'])
        if isnewer(tree['.version'], _getversion(obj)):
            raise RuntimeError('Version of %s is out of date'%tree['.class'])

        # Reconstruct if program version matches stored version (just right)
        if hasattr(obj, '__reconstruct__'):
            obj.__reconstruct__(tree['.state'])
        else:
            _setstate(obj, reconstruct(tree['.state']))
        return obj

    if '.function' in tree:
        return _import_symbol(tree['.function'])

    # Plain dictionary: rebuild each value.
    return dict((k, reconstruct(v)) for k, v in tree.items())
---|
218 | |
---|
def _getversion(obj):
    """
    Return the version string of an object.

    The version is read from the __version__ attribute and defaults to
    '0.0' when the attribute is missing.  The string is run through
    isnewer() purely to validate its format.

    Raises ValueError, with the class name appended to the message, if
    the version string cannot be parsed.
    """
    version = getattr(obj, '__version__', '0.0')
    try:
        # Force parsing of version number to check format
        isnewer(version, '0.0')
    except ValueError as msg:
        # "except X as e" works on Python 2.6+ and Python 3, unlike the
        # comma form.
        raise ValueError("%s for class %s"%(msg, obj.__class__.__name__))
    return version
---|
227 | |
---|
def _getclass(obj):
    """
    Return the dotted name stored as '.class' for an object.

    This is the __factory__ attribute when the object has one, and
    otherwise the module and name of the object's class joined by a dot.
    """
    if not hasattr(obj, '__factory__'):
        klass = obj.__class__
        return klass.__module__ + '.' + klass.__name__
    return obj.__factory__
---|
231 | |
---|
def _getstate(obj):
    """
    Extract the state of an object for serialization.

    The state is the result of __getstate__ if the object has one, else a
    dict of the __slots__ attributes that are currently set, else the
    object __dict__ itself (not a copy), else an empty dict.

    Raises RuntimeError if the object defines __getinitargs__ or
    __getnewargs__, since initialization arguments are not stored.
    """
    # Laziness: we could fetch the initargs and store them, but until
    # we need to do so, I'm not going to add the complexity.
    for hook in ('__getinitargs__', '__getnewargs__'):
        if hasattr(obj, hook):
            raise RuntimeError('Cannot serialize a class with initialization arguments')

    if hasattr(obj, '__getstate__'):
        return obj.__getstate__()
    if hasattr(obj, '__slots__'):
        # Slots which have never been assigned are left out.
        return dict((s, getattr(obj, s)) for s in obj.__slots__ if hasattr(obj, s))
    if hasattr(obj, '__dict__'):
        return obj.__dict__
    return {}
---|
246 | |
---|
def _setstate(obj,kw):
    """
    Apply a stored state to an object and return the object.

    The state is passed to __setstate__ if the object has one.  Otherwise
    each item of kw is assigned as an attribute when the object has
    __slots__, or kw replaces the object __dict__ when it has one.
    Objects with none of these are returned untouched.
    """
    if hasattr(obj, '__setstate__'):
        obj.__setstate__(kw)
        return obj
    if hasattr(obj, '__slots__'):
        for attr, value in kw.items():
            setattr(obj, attr, value)
        return obj
    if hasattr(obj, '__dict__'):
        obj.__dict__ = kw
    return obj
---|
257 | |
---|
def _lookup_refactor(cls,ver):
    """
    Placeholder lookup of a refactoring handler for a stored class.

    Always returns None, whatever class name and version are given, so
    reconstruct() never chains to a handler.
    """
    handler = None
    return handler
---|
260 | |
---|
class _EmptyClass:
    """
    Blank class with no base listed; _createobj instantiates it and then
    swaps the instance __class__ to build an empty old-style instance.
    """
---|
def _import_symbol(path):
    """
    Recover symbol from path.

    The path is split at its last dot into a module name and a symbol
    name; the module is imported and the named attribute is returned.
    """
    module_name, _, symbol_name = path.rpartition('.')
    # __import__ returns the top-level package for dotted names, so the
    # module itself is fetched from sys.modules.
    __import__(module_name)
    return getattr(sys.modules[module_name], symbol_name)
---|
273 | |
---|
def _createobj(path):
    """
    Create an empty object which we can update with __setstate__

    The symbol named by path is imported.  A plain function is called as
    a factory with no arguments, an old-style class is instantiated
    without running __init__ by re-classing an empty instance, and a
    new-style class is created through __new__ alone.

    Raises RuntimeError if the symbol is not a function, class or type.
    """
    factory = _import_symbol(path)
    kind = type(factory)
    if kind is types.FunctionType:
        # Factory method to return an empty class instance
        return factory()
    if kind is types.ClassType:
        # Old-style class: create an empty class and override its __class__
        blank = _EmptyClass()
        blank.__class__ = factory
        return blank
    if kind is types.TypeType:
        return factory.__new__(factory)
    raise RuntimeError('%s should be a function, class or type'%path)
---|
291 | |
---|
def _strict_version_key(vstring):
    """
    Parse a strict version string into a sortable key.

    Used by isnewer() only when distutils cannot be imported.  Accepts
    the same grammar as distutils.version.StrictVersion: two or three
    dot-separated numbers with an optional 'a' or 'b' pre-release tag
    followed by a number.  Raises ValueError for anything else.
    """
    import re
    match = re.match(r'^([0-9]+)\.([0-9]+)(?:\.([0-9]+))?(?:([ab])([0-9]+))?$',
                     vstring)
    if match is None:
        raise ValueError("invalid version number '%s'" % vstring)
    major, minor, patch, tag, serial = match.groups()
    release = (int(major), int(minor), int(patch or 0))
    if tag is None:
        # A final release sorts after every pre-release of the same number.
        return release, (1, '', 0)
    return release, (0, tag, int(serial))


def isnewer(version,target):
    """
    Version comparison function.  Returns true if version is strictly
    newer than the target version; equal versions compare as not newer.

    A version number consists of two or three dot-separated numeric
    components, with an optional "pre-release" tag on the end.  The
    pre-release tag consists of the letter 'a' or 'b' followed by
    a number.  If the numeric components of two version numbers
    are equal, then one with a pre-release tag will always
    be deemed earlier (lesser) than one without.

    The following will be true for version numbers::

        8.2 < 8.19a1 < 8.19 == 8.19.0


    You should follow the rule of incrementing the minor version number
    if you add attributes to your models, and the major version number
    if you remove attributes.  Then assuming you are working with
    e.g., version 2.2, your model loading code will look like::

        if isnewer(version, Model.__version__):
            raise IOError('software is older than model')
        elif not isnewer('2.0', version):
            instantiate current model from xml
        elif not isnewer('1.0', version):
            instantiate old model from xml
            copy old model format to new model format
        else:
            raise IOError('pre-1.0 models not supported')

    Raises ValueError if either version string is malformed.

    Based on distutils.version.StrictVersion
    """
    try:
        from distutils.version import StrictVersion as Version
    except ImportError:
        # distutils is not available on every interpreter (it was removed
        # from the standard library in Python 3.12).
        Version = _strict_version_key
    return Version(version) > Version(target)
---|
328 | |
---|
class _RefactoringRegistry(object):
    """
    Directory of renamed classes.

    registry maps an old class path to a list of (asof_version, newname)
    pairs kept in increasing version order.  The dictionary is a class
    attribute, so there is a single registry shared by all callers.
    """
    registry = {}

    @classmethod
    def register(cls,oldname,newname,asof_version):
        """
        As of the target version, references to the old name are no
        longer valid (e.g., when reconstructing stored objects), and
        should be resolved by the new name (or None if they should
        just raise an error.)  The old name can then be reused for
        new objects or abandoned.
        """
        # Insert (asof_version,newname) in the right place in the
        # list of rename targets for the object.  The list holds a single
        # entry unless the name is reused.
        # NOTE(review): the original loop referred to an undefined name and
        # could not run; increasing order is chosen here so that redirect
        # can return the earliest rename after a stored version -- confirm.
        targets = cls.registry.setdefault(oldname, [])
        for idx, (version, _) in enumerate(targets):
            if isnewer(version, asof_version):
                targets.insert(idx, (asof_version, newname))
                break
        else:
            targets.append((asof_version, newname))

    @classmethod
    def redirect(cls, oldname, newname, version):
        """
        Return the name that oldname was renamed to for an object stored
        at the given version, or None if no rename applies.

        The result is the target of the first registered rename whose
        asof_version is strictly newer than version.  The newname argument
        is kept for interface compatibility and is not used.
        """
        # NOTE(review): the original returned the version rather than the
        # new name, but in practice could only return None or raise
        # KeyError; returning the new name follows the register docs.
        if oldname not in cls.registry:
            return None
        for target_version, target_name in cls.registry[oldname]:
            if isnewer(target_version, version):
                return target_name
        # No rename is newer than the stored version.
        return None


def refactor(oldname,newname,asof_version):
    """
    Register the renaming of a class.

    As code is developed and maintained over time, it is sometimes
    beneficial to restructure the source to support new features.
    However, the structure and location of particular objects is
    encoded in the saved file format.

    When you move a class that may be stored in a model,
    be sure to put an entry into the registry saying where
    the model was moved, or None if the model is no longer
    supported.

    The original notes also describe a planned reconstructor: a function
    to build a python object from a particular class/version, presumably
    older than the current version, e.g. to set default values for new
    fields or to modify components of the model which are now represented
    differently.  It would take the stored structure as its argument and
    return a python instance, restructuring the state and version fields
    as needed before building the object, so that conversions can chain
    from an ancient version to the latest.  That mechanism is not
    implemented by this function, which only records the rename.
    """
    _RefactoringRegistry.register(oldname, newname, asof_version)
---|
394 | |
---|
395 | # Test classes need to be at the top level for reconstruct to find them |
---|
class _Simple:
    """Test fixture: class with no base listed and one class attribute."""
    x = 5
---|
class _SimpleNew(object):
    """Test fixture: object subclass with one class attribute."""
    x = 5
---|
class _Slotted(object):
    """Test fixture: class whose instances store state in __slots__."""
    __slots__ = ['a', 'b']
---|
class _Controlled:
    """Test fixture: class that supplies its own state via get/setstate."""

    def __getstate__(self):
        """Return the instance dict tagged with a marker string."""
        return ["mystate", self.__dict__]

    def __setstate__(self, state):
        """Restore the instance dict, checking that the marker came back."""
        marker, attrs = state[0], state[1]
        if marker != "mystate":
            raise RuntimeError("didn't get back my state")
        self.__dict__ = attrs
---|
class _Factory:
    """Test fixture: class that names a factory function for rebuilding."""
    __factory__ = "%s._factory" % __name__


def _factory():
    """Return a new _Factory and flag on the class that the factory ran."""
    # Note: can't modify obj because state will be overridden
    _Factory.fromfactory = True
    return _Factory()
---|
class _VersionError:
    """Test fixture: class with a malformed version (trailing dot)."""
    __version__ = "3.5."
---|
def _hello():
    """Test fixture: module-level function used for the round trip test."""
    greeting = 'hello'
    return greeting
---|
def test():
    """
    Self test: round trip primitives, each kind of fixture class and a
    function through encode/decode, and check that a malformed class
    version is reported with the class name.
    """
    primitives = ['list',1,{'of':'dict',2:'really'},True,None]
    assert deconstruct(primitives) == primitives
    # Hmmm... dicts with non-string keys are not permitted by strict json
    # I'm not sure we care for our purposes, but it would be best to avoid
    # them and instead have a list of tuples which can be converted to and
    # from a dict if the need arises
    assert encode(primitives) == '["list",1,{"of":"dict",2:"really"},true,null]'

    h = _Simple()
    h.a = 2
    assert decode(encode(h)).a == h.a

    assert decode(encode(_hello))() == 'hello'

    h = _SimpleNew()
    h.a = 2
    assert decode(encode(h)).a == h.a

    h = _Slotted()
    h.a = 2
    assert decode(encode(h)).a == h.a

    h = _Controlled()
    h.a = 2
    assert decode(encode(h)).a == h.a

    h = _Factory()
    h.a = 2
    assert decode(encode(h)).a == h.a
    # _factory sets the flag on the class, so it is visible through h too.
    assert hasattr(h,'fromfactory')

    try:
        deconstruct(_VersionError())
    except ValueError as msg:
        # "except X as e" works on Python 2.6+ and Python 3.
        assert "_VersionError" in str(msg)
    else:
        raise RuntimeError("should have raised a version error")
---|
456 | |
---|
# Run the self test when the module is executed as a script.
if __name__ == "__main__":
    test()
---|