1 | """ |
---|
2 | CanSAS 2D data reader for reading HDF5 formatted CanSAS files. |
---|
3 | """ |
---|
4 | |
---|
5 | import h5py |
---|
6 | import numpy as np |
---|
7 | import re |
---|
8 | import os |
---|
9 | import sys |
---|
10 | |
---|
11 | from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D, Data1D, Data2D, Sample, Source |
---|
12 | from sas.sascalc.dataloader.data_info import Process, Aperture, Collimation, TransmissionSpectrum, Detector |
---|
13 | |
---|
14 | |
---|
15 | class Reader(): |
---|
16 | """ |
---|
17 | A class for reading in CanSAS v2.0 data files. The existing iteration opens Mantid generated HDF5 formatted files |
---|
18 | with file extension .h5/.H5. Any number of data sets may be present within the file and any dimensionality of data |
---|
19 | may be used. Currently 1D and 2D SAS data sets are supported, but future implementations will include 1D and 2D |
---|
20 | SESANS data. This class assumes a single data set for each sasentry. |
---|
21 | |
---|
22 | :Dependencies: |
---|
23 | The CanSAS HDF5 reader requires h5py v2.5.0 or later. |
---|
24 | """ |
---|
25 | |
---|
26 | ## CanSAS version |
---|
27 | cansas_version = 2.0 |
---|
28 | ## Logged warnings or messages |
---|
29 | logging = None |
---|
30 | ## List of errors for the current data set |
---|
31 | errors = None |
---|
32 | ## Raw file contents to be processed |
---|
33 | raw_data = None |
---|
34 | ## Data set being modified |
---|
35 | current_dataset = None |
---|
36 | ## For recursion and saving purposes, remember parent objects |
---|
37 | parent_list = None |
---|
38 | ## Data type name |
---|
39 | type_name = "CanSAS 2.0" |
---|
40 | ## Wildcards |
---|
41 | type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"] |
---|
42 | ## List of allowed extensions |
---|
43 | ext = ['.h5', '.H5'] |
---|
44 | ## Flag to bypass extension check |
---|
45 | allow_all = False |
---|
46 | ## List of files to return |
---|
47 | output = None |
---|
48 | |
---|
49 | def __init__(self): |
---|
50 | """ |
---|
51 | Create the reader object and define initial states for class variables |
---|
52 | """ |
---|
53 | self.current_dataset = None |
---|
54 | self.datasets = [] |
---|
55 | self.raw_data = None |
---|
56 | self.errors = set() |
---|
57 | self.logging = [] |
---|
58 | self.parent_list = [] |
---|
59 | self.output = [] |
---|
60 | self.detector = Detector() |
---|
61 | self.collimation = Collimation() |
---|
62 | self.aperture = Aperture() |
---|
63 | self.process = Process() |
---|
64 | self.sample = Sample() |
---|
65 | self.source = Source() |
---|
66 | self.trans_spectrum = TransmissionSpectrum() |
---|
67 | |
---|
68 | def read(self, filename): |
---|
69 | """ |
---|
70 | This is the general read method that all SasView data_loaders must have. |
---|
71 | |
---|
72 | :param filename: A path for an HDF5 formatted CanSAS 2D data file. |
---|
73 | :return: List of Data1D/2D objects or a list of errors. |
---|
74 | """ |
---|
75 | |
---|
76 | ## Reinitialize the class when loading a new data file to reset all class variables |
---|
77 | self.__init__() |
---|
78 | ## Check that the file exists |
---|
79 | if os.path.isfile(filename): |
---|
80 | basename = os.path.basename(filename) |
---|
81 | _, extension = os.path.splitext(basename) |
---|
82 | # If the file type is not allowed, return empty list |
---|
83 | if extension in self.ext or self.allow_all: |
---|
84 | ## Load the data file |
---|
85 | self.raw_data = h5py.File(filename, 'r') |
---|
86 | ## Read in all child elements of top level SASroot |
---|
87 | self.read_children(self.raw_data) |
---|
88 | ## Add the last data set to the list of outputs |
---|
89 | self.add_data_set() |
---|
90 | ## Return data set(s) |
---|
91 | return self.output |
---|
92 | |
---|
93 | def read_children(self, data, parent=u'SASroot'): |
---|
94 | """ |
---|
95 | A recursive method for stepping through the hierarchical data file. |
---|
96 | |
---|
97 | :param data: h5py Group object of any kind |
---|
98 | :param parent: h5py Group parent name |
---|
99 | :return: None |
---|
100 | """ |
---|
101 | |
---|
102 | ## Create regex for base sasentry and for parent |
---|
103 | parent_prog = re.compile(parent) |
---|
104 | |
---|
105 | ## Loop through each element of the parent and process accordingly |
---|
106 | for key in data.keys(): |
---|
107 | ## Get all information for the current key |
---|
108 | value = data.get(key) |
---|
109 | attr_keys = value.attrs.keys() |
---|
110 | attr_values = value.attrs.values() |
---|
111 | if value.attrs.get(u'canSAS_class') is not None: |
---|
112 | class_name = value.attrs.get(u'canSAS_class') |
---|
113 | else: |
---|
114 | class_name = value.attrs.get(u'NX_class') |
---|
115 | if class_name is not None: |
---|
116 | class_prog = re.compile(class_name) |
---|
117 | else: |
---|
118 | class_prog = re.compile(value.name) |
---|
119 | |
---|
120 | if isinstance(value, h5py.Group): |
---|
121 | ##TODO: Rework this for multiple SASdata objects within a single SASentry to allow for both 1D and 2D |
---|
122 | ##TODO: data within the same SASentry - One 1D and one 2D data object for all SASdata sets? |
---|
123 | ## If this is a new sasentry, store the current data set and create a fresh Data1D/2D object |
---|
124 | if class_prog.match(u'SASentry'): |
---|
125 | self.add_data_set(key) |
---|
126 | ## Recursion step to access data within the group |
---|
127 | self.read_children(value, class_name) |
---|
128 | self.add_intermediate(class_name) |
---|
129 | |
---|
130 | elif isinstance(value, h5py.Dataset): |
---|
131 | ## If this is a dataset, store the data appropriately |
---|
132 | data_set = data[key][:] |
---|
133 | |
---|
134 | for data_point in data_set: |
---|
135 | ## Top Level Meta Data |
---|
136 | unit = self._get_unit(value) |
---|
137 | if key == u'definition': |
---|
138 | self.current_dataset.meta_data['reader'] = data_point |
---|
139 | elif key == u'run': |
---|
140 | self.current_dataset.run.append(data_point) |
---|
141 | elif key == u'title': |
---|
142 | self.current_dataset.title = data_point |
---|
143 | elif key == u'SASnote': |
---|
144 | self.current_dataset.notes.append(data_point) |
---|
145 | |
---|
146 | ## I and Q Data |
---|
147 | elif key == u'I': |
---|
148 | if type(self.current_dataset) is Data2D: |
---|
149 | self.current_dataset.data = np.append(self.current_dataset.data, data_point) |
---|
150 | self.current_dataset.zaxis("Intensity", unit) |
---|
151 | else: |
---|
152 | self.current_dataset.y = np.append(self.current_dataset.y, data_point) |
---|
153 | self.current_dataset.yaxis("Intensity", unit) |
---|
154 | elif key == u'Idev': |
---|
155 | if type(self.current_dataset) is Data2D: |
---|
156 | self.current_dataset.err_data = np.append(self.current_dataset.err_data, data_point) |
---|
157 | else: |
---|
158 | self.current_dataset.dy = np.append(self.current_dataset.dy, data_point) |
---|
159 | elif key == u'Q': |
---|
160 | self.current_dataset.xaxis("Q", unit) |
---|
161 | if type(self.current_dataset) is Data2D: |
---|
162 | self.current_dataset.q = np.append(self.current_dataset.q, data_point) |
---|
163 | else: |
---|
164 | self.current_dataset.x = np.append(self.current_dataset.x, data_point) |
---|
165 | elif key == u'Qy': |
---|
166 | self.current_dataset.yaxis("Q_y", unit) |
---|
167 | self.current_dataset.qy_data = np.append(self.current_dataset.qy_data, data_point) |
---|
168 | elif key == u'Qydev': |
---|
169 | self.current_dataset.dqy_data = np.append(self.current_dataset.dqy_data, data_point) |
---|
170 | elif key == u'Qx': |
---|
171 | self.current_dataset.xaxis("Q_x", unit) |
---|
172 | self.current_dataset.qx_data = np.append(self.current_dataset.qx_data, data_point) |
---|
173 | elif key == u'Qxdev': |
---|
174 | self.current_dataset.dqx_data = np.append(self.current_dataset.dqx_data, data_point) |
---|
175 | elif key == u'Mask': |
---|
176 | self.current_dataset.mask = np.append(self.current_dataset.mask, data_point) |
---|
177 | |
---|
178 | ## Sample Information |
---|
179 | elif key == u'Title' and parent == u'SASsample': |
---|
180 | self.sample.name = data_point |
---|
181 | elif key == u'thickness' and parent == u'SASsample': |
---|
182 | self.sample.thickness = data_point |
---|
183 | elif key == u'temperature' and parent == u'SASsample': |
---|
184 | self.sample.temperature = data_point |
---|
185 | |
---|
186 | ## Instrumental Information |
---|
187 | elif key == u'name' and parent == u'SASinstrument': |
---|
188 | self.current_dataset.instrument = data_point |
---|
189 | elif key == u'name' and parent == u'SASdetector': |
---|
190 | self.detector.name = data_point |
---|
191 | elif key == u'SDD' and parent == u'SASdetector': |
---|
192 | self.detector.distance = data_point |
---|
193 | self.detector.distance_unit = unit |
---|
194 | elif key == u'SSD' and parent == u'SAScollimation': |
---|
195 | self.collimation.length = data_point |
---|
196 | self.collimation.length_unit = unit |
---|
197 | elif key == u'name' and parent == u'SAScollimation': |
---|
198 | self.collimation.name = data_point |
---|
199 | |
---|
200 | ## Process Information |
---|
201 | elif key == u'name' and parent == u'SASprocess': |
---|
202 | self.process.name = data_point |
---|
203 | elif key == u'Title' and parent == u'SASprocess': |
---|
204 | self.process.name = data_point |
---|
205 | elif key == u'description' and parent == u'SASprocess': |
---|
206 | self.process.description = data_point |
---|
207 | elif key == u'date' and parent == u'SASprocess': |
---|
208 | self.process.date = data_point |
---|
209 | elif parent == u'SASprocess': |
---|
210 | self.process.notes.append(data_point) |
---|
211 | |
---|
212 | ## Transmission Spectrum |
---|
213 | elif key == u'T' and parent == u'SAStransmission_spectrum': |
---|
214 | self.trans_spectrum.transmission.append(data_point) |
---|
215 | elif key == u'Tdev' and parent == u'SAStransmission_spectrum': |
---|
216 | self.trans_spectrum.transmission_deviation.append(data_point) |
---|
217 | elif key == u'lambda' and parent == u'SAStransmission_spectrum': |
---|
218 | self.trans_spectrum.wavelength.append(data_point) |
---|
219 | |
---|
220 | ## Other Information |
---|
221 | elif key == u'wavelength' and parent == u'SASdata': |
---|
222 | self.source.wavelength = data_point |
---|
223 | self.source.wavelength.unit = unit |
---|
224 | elif key == u'radiation' and parent == u'SASsource': |
---|
225 | self.source.radiation = data_point |
---|
226 | elif key == u'transmission' and parent == u'SASdata': |
---|
227 | self.sample.transmission = data_point |
---|
228 | |
---|
229 | ## Everything else goes in meta_data |
---|
230 | else: |
---|
231 | new_key = self._create_unique_key(self.current_dataset.meta_data, key) |
---|
232 | self.current_dataset.meta_data[new_key] = data_point |
---|
233 | |
---|
234 | else: |
---|
235 | ## I don't know if this reachable code |
---|
236 | self.errors.add("ShouldNeverHappenException") |
---|
237 | |
---|
238 | def add_intermediate(self, parent): |
---|
239 | """ |
---|
240 | This method stores any intermediate objects within the final data set after fully reading the set. |
---|
241 | |
---|
242 | :param parent: The NXclass name for the h5py Group object that just finished being processed |
---|
243 | :return: |
---|
244 | """ |
---|
245 | |
---|
246 | if parent == u'SASprocess': |
---|
247 | self.current_dataset.process.append(self.process) |
---|
248 | self.process = Process() |
---|
249 | elif parent == u'SASdetector': |
---|
250 | self.current_dataset.detector.append(self.detector) |
---|
251 | self.detector = Detector() |
---|
252 | elif parent == u'SAStransmission_spectrum': |
---|
253 | self.current_dataset.trans_spectrum.append(self.trans_spectrum) |
---|
254 | self.trans_spectrum = TransmissionSpectrum() |
---|
255 | elif parent == u'SASsource': |
---|
256 | self.current_dataset.source = self.source |
---|
257 | self.source = Source() |
---|
258 | elif parent == u'SASsample': |
---|
259 | self.current_dataset.sample = self.sample |
---|
260 | self.sample = Sample() |
---|
261 | elif parent == u'SAScollimation': |
---|
262 | self.current_dataset.collimation.append(self.collimation) |
---|
263 | self.collimation = Collimation() |
---|
264 | elif parent == u'SASaperture': |
---|
265 | self.collimation.aperture.append(self.aperture) |
---|
266 | self.aperture = Aperture() |
---|
267 | |
---|
268 | def final_data_cleanup(self): |
---|
269 | """ |
---|
270 | Does some final cleanup and formatting on self.current_dataset |
---|
271 | """ |
---|
272 | |
---|
273 | ## Type cast data arrays to float64 and find min/max as appropriate |
---|
274 | if type(self.current_dataset) is Data2D: |
---|
275 | self.current_dataset.data = np.delete(self.current_dataset.data, [0]) |
---|
276 | self.current_dataset.data = self.current_dataset.data.astype(np.float64) |
---|
277 | self.current_dataset.err_data = np.delete(self.current_dataset.err_data, [0]) |
---|
278 | self.current_dataset.err_data = self.current_dataset.err_data.astype(np.float64) |
---|
279 | self.current_dataset.mask = np.delete(self.current_dataset.mask, [0]) |
---|
280 | if self.current_dataset.qx_data is not None: |
---|
281 | self.current_dataset.qx_data = np.delete(self.current_dataset.qx_data, [0]) |
---|
282 | self.current_dataset.xmin = np.min(self.current_dataset.qx_data) |
---|
283 | self.current_dataset.xmax = np.max(self.current_dataset.qx_data) |
---|
284 | self.current_dataset.qx_data = self.current_dataset.qx_data.astype(np.float64) |
---|
285 | if self.current_dataset.dqx_data is not None: |
---|
286 | self.current_dataset.dqx_data = np.delete(self.current_dataset.dqx_data, [0]) |
---|
287 | self.current_dataset.dqx_data = self.current_dataset.dqx_data.astype(np.float64) |
---|
288 | if self.current_dataset.qy_data is not None: |
---|
289 | self.current_dataset.qy_data = np.delete(self.current_dataset.qy_data, [0]) |
---|
290 | self.current_dataset.ymin = np.min(self.current_dataset.qy_data) |
---|
291 | self.current_dataset.ymax = np.max(self.current_dataset.qy_data) |
---|
292 | self.current_dataset.qy_data = self.current_dataset.qy_data.astype(np.float64) |
---|
293 | if self.current_dataset.dqy_data is not None: |
---|
294 | self.current_dataset.dqy_data = np.delete(self.current_dataset.dqy_data, [0]) |
---|
295 | self.current_dataset.dqy_data = self.current_dataset.dqy_data.astype(np.float64) |
---|
296 | if self.current_dataset.q_data is not None: |
---|
297 | self.current_dataset.q_data = np.delete(self.current_dataset.q_data, [0]) |
---|
298 | self.current_dataset.q_data = self.current_dataset.q_data.astype(np.float64) |
---|
299 | zeros = np.ones(self.current_dataset.data.size, dtype=bool) |
---|
300 | try: |
---|
301 | for i in range (0, self.current_dataset.mask.size - 1): |
---|
302 | zeros[i] = self.current_dataset.mask[i] |
---|
303 | except: |
---|
304 | self.errors.add(sys.exc_value) |
---|
305 | self.current_dataset.mask = zeros |
---|
306 | |
---|
307 | ## Calculate the actual Q matrix |
---|
308 | try: |
---|
309 | if self.current_dataset.q_data.size <= 1: |
---|
310 | self.current_dataset.q_data = np.sqrt(self.current_dataset.qx_data * self.current_dataset.qx_data + |
---|
311 | self.current_dataset.qy_data * self.current_dataset.qy_data) |
---|
312 | except: |
---|
313 | self.current_dataset.q_data = None |
---|
314 | |
---|
315 | elif type(self.current_dataset) is Data1D: |
---|
316 | if self.current_dataset.x is not None: |
---|
317 | self.current_dataset.x = np.delete(self.current_dataset.x, [0]) |
---|
318 | self.current_dataset.x = self.current_dataset.x.astype(np.float64) |
---|
319 | self.current_dataset.xmin = np.min(self.current_dataset.x) |
---|
320 | self.current_dataset.xmax = np.max(self.current_dataset.x) |
---|
321 | if self.current_dataset.y is not None: |
---|
322 | self.current_dataset.y = np.delete(self.current_dataset.y, [0]) |
---|
323 | self.current_dataset.y = self.current_dataset.y.astype(np.float64) |
---|
324 | self.current_dataset.ymin = np.min(self.current_dataset.y) |
---|
325 | self.current_dataset.ymax = np.max(self.current_dataset.y) |
---|
326 | if self.current_dataset.dx is not None: |
---|
327 | self.current_dataset.dx = np.delete(self.current_dataset.dx, [0]) |
---|
328 | self.current_dataset.dx = self.current_dataset.dx.astype(np.float64) |
---|
329 | if self.current_dataset.dxl is not None: |
---|
330 | self.current_dataset.dxl = np.delete(self.current_dataset.dxl, [0]) |
---|
331 | self.current_dataset.dxl = self.current_dataset.dxl.astype(np.float64) |
---|
332 | if self.current_dataset.dxw is not None: |
---|
333 | self.current_dataset.dxw = np.delete(self.current_dataset.dxw, [0]) |
---|
334 | self.current_dataset.dxw = self.current_dataset.dxw.astype(np.float64) |
---|
335 | if self.current_dataset.dy is not None: |
---|
336 | self.current_dataset.dy = np.delete(self.current_dataset.dy, [0]) |
---|
337 | self.current_dataset.dy =self.current_dataset.dy.astype(np.float64) |
---|
338 | |
---|
339 | if len(self.current_dataset.trans_spectrum) is not 0: |
---|
340 | spectrum_list = [] |
---|
341 | for spectrum in self.current_dataset.trans_spectrum: |
---|
342 | spectrum.transmission = np.delete(spectrum.transmission, [0]) |
---|
343 | spectrum.transmission = spectrum.transmission.astype(np.float64) |
---|
344 | spectrum.transmission_deviation = np.delete(spectrum.transmission_deviation, [0]) |
---|
345 | spectrum.transmission_deviation = spectrum.transmission_deviation.astype(np.float64) |
---|
346 | spectrum.wavelength = np.delete(spectrum.wavelength, [0]) |
---|
347 | spectrum.wavelength = spectrum.wavelength.astype(np.float64) |
---|
348 | spectrum_list.append(spectrum) |
---|
349 | self.current_dataset.trans_spectrum = spectrum_list |
---|
350 | |
---|
351 | else: |
---|
352 | self.errors.add("ShouldNeverHappenException") |
---|
353 | |
---|
354 | ## Append intermediate objects to data |
---|
355 | self.current_dataset.sample = self.sample |
---|
356 | self.current_dataset.source = self.source |
---|
357 | self.current_dataset.collimation.append(self.collimation) |
---|
358 | |
---|
359 | ## Append errors to dataset and reset class errors |
---|
360 | self.current_dataset.errors = self.errors |
---|
361 | self.errors.clear() |
---|
362 | |
---|
363 | def add_data_set(self, key=""): |
---|
364 | """ |
---|
365 | Adds the current_dataset to the list of outputs after preforming final processing on the data and then calls a |
---|
366 | private method to generate a new data set. |
---|
367 | |
---|
368 | :param key: NeXus group name for current tree level |
---|
369 | :return: None |
---|
370 | """ |
---|
371 | if self.current_dataset is not None: |
---|
372 | self.final_data_cleanup() |
---|
373 | self.output.append(self.current_dataset) |
---|
374 | self._initialize_new_data_set(key) |
---|
375 | |
---|
376 | def _initialize_new_data_set(self, key=""): |
---|
377 | """ |
---|
378 | A private class method to generate a new 1D or 2D data object based on the type of data within the set. |
---|
379 | Outside methods should call add_data_set() to be sure any existing data is stored properly. |
---|
380 | |
---|
381 | :param key: NeXus group name for current tree level |
---|
382 | :return: None |
---|
383 | """ |
---|
384 | entry = self._find_intermediate(key, "sasentry*") |
---|
385 | data = entry.get("sasdata") |
---|
386 | if data.get("Qx") is not None: |
---|
387 | self.current_dataset = Data2D() |
---|
388 | else: |
---|
389 | x = np.array(0) |
---|
390 | y = np.array(0) |
---|
391 | self.current_dataset = Data1D(x, y) |
---|
392 | self.current_dataset.filename = self.raw_data.filename |
---|
393 | |
---|
394 | def _find_intermediate(self, key="", basename=""): |
---|
395 | """ |
---|
396 | A private class used to find an entry by either using a direct key or knowing the approximate basename. |
---|
397 | |
---|
398 | :param key: Exact keyname of an entry |
---|
399 | :param basename: Approximate name of an entry |
---|
400 | :return: |
---|
401 | """ |
---|
402 | entry = [] |
---|
403 | if key is not "": |
---|
404 | entry = self.raw_data.get(key) |
---|
405 | else: |
---|
406 | key_prog = re.compile(basename) |
---|
407 | for key in self.raw_data.keys(): |
---|
408 | if (key_prog.match(key)): |
---|
409 | entry = self.raw_data.get(key) |
---|
410 | break |
---|
411 | return entry |
---|
412 | |
---|
413 | def _create_unique_key(self, dictionary, name, numb=0): |
---|
414 | """ |
---|
415 | Create a unique key value for any dictionary to prevent overwriting |
---|
416 | Recurses until a unique key value is found. |
---|
417 | |
---|
418 | :param dictionary: A dictionary with any number of entries |
---|
419 | :param name: The index of the item to be added to dictionary |
---|
420 | :param numb: The number to be appended to the name, starts at 0 |
---|
421 | """ |
---|
422 | if dictionary.get(name) is not None: |
---|
423 | numb += 1 |
---|
424 | name = name.split("_")[0] |
---|
425 | name += "_{0}".format(numb) |
---|
426 | name = self._create_unique_key(dictionary, name, numb) |
---|
427 | return name |
---|
428 | |
---|
429 | def _get_unit(self, value): |
---|
430 | """ |
---|
431 | Find the unit for a particular value within the h5py dictionary |
---|
432 | |
---|
433 | :param value: attribute dictionary for a particular value set |
---|
434 | :return: |
---|
435 | """ |
---|
436 | unit = value.attrs.get(u'units') |
---|
437 | if unit == None: |
---|
438 | unit = value.attrs.get(u'unit') |
---|
439 | |
---|
440 | ## Convert the unit formats |
---|
441 | if unit == "1/A": |
---|
442 | unit = "A^{-1}" |
---|
443 | elif unit == "1/cm": |
---|
444 | unit = "cm^{-1}" |
---|
445 | |
---|
446 | return unit |
---|