1 | """ |
---|
2 | CanSAS 2D data reader for reading HDF5 formatted CanSAS files. |
---|
3 | """ |
---|
4 | |
---|
5 | import h5py |
---|
6 | import numpy as np |
---|
7 | import re |
---|
8 | import os |
---|
9 | import sys |
---|
10 | |
---|
11 | from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D, Data1D, Data2D, DataInfo, Process, Aperture |
---|
12 | from sas.sascalc.dataloader.data_info import Collimation, TransmissionSpectrum, Detector |
---|
13 | from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable |
---|
14 | |
---|
15 | |
---|
16 | |
---|
class Reader(object):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens Mantid generated HDF5 formatted files
    with file extension .h5/.H5. Any number of data sets may be present within the file and any dimensionality of data
    may be used. Currently 1D and 2D SAS data sets are supported, but future implementations will include 1D and 2D
    SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within can be either 1D I(Q) or 2D I(Qx, Qy).

    :Dependencies:
        The CanSAS HDF5 reader requires h5py v2.5.0 or later.
    """

    ## CanSAS version
    cansas_version = 2.0
    ## Logged warnings or messages
    logging = None
    ## List of errors for the current data set
    errors = None
    ## Raw file contents to be processed
    raw_data = None
    ## Data info currently being read in
    current_datainfo = None
    ## SASdata set currently being read in
    current_dataset = None
    ## List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    ## List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    ## Data type name
    type_name = "CanSAS 2.0"
    ## Wildcards
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    ## List of allowed extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass extension check
    allow_all = False
    ## List of files to return
    output = None

    def read(self, filename):
        """
        This is the general read method that all SasView data_loaders must have.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects; the list is empty when the file does not exist
            or its extension is not allowed.
        """
        ## Reinitialize the class when loading a new data file to reset all class variables
        self.reset_class_variables()
        ## A missing file yields an empty output list
        if not os.path.isfile(filename):
            return self.output
        _, extension = os.path.splitext(os.path.basename(filename))
        ## If the file type is not allowed, the empty list is returned
        if extension in self.ext or self.allow_all:
            ## Load the data file
            self.raw_data = h5py.File(filename, 'r')
            try:
                ## Read in all child elements of top level SASroot
                self.read_children(self.raw_data, [])
                ## Add the last data set to the list of outputs
                self.add_data_set()
            finally:
                ## Every array has been copied into memory by now, so the handle can be released
                self.raw_data.close()
        ## Return data set(s)
        return self.output

    def reset_class_variables(self):
        """
        Define the initial state of every per-file instance variable.
        """
        self.current_datainfo = None
        self.current_dataset = None
        self.data1d = []
        self.data2d = []
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.output = []
        self.parent_class = u''
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()

    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        :param data: h5py Group object of any kind
        :param parent_list: List of the group names leading from the file root to ``data``
        """
        ## Loop through each element of the parent and process accordingly
        for key in data.keys():
            ## Get all information for the current key
            value = data.get(key)
            class_name = self._get_class_name(value)

            if isinstance(value, h5py.Group):
                ## Remember the enclosing class so it can be restored once this group is finished
                enclosing_class = self.parent_class
                self.parent_class = class_name
                parent_list.append(key)
                ## If this is a new sasentry, store the current data sets and create a fresh Data1D/2D object
                if class_name == u'SASentry':
                    self.add_data_set(key)
                elif class_name == u'SASdata':
                    self._initialize_new_data_set(parent_list)
                ## Recursion step to access data within the group
                self.read_children(value, parent_list)
                ## Nested groups overwrite parent_class; reinstate this group's class before storing it
                self.parent_class = class_name
                self.add_intermediate()
                self.parent_class = enclosing_class
                parent_list.pop()

            elif isinstance(value, h5py.Dataset):
                ## value[()] reads the whole dataset, scalar or not; atleast_1d keeps it iterable
                data_set = np.atleast_1d(value[()])
                unit = self._get_unit(value)
                ## I, Q and mask arrays are stored whole
                if self._store_plottable_values(key, data_set, unit):
                    continue
                ## Everything else is treated point by point as meta data
                for data_point in data_set:
                    self._store_meta_data_point(key, data_point, unit)

            else:
                ## Neither a group nor a dataset (e.g. a link that could not be resolved)
                self.errors.add("ShouldNeverHappenException")

    def _store_plottable_values(self, key, data_set, unit):
        """
        Store an intensity, Q or mask array on the current data set.

        :param key: Name of the HDF5 dataset
        :param data_set: numpy array holding the values of the dataset
        :param unit: Unit string attached to the dataset, or None
        :return: True when ``key`` named a plottable array and it was stored, False otherwise
        """
        dataset = self.current_dataset
        is_2d = isinstance(dataset, plottable_2D)

        if key == u'I':
            if is_2d:
                ## The 2D shape is kept here; final_data_cleanup needs it to derive the bins
                dataset.data = data_set
                dataset.zaxis("Intensity", unit)
            else:
                dataset.y = data_set.flatten()
                dataset.yaxis("Intensity", unit)
        elif key == u'Idev':
            if is_2d:
                dataset.err_data = data_set.flatten()
            else:
                dataset.dy = data_set.flatten()
        elif key == u'Q':
            dataset.xaxis("Q", unit)
            if is_2d:
                ## q_data is the attribute final_data_cleanup reads back
                dataset.q_data = data_set.flatten()
            else:
                dataset.x = data_set.flatten()
        elif key == u'Qy':
            dataset.yaxis("Q_y", unit)
            dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            dataset.xaxis("Q_x", unit)
            dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            dataset.dqx_data = data_set.flatten()
        elif key == u'Mask':
            dataset.mask = data_set.flatten()
        else:
            return False
        return True

    def _store_meta_data_point(self, key, data_point, unit):
        """
        Store one value of a non-plottable dataset in the matching meta data field.

        :param key: Name of the HDF5 dataset
        :param data_point: A single value taken from the dataset
        :param unit: Unit string attached to the dataset, or None
        """
        info = self.current_datainfo
        parent = self.parent_class

        ## Top Level Meta Data
        if key == u'definition':
            info.meta_data['reader'] = data_point
        elif key == u'run':
            info.run.append(data_point)
        elif key == u'title':
            info.title = data_point
        elif key == u'SASnote':
            info.notes.append(data_point)

        ## Sample Information
        elif key == u'Title' and parent == u'SASsample':
            info.sample.name = data_point
        elif key == u'thickness' and parent == u'SASsample':
            info.sample.thickness = data_point
        elif key == u'temperature' and parent == u'SASsample':
            info.sample.temperature = data_point

        ## Instrumental Information
        elif key == u'name' and parent == u'SASinstrument':
            info.instrument = data_point
        elif key == u'name' and parent == u'SASdetector':
            self.detector.name = data_point
        elif key == u'SDD' and parent == u'SASdetector':
            self.detector.distance = float(data_point)
            self.detector.distance_unit = unit
        elif key == u'SSD' and parent == u'SAScollimation':
            self.collimation.length = data_point
            self.collimation.length_unit = unit
        elif key == u'name' and parent == u'SAScollimation':
            self.collimation.name = data_point

        ## Process Information
        elif key == u'name' and parent == u'SASprocess':
            self.process.name = data_point
        elif key == u'Title' and parent == u'SASprocess':
            self.process.name = data_point
        elif key == u'description' and parent == u'SASprocess':
            self.process.description = data_point
        elif key == u'date' and parent == u'SASprocess':
            self.process.date = data_point
        elif parent == u'SASprocess':
            ## Any other entry of a process is kept as a note
            self.process.notes.append(data_point)

        ## Transmission Spectrum
        elif key == u'T' and parent == u'SAStransmission_spectrum':
            self.trans_spectrum.transmission.append(data_point)
        elif key == u'Tdev' and parent == u'SAStransmission_spectrum':
            self.trans_spectrum.transmission_deviation.append(data_point)
        elif key == u'lambda' and parent == u'SAStransmission_spectrum':
            self.trans_spectrum.wavelength.append(data_point)

        ## Other Information
        elif key == u'wavelength' and parent == u'SASdata':
            info.source.wavelength = data_point
            ## Same "<field>_unit" convention as distance_unit / length_unit above
            info.source.wavelength_unit = unit
        elif key == u'radiation' and parent == u'SASsource':
            info.source.radiation = data_point
        elif key == u'transmission' and parent == u'SASdata':
            info.sample.transmission = data_point

        ## Everything else goes in meta_data
        else:
            new_key = self._create_unique_key(info.meta_data, key)
            info.meta_data[new_key] = data_point

    def add_intermediate(self):
        """
        Store the intermediate object of the group that just finished being read (identified by
        self.parent_class) in the current data info, and start a fresh intermediate object.
        """
        if self.parent_class == u'SASprocess':
            self.current_datainfo.process.append(self.process)
            self.process = Process()
        elif self.parent_class == u'SASdetector':
            self.current_datainfo.detector.append(self.detector)
            self.detector = Detector()
        elif self.parent_class == u'SAStransmission_spectrum':
            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
            self.trans_spectrum = TransmissionSpectrum()
        elif self.parent_class == u'SAScollimation':
            self.current_datainfo.collimation.append(self.collimation)
            self.collimation = Collimation()
        elif self.parent_class == u'SASaperture':
            self.collimation.aperture.append(self.aperture)
            self.aperture = Aperture()
        elif self.parent_class == u'SASdata':
            if isinstance(self.current_dataset, plottable_2D):
                self.data2d.append(self.current_dataset)
            elif isinstance(self.current_dataset, plottable_1D):
                self.data1d.append(self.current_dataset)

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_datainfo and all data1D and data2D objects and then
        combines the data and info into Data1D and Data2D objects that are appended to self.output.
        """
        datainfo = self.current_datainfo

        ## Type cast the transmission spectrum arrays to float64
        if len(datainfo.trans_spectrum) > 0:
            spectrum_list = []
            for spectrum in datainfo.trans_spectrum:
                ## NOTE(review): the first element of each array is dropped, as before; the reason
                ## is not visible in this file - confirm against TransmissionSpectrum.
                spectrum.transmission = np.delete(spectrum.transmission, [0]).astype(np.float64)
                spectrum.transmission_deviation = np.delete(spectrum.transmission_deviation, [0]).astype(np.float64)
                spectrum.wavelength = np.delete(spectrum.wavelength, [0]).astype(np.float64)
                ## Spectra without any remaining transmission values are discarded
                if len(spectrum.transmission) > 0:
                    spectrum_list.append(spectrum)
            datainfo.trans_spectrum = spectrum_list

        ## Hand the collected errors over to the data info and start a fresh set for the reader.
        ## (Assigning and then clearing the same set would leave the data info with no errors.)
        dataset_errors = self.errors
        datainfo.errors = dataset_errors
        self.errors = set()

        ## Combine all plottables with datainfo and append each to output
        ## Type cast data arrays to float64 and find min/max as appropriate
        for dataset in self.data2d:
            dataset.data = dataset.data.astype(np.float64)
            if dataset.err_data is not None:
                dataset.err_data = dataset.err_data.astype(np.float64)
            if dataset.qx_data is not None:
                dataset.xmin = np.min(dataset.qx_data)
                dataset.xmax = np.max(dataset.qx_data)
                dataset.qx_data = dataset.qx_data.astype(np.float64)
            if dataset.dqx_data is not None:
                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
            if dataset.qy_data is not None:
                dataset.ymin = np.min(dataset.qy_data)
                dataset.ymax = np.max(dataset.qy_data)
                dataset.qy_data = dataset.qy_data.astype(np.float64)
            if dataset.dqy_data is not None:
                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
            if dataset.q_data is not None:
                dataset.q_data = dataset.q_data.astype(np.float64)

            ## Build a boolean mask with one entry per data point
            if dataset.mask is not None and dataset.mask.size > dataset.data.size:
                dataset_errors.add("Mask has more points than the 2D data; extra mask values were ignored")
            dataset.mask = self._expand_mask(dataset.mask, dataset.data.size)

            ## Calculate the actual Q matrix when the file did not supply a usable one
            if dataset.q_data is None or dataset.q_data.size <= 1:
                if dataset.qx_data is not None and dataset.qy_data is not None:
                    try:
                        dataset.q_data = np.sqrt(dataset.qx_data * dataset.qx_data +
                                                 dataset.qy_data * dataset.qy_data)
                    except ValueError:
                        ## Qx and Qy have incompatible shapes
                        dataset.q_data = None
                else:
                    dataset.q_data = None

            ## Derive the bins from the 2D shape, then flatten the intensities
            if dataset.data.ndim == 2:
                n_cols = dataset.data.shape[1]
                if dataset.qy_data is not None:
                    dataset.y_bins = dataset.qy_data[0::n_cols]
                if dataset.qx_data is not None:
                    dataset.x_bins = dataset.qx_data[:n_cols]
                dataset.data = dataset.data.flatten()

            final_dataset = combine_data_info_with_plottable(dataset, datainfo)
            self.output.append(final_dataset)

        for dataset in self.data1d:
            if dataset.x is not None:
                dataset.x = dataset.x.astype(np.float64)
                dataset.xmin = np.min(dataset.x)
                dataset.xmax = np.max(dataset.x)
            if dataset.y is not None:
                dataset.y = dataset.y.astype(np.float64)
                dataset.ymin = np.min(dataset.y)
                dataset.ymax = np.max(dataset.y)
            if dataset.dx is not None:
                dataset.dx = dataset.dx.astype(np.float64)
            if dataset.dxl is not None:
                dataset.dxl = dataset.dxl.astype(np.float64)
            if dataset.dxw is not None:
                dataset.dxw = dataset.dxw.astype(np.float64)
            if dataset.dy is not None:
                dataset.dy = dataset.dy.astype(np.float64)
            final_dataset = combine_data_info_with_plottable(dataset, datainfo)
            self.output.append(final_dataset)

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final processing on the data and then
        creates a fresh DataInfo object for the next entry.

        :param key: NeXus group name for current tree level (currently unused)
        """
        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()

    def _initialize_new_data_set(self, parent_list=None):
        """
        A private class method to generate a new 1D or 2D data object based on the type of data within the set.
        Outside methods should call add_data_set() to be sure any existing data is stored properly.

        :param parent_list: List of names of parent elements
        """
        if parent_list is None:
            parent_list = []
        ## A group holding an entry whose name starts with "Qx" is treated as 2D data
        if self._find_intermediate(parent_list, "Qx"):
            self.current_dataset = plottable_2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = plottable_1D(x, y)
        self.current_datainfo.filename = self.raw_data.filename

    def _find_intermediate(self, parent_list, basename=""):
        """
        A private method used to find out whether the group reached through ``parent_list`` holds an entry
        whose name starts with a match of ``basename``.

        :param parent_list: List of parents to the current level in the HDF5 file
        :param basename: Regular expression matched against the start of each entry name
        :return: True when a matching entry exists, False otherwise
        """
        key_prog = re.compile(basename)
        top = self.raw_data
        for parent in parent_list:
            top = top.get(parent)
        return any(key_prog.match(key) for key in top.keys())

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        The counter is increased until "<name>_<numb>" is not present in the dictionary.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: The new name for the dictionary entry
        """
        ## The complete name is kept as the base so keys containing "_" are not truncated
        candidate = name
        while candidate in dictionary:
            numb += 1
            candidate = "{0}_{1}".format(name, numb)
        return candidate

    def _get_unit(self, value):
        """
        Find the unit for a particular value within the h5py dictionary

        :param value: h5py object whose ``attrs`` may carry a ``units`` or ``unit`` attribute
        :return: unit for the value passed to the method, or None when no unit is attached
        """
        unit = value.attrs.get(u'units')
        if unit is None:
            unit = value.attrs.get(u'unit')
        unit = self._to_text(unit)
        ## Convert the unit formats
        if unit == "1/A":
            unit = "A^{-1}"
        elif unit == "1/cm":
            unit = "cm^{-1}"
        return unit

    def _get_class_name(self, value):
        """
        Return the canSAS class of an HDF5 node, falling back to its NeXus class.

        :param value: h5py Group/Dataset, or None when the node could not be retrieved
        :return: The class name as text, or None when the node carries neither attribute
        """
        attrs = getattr(value, 'attrs', None)
        if attrs is None:
            return None
        class_name = attrs.get(u'canSAS_class')
        if class_name is None:
            class_name = attrs.get(u'NX_class')
        return self._to_text(class_name)

    @staticmethod
    def _to_text(item):
        """
        Decode byte strings to text so they compare equal to string literals on Python 3.

        :param item: Any attribute value
        :return: Decoded text for Python 3 byte strings, otherwise ``item`` unchanged
        """
        ## On Python 2 ``bytes`` is ``str``, so the value is left untouched there
        if isinstance(item, bytes) and not isinstance(item, str):
            return item.decode('utf-8', 'replace')
        return item

    @staticmethod
    def _expand_mask(mask, size):
        """
        Build a boolean mask of ``size`` entries from the mask read from file.

        :param mask: Flat array of mask values, or None when the file has no mask
        :param size: Number of data points the mask has to cover
        :return: Boolean numpy array; entries not covered by ``mask`` are True
        """
        full_mask = np.ones(size, dtype=bool)
        if mask is not None:
            flat_mask = np.asarray(mask).ravel()
            count = min(flat_mask.size, size)
            full_mask[:count] = flat_mask[:count]
        return full_mask
---|