source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ eb98f24

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since eb98f24 was eb98f24, checked in by lewis, 8 years ago

Refactor write method to its own class

The cansas_reader_HDF5 read method is designed for reading CanSAS 2.0 HDF5
formatted files, but will also read NXcanSAS files. The write method was
following the NXcanSAS spec, so has been moved to its own class to avoid
confusion between the two (very similar) formats.

  • Property mode set to 100644
File size: 20.8 KB
Line 
"""
    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
"""

import h5py
import numpy as np
import re
import os
import sys

from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D, Data1D, Data2D, DataInfo, Process, Aperture
from sas.sascalc.dataloader.data_info import Collimation, TransmissionSpectrum, Detector
from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable
15
16
class Reader():
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens Mantid generated HDF5 formatted files
    with file extension .h5/.H5. Any number of data sets may be present within the file and any dimensionality of data
    may be used. Currently 1D and 2D SAS data sets are supported, but future implementations will include 1D and 2D
    SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within can be either 1D I(Q) or 2D I(Qx, Qy).

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    ## CanSAS version
    cansas_version = 2.0
    ## Logged warnings or messages
    logging = None
    ## List of errors for the current data set
    errors = None
    ## Raw file contents to be processed
    raw_data = None
    ## Data info currently being read in
    current_datainfo = None
    ## SASdata set currently being read in
    current_dataset = None
    ## List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    ## List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    ## Data type name
    type_name = "CanSAS 2.0"
    ## Wildcards
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    ## List of allowed extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass extension check
    allow_all = False
    ## List of files to return
    output = None

    def read(self, filename):
        """
        This is the general read method that all SasView data_loaders must have.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects and/or a list of errors.
        """

        ## Reinitialize the class when loading a new data file to reset all class variables
        self.reset_class_variables()
        ## Check that the file exists
        if os.path.isfile(filename):
            basename = os.path.basename(filename)
            _, extension = os.path.splitext(basename)
            # If the file type is not allowed, return empty list
            if extension in self.ext or self.allow_all:
                ## Load the data file
                self.raw_data = h5py.File(filename, 'r')
                try:
                    ## Read in all child elements of top level SASroot
                    self.read_children(self.raw_data, [])
                    ## Add the last data set to the list of outputs
                    self.add_data_set()
                finally:
                    ## BUGFIX: always release the HDF5 file handle; the
                    ## original left the file open after reading.
                    self.raw_data.close()
        ## Return data set(s)
        return self.output

    def reset_class_variables(self):
        """
        Create the reader object and define initial states for class variables
        """
        self.current_datainfo = None
        self.current_dataset = None
        self.data1d = []
        self.data2d = []
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.output = []
        self.parent_class = u''
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()

    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        :param data: h5py Group object of any kind
        :param parent_list: List of names of the h5py Groups above this level
        """

        ## Loop through each element of the parent and process accordingly
        for key in data.keys():
            ## Get all information for the current key
            value = data.get(key)
            ## Prefer the canSAS_class attribute; fall back to NX_class
            if value.attrs.get(u'canSAS_class') is not None:
                class_name = value.attrs.get(u'canSAS_class')
            else:
                class_name = value.attrs.get(u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                self.parent_class = class_name
                parent_list.append(key)
                ## If this is a new sasentry, store the current data sets and create a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(parent_list)
                ## Recursion step to access data within the group
                self.read_children(value, parent_list)
                self.add_intermediate()
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                ## If this is a dataset, store the data appropriately
                data_set = data[key][:]

                for data_point in data_set:
                    ## Top Level Meta Data
                    unit = self._get_unit(value)
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)

                    ## I and Q Data
                    elif key == u'I':
                        if type(self.current_dataset) is plottable_2D:
                            self.current_dataset.data = np.append(self.current_dataset.data, data_point)
                            self.current_dataset.zaxis("Intensity", unit)
                        else:
                            self.current_dataset.y = np.append(self.current_dataset.y, data_point)
                            self.current_dataset.yaxis("Intensity", unit)
                    elif key == u'Idev':
                        if type(self.current_dataset) is plottable_2D:
                            self.current_dataset.err_data = np.append(self.current_dataset.err_data, data_point)
                        else:
                            self.current_dataset.dy = np.append(self.current_dataset.dy, data_point)
                    elif key == u'Q':
                        self.current_dataset.xaxis("Q", unit)
                        if type(self.current_dataset) is plottable_2D:
                            self.current_dataset.q = np.append(self.current_dataset.q, data_point)
                        else:
                            self.current_dataset.x = np.append(self.current_dataset.x, data_point)
                    elif key == u'Qy':
                        self.current_dataset.yaxis("Q_y", unit)
                        self.current_dataset.qy_data = np.append(self.current_dataset.qy_data, data_point)
                    elif key == u'Qydev':
                        self.current_dataset.dqy_data = np.append(self.current_dataset.dqy_data, data_point)
                    elif key == u'Qx':
                        self.current_dataset.xaxis("Q_x", unit)
                        self.current_dataset.qx_data = np.append(self.current_dataset.qx_data, data_point)
                    elif key == u'Qxdev':
                        self.current_dataset.dqx_data = np.append(self.current_dataset.dqx_data, data_point)
                    elif key == u'Mask':
                        self.current_dataset.mask = np.append(self.current_dataset.mask, data_point)

                    ## Sample Information
                    elif key == u'Title' and self.parent_class == u'SASsample': # CanSAS 2.0 format
                        self.current_datainfo.sample.name = data_point
                    elif key == u'name' and self.parent_class == u'SASsample': # NXcanSAS format
                        self.current_datainfo.sample.name = data_point
                    elif key == u'thickness' and self.parent_class == u'SASsample':
                        self.current_datainfo.sample.thickness = data_point
                    elif key == u'temperature' and self.parent_class == u'SASsample':
                        self.current_datainfo.sample.temperature = data_point

                    ## Instrumental Information
                    elif key == u'name' and self.parent_class == u'SASinstrument':
                        self.current_datainfo.instrument = data_point
                    elif key == u'name' and self.parent_class == u'SASdetector':
                        self.detector.name = data_point
                    elif key == u'SDD' and self.parent_class == u'SASdetector':
                        self.detector.distance = float(data_point)
                        self.detector.distance_unit = unit
                    elif key == u'SSD' and self.parent_class == u'SAScollimation':
                        self.collimation.length = data_point
                        self.collimation.length_unit = unit
                    elif key == u'name' and self.parent_class == u'SAScollimation':
                        self.collimation.name = data_point

                    ## Process Information
                    ## 'name' covers both the CanSAS 2.0 and NXcanSAS cases;
                    ## BUGFIX: a second, identical 'name' branch was removed
                    ## here because it was unreachable (shadowed by this one).
                    elif key == u'name' and self.parent_class == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'Title' and self.parent_class == u'SASprocess': # CanSAS 2.0 format
                        self.process.name = data_point
                    elif key == u'description' and self.parent_class == u'SASprocess':
                        self.process.description = data_point
                    elif key == u'date' and self.parent_class == u'SASprocess':
                        self.process.date = data_point
                    elif self.parent_class == u'SASprocess':
                        self.process.notes.append(data_point)

                    ## Transmission Spectrum
                    elif key == u'T' and self.parent_class == u'SAStransmission_spectrum':
                        self.trans_spectrum.transmission.append(data_point)
                    elif key == u'Tdev' and self.parent_class == u'SAStransmission_spectrum':
                        self.trans_spectrum.transmission_deviation.append(data_point)
                    elif key == u'lambda' and self.parent_class == u'SAStransmission_spectrum':
                        self.trans_spectrum.wavelength.append(data_point)

                    ## Other Information
                    elif key == u'wavelength' and self.parent_class == u'SASdata':
                        self.current_datainfo.source.wavelength = data_point
                        ## BUGFIX: store the unit on the Source object. The
                        ## original assigned `source.wavelength.unit`, i.e. an
                        ## attribute on the numpy scalar just stored, which
                        ## raises AttributeError.
                        self.current_datainfo.source.wavelength_unit = unit
                    elif key == u'radiation' and self.parent_class == u'SASsource':
                        self.current_datainfo.source.radiation = data_point
                    elif key == u'transmission' and self.parent_class == u'SASdata':
                        self.current_datainfo.sample.transmission = data_point

                    ## Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                ## Neither a Group nor a Dataset: should not be reachable
                self.errors.add("ShouldNeverHappenException")

    def add_intermediate(self):
        """
        This method stores any intermediate objects within the final data set after fully reading the set.

        The NXclass name of the h5py Group that just finished being processed
        is taken from self.parent_class.
        """

        if self.parent_class == u'SASprocess':
            self.current_datainfo.process.append(self.process)
            self.process = Process()
        elif self.parent_class == u'SASdetector':
            self.current_datainfo.detector.append(self.detector)
            self.detector = Detector()
        elif self.parent_class == u'SAStransmission_spectrum':
            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
            self.trans_spectrum = TransmissionSpectrum()
        elif self.parent_class == u'SAScollimation':
            self.current_datainfo.collimation.append(self.collimation)
            self.collimation = Collimation()
        elif self.parent_class == u'SASaperture':
            self.collimation.aperture.append(self.aperture)
            self.aperture = Aperture()
        elif self.parent_class == u'SASdata':
            if type(self.current_dataset) is plottable_2D:
                self.data2d.append(self.current_dataset)
            elif type(self.current_dataset) is plottable_1D:
                self.data1d.append(self.current_dataset)

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_datainfo and all data1D and data2D objects and then
        combines the data and info into Data1D and Data2D objects
        """

        ## Type cast data arrays to float64
        if len(self.current_datainfo.trans_spectrum) > 0:
            spectrum_list = []
            for spectrum in self.current_datainfo.trans_spectrum:
                ## Drop the dummy leading element used to seed the arrays
                spectrum.transmission = np.delete(spectrum.transmission, [0])
                spectrum.transmission = spectrum.transmission.astype(np.float64)
                spectrum.transmission_deviation = np.delete(spectrum.transmission_deviation, [0])
                spectrum.transmission_deviation = spectrum.transmission_deviation.astype(np.float64)
                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
                if len(spectrum.transmission) > 0:
                    spectrum_list.append(spectrum)
            self.current_datainfo.trans_spectrum = spectrum_list

        ## Append errors to dataset and reset class errors
        self.current_datainfo.errors = self.errors
        self.errors.clear()

        ## Combine all plottables with datainfo and append each to output
        ## Type cast data arrays to float64 and find min/max as appropriate
        for dataset in self.data2d:
            dataset.data = np.delete(dataset.data, [0])
            dataset.data = dataset.data.astype(np.float64)
            dataset.err_data = np.delete(dataset.err_data, [0])
            dataset.err_data = dataset.err_data.astype(np.float64)
            dataset.mask = np.delete(dataset.mask, [0])
            if dataset.qx_data is not None:
                dataset.qx_data = np.delete(dataset.qx_data, [0])
                dataset.xmin = np.min(dataset.qx_data)
                dataset.xmax = np.max(dataset.qx_data)
                dataset.qx_data = dataset.qx_data.astype(np.float64)
            if dataset.dqx_data is not None:
                dataset.dqx_data = np.delete(dataset.dqx_data, [0])
                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
            if dataset.qy_data is not None:
                dataset.qy_data = np.delete(dataset.qy_data, [0])
                dataset.ymin = np.min(dataset.qy_data)
                dataset.ymax = np.max(dataset.qy_data)
                dataset.qy_data = dataset.qy_data.astype(np.float64)
            if dataset.dqy_data is not None:
                dataset.dqy_data = np.delete(dataset.dqy_data, [0])
                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
            if dataset.q_data is not None:
                dataset.q_data = np.delete(dataset.q_data, [0])
                dataset.q_data = dataset.q_data.astype(np.float64)
            ## Build a boolean mask sized to the data; unset entries stay True
            full_mask = np.ones(dataset.data.size, dtype=bool)
            try:
                ## BUGFIX: copy every mask value; the original looped over
                ## range(0, mask.size - 1) and silently dropped the last one.
                for i in range(dataset.mask.size):
                    full_mask[i] = dataset.mask[i]
            except Exception:
                ## BUGFIX: sys.exc_value was removed in Python 3
                self.errors.add(str(sys.exc_info()[1]))
            dataset.mask = full_mask
            ## Calculate the actual Q matrix
            try:
                if dataset.q_data.size <= 1:
                    dataset.q_data = np.sqrt(dataset.qx_data * dataset.qx_data + dataset.qy_data * dataset.qy_data)
            except Exception:
                dataset.q_data = None
            final_dataset = combine_data_info_with_plottable(dataset, self.current_datainfo)
            self.output.append(final_dataset)

        for dataset in self.data1d:
            if dataset.x is not None:
                dataset.x = np.delete(dataset.x, [0])
                dataset.x = dataset.x.astype(np.float64)
                dataset.xmin = np.min(dataset.x)
                dataset.xmax = np.max(dataset.x)
            if dataset.y is not None:
                dataset.y = np.delete(dataset.y, [0])
                dataset.y = dataset.y.astype(np.float64)
                dataset.ymin = np.min(dataset.y)
                dataset.ymax = np.max(dataset.y)
            if dataset.dx is not None:
                dataset.dx = np.delete(dataset.dx, [0])
                dataset.dx = dataset.dx.astype(np.float64)
            if dataset.dxl is not None:
                dataset.dxl = np.delete(dataset.dxl, [0])
                dataset.dxl = dataset.dxl.astype(np.float64)
            if dataset.dxw is not None:
                dataset.dxw = np.delete(dataset.dxw, [0])
                dataset.dxw = dataset.dxw.astype(np.float64)
            if dataset.dy is not None:
                dataset.dy = np.delete(dataset.dy, [0])
                dataset.dy = dataset.dy.astype(np.float64)
            final_dataset = combine_data_info_with_plottable(dataset, self.current_datainfo)
            self.output.append(final_dataset)

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final processing on the data and then calls a
        private method to generate a new data set.

        :param key: NeXus group name for current tree level
        """

        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()

    def _initialize_new_data_set(self, parent_list=None):
        """
        A private class method to generate a new 1D or 2D data object based on the type of data within the set.
        Outside methods should call add_data_set() to be sure any existing data is stored properly.

        :param parent_list: List of names of parent elements
        """

        if parent_list is None:
            parent_list = []
        ## The presence of a Qx entry distinguishes 2D from 1D data
        if self._find_intermediate(parent_list, "Qx"):
            self.current_dataset = plottable_2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = plottable_1D(x, y)
        self.current_datainfo.filename = self.raw_data.filename

    def _find_intermediate(self, parent_list, basename=""):
        """
        A private method used to find an entry by either using a direct key or knowing the approximate basename.

        :param parent_list: List of parents to the current level in the HDF5 file
        :param basename: Approximate name of an entry to search for
        :return: True if an entry matching basename exists at this level
        """

        entry = False
        key_prog = re.compile(basename)
        top = self.raw_data
        for parent in parent_list:
            top = top.get(parent)
        for key in top.keys():
            if key_prog.match(key):
                entry = True
                break
        return entry

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: The new name for the dictionary entry
        """
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name

    def _get_unit(self, value):
        """
        Find the unit for a particular value within the h5py dictionary

        :param value: attribute dictionary for a particular value set
        :return: unit for the value passed to the method
        """
        unit = value.attrs.get(u'units')
        if unit is None:
            unit = value.attrs.get(u'unit')
        ## Convert the unit formats
        if unit == "1/A":
            unit = "A^{-1}"
        elif unit == "1/cm":
            unit = "cm^{-1}"
        return unit
Note: See TracBrowser for help on using the repository browser.