source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 90d389d

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalc
Last change on this file since 90d389d was 132db16, checked in by Piotr Rozyczko <rozyczko@…>, 8 years ago

Refactored Cansas HDF5 reader to allow multiple instances of SASdata of any dimensionality in a single SASentry and multiple instances of SASentry in a single file. Added unit tests to test all types of data (1 entry | 1 data, 1 entry | many data, many entry | 1 data each, many entry | many data).

  • Property mode set to 100644
File size: 20.5 KB
RevLine 
[68aa210]1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
[132db16]11from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D, Data1D, Data2D, DataInfo, Process, Aperture
12from sas.sascalc.dataloader.data_info import Collimation, TransmissionSpectrum, Detector
13from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable
14
[68aa210]15
16
class Reader():
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens Mantid generated HDF5 formatted files
    with file extension .h5/.H5. Any number of data sets may be present within the file and any dimensionality of data
    may be used. Currently 1D and 2D SAS data sets are supported, but future implementations will include 1D and 2D
    SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within can be either 1D I(Q) or 2D I(Qx, Qy).

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    ## NOTE: the attributes below are declared at class level but are all
    ## re-initialized per file by reset_class_variables(), called from read().
    ## CanSAS version
    cansas_version = 2.0
    ## Logged warnings or messages
    logging = None
    ## List of errors for the current data set
    errors = None
    ## Raw file contents to be processed (an open h5py.File during read())
    raw_data = None
    ## Data info currently being read in
    current_datainfo = None
    ## SASdata set currently being read in
    current_dataset = None
    ## List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    ## List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    ## Data type name
    type_name = "CanSAS 2.0"
    ## Wildcards
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    ## List of allowed extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass extension check
    allow_all = False
    ## List of files to return
    output = None
56
57    def read(self, filename):
58        """
[ad52d31]59        This is the general read method that all SasView data_loaders must have.
[68aa210]60
61        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
[132db16]62        :return: List of Data1D/2D objects and/or a list of errors.
[68aa210]63        """
64
[ad52d31]65        ## Reinitialize the class when loading a new data file to reset all class variables
[132db16]66        self.reset_class_variables()
[68aa210]67        ## Check that the file exists
68        if os.path.isfile(filename):
69            basename = os.path.basename(filename)
70            _, extension = os.path.splitext(basename)
71            # If the file type is not allowed, return empty list
72            if extension in self.ext or self.allow_all:
73                ## Load the data file
74                self.raw_data = h5py.File(filename, 'r')
75                ## Read in all child elements of top level SASroot
[132db16]76                self.read_children(self.raw_data, [])
[ad52d31]77                ## Add the last data set to the list of outputs
[68aa210]78                self.add_data_set()
79        ## Return data set(s)
80        return self.output
81
[132db16]82    def reset_class_variables(self):
83        """
84        Create the reader object and define initial states for class variables
85        """
86        self.current_datainfo = None
87        self.current_dataset = None
88        self.data1d = []
89        self.data2d = []
90        self.raw_data = None
91        self.errors = set()
92        self.logging = []
93        self.output = []
94        self.parent_class = u''
95        self.detector = Detector()
96        self.collimation = Collimation()
97        self.aperture = Aperture()
98        self.process = Process()
99        self.trans_spectrum = TransmissionSpectrum()
100
    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        Groups are recursed into (tracking the canSAS/NX class of the parent
        in self.parent_class); datasets are unpacked point-by-point and routed
        to the appropriate Data1D/Data2D or metadata attribute based on the
        key name and the enclosing group's class.

        :param data: h5py Group object of any kind
        :param parent_list: list of group keys traversed from SASroot down to
            ``data`` (mutated in place during recursion)
        """

        ## Loop through each element of the parent and process accordingly
        for key in data.keys():
            ## Get all information for the current key
            value = data.get(key)
            ## Prefer the canSAS class attribute; fall back to the NeXus one
            if value.attrs.get(u'canSAS_class') is not None:
                class_name = value.attrs.get(u'canSAS_class')
            else:
                class_name = value.attrs.get(u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                self.parent_class = class_name
                parent_list.append(key)
                ## If this is a new sasentry, store the current data sets and create a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(parent_list)
                ## Recursion step to access data within the group
                self.read_children(value, parent_list)
                ## Flush any intermediate object (detector, process, ...) that
                ## this group finished populating
                self.add_intermediate()
                ## remove() drops the first occurrence of key, undoing the
                ## append above on the way back up the recursion
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                ## If this is a dataset, store the data appropriately
                ## (reads the whole dataset into memory, then iterates scalars)
                data_set = data[key][:]

                for data_point in data_set:
                    ## Top Level Meta Data
                    unit = self._get_unit(value)
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)

                    ## I and Q Data: route to 2D or 1D plottable depending on
                    ## what _initialize_new_data_set() created for this SASdata
                    elif key == u'I':
                        if type(self.current_dataset) is plottable_2D:
                            self.current_dataset.data = np.append(self.current_dataset.data, data_point)
                            self.current_dataset.zaxis("Intensity", unit)
                        else:
                            self.current_dataset.y = np.append(self.current_dataset.y, data_point)
                            self.current_dataset.yaxis("Intensity", unit)
                    elif key == u'Idev':
                        if type(self.current_dataset) is plottable_2D:
                            self.current_dataset.err_data = np.append(self.current_dataset.err_data, data_point)
                        else:
                            self.current_dataset.dy = np.append(self.current_dataset.dy, data_point)
                    elif key == u'Q':
                        self.current_dataset.xaxis("Q", unit)
                        if type(self.current_dataset) is plottable_2D:
                            self.current_dataset.q = np.append(self.current_dataset.q, data_point)
                        else:
                            self.current_dataset.x = np.append(self.current_dataset.x, data_point)
                    elif key == u'Qy':
                        self.current_dataset.yaxis("Q_y", unit)
                        self.current_dataset.qy_data = np.append(self.current_dataset.qy_data, data_point)
                    elif key == u'Qydev':
                        self.current_dataset.dqy_data = np.append(self.current_dataset.dqy_data, data_point)
                    elif key == u'Qx':
                        self.current_dataset.xaxis("Q_x", unit)
                        self.current_dataset.qx_data = np.append(self.current_dataset.qx_data, data_point)
                    elif key == u'Qxdev':
                        self.current_dataset.dqx_data = np.append(self.current_dataset.dqx_data, data_point)
                    elif key == u'Mask':
                        self.current_dataset.mask = np.append(self.current_dataset.mask, data_point)

                    ## Sample Information
                    elif key == u'Title' and self.parent_class == u'SASsample':
                        self.current_datainfo.sample.name = data_point
                    elif key == u'thickness' and self.parent_class == u'SASsample':
                        self.current_datainfo.sample.thickness = data_point
                    elif key == u'temperature' and self.parent_class == u'SASsample':
                        self.current_datainfo.sample.temperature = data_point

                    ## Instrumental Information
                    elif key == u'name' and self.parent_class == u'SASinstrument':
                        self.current_datainfo.instrument = data_point
                    elif key == u'name' and self.parent_class == u'SASdetector':
                        self.detector.name = data_point
                    elif key == u'SDD' and self.parent_class == u'SASdetector':
                        self.detector.distance = float(data_point)
                        self.detector.distance_unit = unit
                    elif key == u'SSD' and self.parent_class == u'SAScollimation':
                        self.collimation.length = data_point
                        self.collimation.length_unit = unit
                    elif key == u'name' and self.parent_class == u'SAScollimation':
                        self.collimation.name = data_point

                    ## Process Information
                    elif key == u'name' and self.parent_class == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'Title' and self.parent_class == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'description' and self.parent_class == u'SASprocess':
                        self.process.description = data_point
                    elif key == u'date' and self.parent_class == u'SASprocess':
                        self.process.date = data_point
                    elif self.parent_class == u'SASprocess':
                        ## Any other entry inside a SASprocess becomes a note
                        self.process.notes.append(data_point)

                    ## Transmission Spectrum
                    elif key == u'T' and self.parent_class == u'SAStransmission_spectrum':
                        self.trans_spectrum.transmission.append(data_point)
                    elif key == u'Tdev' and self.parent_class == u'SAStransmission_spectrum':
                        self.trans_spectrum.transmission_deviation.append(data_point)
                    elif key == u'lambda' and self.parent_class == u'SAStransmission_spectrum':
                        self.trans_spectrum.wavelength.append(data_point)

                    ## Other Information
                    elif key == u'wavelength' and self.parent_class == u'SASdata':
                        self.current_datainfo.source.wavelength = data_point
                        ## NOTE(review): this sets .unit on the raw value just
                        ## assigned above, not on a separate *_unit field --
                        ## looks suspicious; confirm intended behavior.
                        self.current_datainfo.source.wavelength.unit = unit
                    elif key == u'radiation' and self.parent_class == u'SASsource':
                        self.current_datainfo.source.radiation = data_point
                    elif key == u'transmission' and self.parent_class == u'SASdata':
                        self.current_datainfo.sample.transmission = data_point

                    ## Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                ## Neither a Group nor a Dataset; believed unreachable for a
                ## well-formed HDF5 file, recorded defensively
                self.errors.add("ShouldNeverHappenException")
242
[132db16]243    def add_intermediate(self):
[ad52d31]244        """
245        This method stores any intermediate objects within the final data set after fully reading the set.
246
247        :param parent: The NXclass name for the h5py Group object that just finished being processed
248        """
249
[132db16]250        if self.parent_class == u'SASprocess':
251            self.current_datainfo.process.append(self.process)
[ad52d31]252            self.process = Process()
[132db16]253        elif self.parent_class == u'SASdetector':
254            self.current_datainfo.detector.append(self.detector)
[ad52d31]255            self.detector = Detector()
[132db16]256        elif self.parent_class == u'SAStransmission_spectrum':
257            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
[ad52d31]258            self.trans_spectrum = TransmissionSpectrum()
[132db16]259        elif self.parent_class == u'SAScollimation':
260            self.current_datainfo.collimation.append(self.collimation)
[ad52d31]261            self.collimation = Collimation()
[132db16]262        elif self.parent_class == u'SASaperture':
[ad52d31]263            self.collimation.aperture.append(self.aperture)
264            self.aperture = Aperture()
[132db16]265        elif self.parent_class == u'SASdata':
266            if type(self.current_dataset) is plottable_2D:
267                self.data2d.append(self.current_dataset)
268            elif type(self.current_dataset) is plottable_1D:
269                self.data1d.append(self.current_dataset)
[68aa210]270
271    def final_data_cleanup(self):
272        """
[132db16]273        Does some final cleanup and formatting on self.current_datainfo and all data1D and data2D objects and then
274        combines the data and info into Data1D and Data2D objects
[68aa210]275        """
276
[132db16]277        ## Type cast data arrays to float64
278        if len(self.current_datainfo.trans_spectrum) > 0:
[ad52d31]279            spectrum_list = []
[132db16]280            for spectrum in self.current_datainfo.trans_spectrum:
[ad52d31]281                spectrum.transmission = np.delete(spectrum.transmission, [0])
282                spectrum.transmission = spectrum.transmission.astype(np.float64)
283                spectrum.transmission_deviation = np.delete(spectrum.transmission_deviation, [0])
284                spectrum.transmission_deviation = spectrum.transmission_deviation.astype(np.float64)
285                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
286                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
[132db16]287                if len(spectrum.transmission) > 0:
288                    spectrum_list.append(spectrum)
289            self.current_datainfo.trans_spectrum = spectrum_list
[68aa210]290
291        ## Append errors to dataset and reset class errors
[132db16]292        self.current_datainfo.errors = self.errors
[68aa210]293        self.errors.clear()
294
[132db16]295        ## Combine all plottables with datainfo and append each to output
296        ## Type cast data arrays to float64 and find min/max as appropriate
297        for dataset in self.data2d:
298            dataset.data = np.delete(dataset.data, [0])
299            dataset.data = dataset.data.astype(np.float64)
300            dataset.err_data = np.delete(dataset.err_data, [0])
301            dataset.err_data = dataset.err_data.astype(np.float64)
302            dataset.mask = np.delete(dataset.mask, [0])
303            if dataset.qx_data is not None:
304                dataset.qx_data = np.delete(dataset.qx_data, [0])
305                dataset.xmin = np.min(dataset.qx_data)
306                dataset.xmax = np.max(dataset.qx_data)
307                dataset.qx_data = dataset.qx_data.astype(np.float64)
308            if dataset.dqx_data is not None:
309                dataset.dqx_data = np.delete(dataset.dqx_data, [0])
310                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
311            if dataset.qy_data is not None:
312                dataset.qy_data = np.delete(dataset.qy_data, [0])
313                dataset.ymin = np.min(dataset.qy_data)
314                dataset.ymax = np.max(dataset.qy_data)
315                dataset.qy_data = dataset.qy_data.astype(np.float64)
316            if dataset.dqy_data is not None:
317                dataset.dqy_data = np.delete(dataset.dqy_data, [0])
318                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
319            if dataset.q_data is not None:
320                dataset.q_data = np.delete(dataset.q_data, [0])
321                dataset.q_data = dataset.q_data.astype(np.float64)
322            zeros = np.ones(dataset.data.size, dtype=bool)
323            try:
324                for i in range (0, dataset.mask.size - 1):
325                    zeros[i] = dataset.mask[i]
326            except:
327                self.errors.add(sys.exc_value)
328            dataset.mask = zeros
329            ## Calculate the actual Q matrix
330            try:
331                if dataset.q_data.size <= 1:
332                    dataset.q_data = np.sqrt(dataset.qx_data * dataset.qx_data + dataset.qy_data * dataset.qy_data)
333            except:
334                dataset.q_data = None
335            final_dataset = combine_data_info_with_plottable(dataset, self.current_datainfo)
336            self.output.append(final_dataset)
337
338        for dataset in self.data1d:
339            if dataset.x is not None:
340                dataset.x = np.delete(dataset.x, [0])
341                dataset.x = dataset.x.astype(np.float64)
342                dataset.xmin = np.min(dataset.x)
343                dataset.xmax = np.max(dataset.x)
344            if dataset.y is not None:
345                dataset.y = np.delete(dataset.y, [0])
346                dataset.y = dataset.y.astype(np.float64)
347                dataset.ymin = np.min(dataset.y)
348                dataset.ymax = np.max(dataset.y)
349            if dataset.dx is not None:
350                dataset.dx = np.delete(dataset.dx, [0])
351                dataset.dx = dataset.dx.astype(np.float64)
352            if dataset.dxl is not None:
353                dataset.dxl = np.delete(dataset.dxl, [0])
354                dataset.dxl = dataset.dxl.astype(np.float64)
355            if dataset.dxw is not None:
356                dataset.dxw = np.delete(dataset.dxw, [0])
357                dataset.dxw = dataset.dxw.astype(np.float64)
358            if dataset.dy is not None:
359                dataset.dy = np.delete(dataset.dy, [0])
360                dataset.dy = dataset.dy.astype(np.float64)
361            final_dataset = combine_data_info_with_plottable(dataset, self.current_datainfo)
362            self.output.append(final_dataset)
363
[68aa210]364    def add_data_set(self, key=""):
365        """
366        Adds the current_dataset to the list of outputs after preforming final processing on the data and then calls a
367        private method to generate a new data set.
368
369        :param key: NeXus group name for current tree level
370        """
[132db16]371
372        if self.current_datainfo and self.current_dataset:
[68aa210]373            self.final_data_cleanup()
[132db16]374        self.data1d = []
375        self.data2d = []
376        self.current_datainfo = DataInfo()
[68aa210]377
[132db16]378    def _initialize_new_data_set(self, parent_list = None):
[68aa210]379        """
380        A private class method to generate a new 1D or 2D data object based on the type of data within the set.
381        Outside methods should call add_data_set() to be sure any existing data is stored properly.
382
[132db16]383        :param parent_list: List of names of parent elements
[68aa210]384        """
[132db16]385
386        if parent_list is None:
387            parent_list = []
388        if self._find_intermediate(parent_list, "Qx"):
389            self.current_dataset = plottable_2D()
[68aa210]390        else:
391            x = np.array(0)
392            y = np.array(0)
[132db16]393            self.current_dataset = plottable_1D(x, y)
394        self.current_datainfo.filename = self.raw_data.filename
[68aa210]395
[132db16]396    def _find_intermediate(self, parent_list, basename=""):
[ad52d31]397        """
398        A private class used to find an entry by either using a direct key or knowing the approximate basename.
399
[132db16]400        :param parent_list: List of parents to the current level in the HDF5 file
401        :param basename: Approximate name of an entry to search for
[ad52d31]402        :return:
403        """
[132db16]404
405        entry = False
406        key_prog = re.compile(basename)
407        top = self.raw_data
408        for parent in parent_list:
409            top = top.get(parent)
410        for key in top.keys():
411            if (key_prog.match(key)):
412                entry = True
413                break
[ad52d31]414        return entry
415
[68aa210]416    def _create_unique_key(self, dictionary, name, numb=0):
417        """
418        Create a unique key value for any dictionary to prevent overwriting
419        Recurses until a unique key value is found.
420
421        :param dictionary: A dictionary with any number of entries
422        :param name: The index of the item to be added to dictionary
423        :param numb: The number to be appended to the name, starts at 0
[132db16]424        :return: The new name for the dictionary entry
[68aa210]425        """
426        if dictionary.get(name) is not None:
427            numb += 1
428            name = name.split("_")[0]
429            name += "_{0}".format(numb)
430            name = self._create_unique_key(dictionary, name, numb)
[d398285]431        return name
432
433    def _get_unit(self, value):
434        """
435        Find the unit for a particular value within the h5py dictionary
436
437        :param value: attribute dictionary for a particular value set
[132db16]438        :return: unit for the value passed to the method
[d398285]439        """
440        unit = value.attrs.get(u'units')
441        if unit == None:
442            unit = value.attrs.get(u'unit')
443        ## Convert the unit formats
444        if unit == "1/A":
445            unit = "A^{-1}"
446        elif unit == "1/cm":
447            unit = "cm^{-1}"
448        return unit
Note: See TracBrowser for help on using the repository browser.