source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ ac370c5

ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, costrafo411, magnetic_scatt, release-4.1.1, release-4.1.2, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
Last change on this file since ac370c5 was ac370c5, checked in by lewis, 6 years ago

Set data.x_bins and data.y_bins in CanSAS 2.0 reader (references #622)

  • Property mode set to 100644
File size: 20.0 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D, Data1D, Data2D, DataInfo, Process, Aperture
12from sas.sascalc.dataloader.data_info import Collimation, TransmissionSpectrum, Detector
13from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable
14
15
16
class Reader():
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens Mantid generated HDF5 formatted files
    with file extension .h5/.H5. Any number of data sets may be present within the file and any dimensionality of data
    may be used. Currently 1D and 2D SAS data sets are supported, but future implementations will include 1D and 2D
    SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within can be either 1D I(Q) or 2D I(Qx, Qy).

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    ## CanSAS version this reader targets
    cansas_version = 2.0
    ## Logged warnings or messages (list, populated per file)
    logging = None
    ## Set of error strings for the current data set
    errors = None
    ## Raw file contents to be processed (open h5py.File handle)
    raw_data = None
    ## DataInfo object currently being read in
    current_datainfo = None
    ## SASdata set (plottable_1D or plottable_2D) currently being read in
    current_dataset = None
    ## List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    ## List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    ## Data type name shown to the loader framework
    type_name = "CanSAS 2.0"
    ## Wildcards for the file dialog
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    ## List of allowed file extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass the extension check
    allow_all = False
    ## List of Data1D/Data2D objects to return from read()
    output = None
57    def read(self, filename):
58        """
59        This is the general read method that all SasView data_loaders must have.
60
61        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
62        :return: List of Data1D/2D objects and/or a list of errors.
63        """
64        ## Reinitialize the class when loading a new data file to reset all class variables
65        self.reset_class_variables()
66        ## Check that the file exists
67        if os.path.isfile(filename):
68            basename = os.path.basename(filename)
69            _, extension = os.path.splitext(basename)
70            # If the file type is not allowed, return empty list
71            if extension in self.ext or self.allow_all:
72                ## Load the data file
73                self.raw_data = h5py.File(filename, 'r')
74                ## Read in all child elements of top level SASroot
75                self.read_children(self.raw_data, [])
76                ## Add the last data set to the list of outputs
77                self.add_data_set()
78        ## Return data set(s)
79        return self.output
80
81    def reset_class_variables(self):
82        """
83        Create the reader object and define initial states for class variables
84        """
85        self.current_datainfo = None
86        self.current_dataset = None
87        self.data1d = []
88        self.data2d = []
89        self.raw_data = None
90        self.errors = set()
91        self.logging = []
92        self.output = []
93        self.parent_class = u''
94        self.detector = Detector()
95        self.collimation = Collimation()
96        self.aperture = Aperture()
97        self.process = Process()
98        self.trans_spectrum = TransmissionSpectrum()
99
100    def read_children(self, data, parent_list):
101        """
102        A recursive method for stepping through the hierarchical data file.
103
104        :param data: h5py Group object of any kind
105        :param parent: h5py Group parent name
106        """
107
108        ## Loop through each element of the parent and process accordingly
109        for key in data.keys():
110            ## Get all information for the current key
111            value = data.get(key)
112            if value.attrs.get(u'canSAS_class') is not None:
113                class_name = value.attrs.get(u'canSAS_class')
114            else:
115                class_name = value.attrs.get(u'NX_class')
116            if class_name is not None:
117                class_prog = re.compile(class_name)
118            else:
119                class_prog = re.compile(value.name)
120
121            if isinstance(value, h5py.Group):
122                self.parent_class = class_name
123                parent_list.append(key)
124                ## If this is a new sasentry, store the current data sets and create a fresh Data1D/2D object
125                if class_prog.match(u'SASentry'):
126                    self.add_data_set(key)
127                elif class_prog.match(u'SASdata'):
128                    self._initialize_new_data_set(parent_list)
129                ## Recursion step to access data within the group
130                self.read_children(value, parent_list)
131                self.add_intermediate()
132                parent_list.remove(key)
133
134            elif isinstance(value, h5py.Dataset):
135                ## If this is a dataset, store the data appropriately
136                data_set = data[key][:]
137                unit = self._get_unit(value)
138
139                ## I and Q Data
140                if key == u'I':
141                    if type(self.current_dataset) is plottable_2D:
142                        self.current_dataset.data = data_set
143                        self.current_dataset.zaxis("Intensity", unit)
144                    else:
145                        self.current_dataset.y = data_set.flatten()
146                        self.current_dataset.yaxis("Intensity", unit)
147                    continue
148                elif key == u'Idev':
149                    if type(self.current_dataset) is plottable_2D:
150                        self.current_dataset.err_data = data_set.flatten()
151                    else:
152                        self.current_dataset.dy = data_set.flatten()
153                    continue
154                elif key == u'Q':
155                    self.current_dataset.xaxis("Q", unit)
156                    if type(self.current_dataset) is plottable_2D:
157                        self.current_dataset.q = data_set.flatten()
158                    else:
159                        self.current_dataset.x = data_set.flatten()
160                    continue
161                elif key == u'Qy':
162                    self.current_dataset.yaxis("Q_y", unit)
163                    self.current_dataset.qy_data = data_set.flatten()
164                    continue
165                elif key == u'Qydev':
166                    self.current_dataset.dqy_data = data_set.flatten()
167                    continue
168                elif key == u'Qx':
169                    self.current_dataset.xaxis("Q_x", unit)
170                    self.current_dataset.qx_data = data_set.flatten()
171                    continue
172                elif key == u'Qxdev':
173                    self.current_dataset.dqx_data = data_set.flatten()
174                    continue
175                elif key == u'Mask':
176                    self.current_dataset.mask = data_set.flatten()
177                    continue
178
179                for data_point in data_set:
180                    ## Top Level Meta Data
181                    if key == u'definition':
182                        self.current_datainfo.meta_data['reader'] = data_point
183                    elif key == u'run':
184                        self.current_datainfo.run.append(data_point)
185                    elif key == u'title':
186                        self.current_datainfo.title = data_point
187                    elif key == u'SASnote':
188                        self.current_datainfo.notes.append(data_point)
189
190                    ## Sample Information
191                    elif key == u'Title' and self.parent_class == u'SASsample': # CanSAS 2.0 format
192                        self.current_datainfo.sample.name = data_point
193                    elif key == u'name' and self.parent_class == u'SASsample': # NXcanSAS format
194                        self.current_datainfo.sample.name = data_point
195                    elif key == u'thickness' and self.parent_class == u'SASsample':
196                        self.current_datainfo.sample.thickness = data_point
197                    elif key == u'temperature' and self.parent_class == u'SASsample':
198                        self.current_datainfo.sample.temperature = data_point
199
200                    ## Instrumental Information
201                    elif key == u'name' and self.parent_class == u'SASinstrument':
202                        self.current_datainfo.instrument = data_point
203                    elif key == u'name' and self.parent_class == u'SASdetector':
204                        self.detector.name = data_point
205                    elif key == u'SDD' and self.parent_class == u'SASdetector':
206                        self.detector.distance = float(data_point)
207                        self.detector.distance_unit = unit
208                    elif key == u'SSD' and self.parent_class == u'SAScollimation':
209                        self.collimation.length = data_point
210                        self.collimation.length_unit = unit
211                    elif key == u'name' and self.parent_class == u'SAScollimation':
212                        self.collimation.name = data_point
213
214                    ## Process Information
215                    elif key == u'name' and self.parent_class == u'SASprocess':
216                        self.process.name = data_point
217                    elif key == u'Title' and self.parent_class == u'SASprocess': # CanSAS 2.0 format
218                        self.process.name = data_point
219                    elif key == u'name' and self.parent_class == u'SASprocess': # NXcanSAS format
220                        self.process.name = data_point
221                    elif key == u'description' and self.parent_class == u'SASprocess':
222                        self.process.description = data_point
223                    elif key == u'date' and self.parent_class == u'SASprocess':
224                        self.process.date = data_point
225                    elif self.parent_class == u'SASprocess':
226                        self.process.notes.append(data_point)
227
228                    ## Transmission Spectrum
229                    elif key == u'T' and self.parent_class == u'SAStransmission_spectrum':
230                        self.trans_spectrum.transmission.append(data_point)
231                    elif key == u'Tdev' and self.parent_class == u'SAStransmission_spectrum':
232                        self.trans_spectrum.transmission_deviation.append(data_point)
233                    elif key == u'lambda' and self.parent_class == u'SAStransmission_spectrum':
234                        self.trans_spectrum.wavelength.append(data_point)
235
236                    ## Other Information
237                    elif key == u'wavelength' and self.parent_class == u'SASdata':
238                        self.current_datainfo.source.wavelength = data_point
239                        self.current_datainfo.source.wavelength.unit = unit
240                    elif key == u'radiation' and self.parent_class == u'SASsource':
241                        self.current_datainfo.source.radiation = data_point
242                    elif key == u'transmission' and self.parent_class == u'SASdata':
243                        self.current_datainfo.sample.transmission = data_point
244
245                    ## Everything else goes in meta_data
246                    else:
247                        new_key = self._create_unique_key(self.current_datainfo.meta_data, key)
248                        self.current_datainfo.meta_data[new_key] = data_point
249
250            else:
251                ## I don't know if this reachable code
252                self.errors.add("ShouldNeverHappenException")
253
254    def add_intermediate(self):
255        """
256        This method stores any intermediate objects within the final data set after fully reading the set.
257
258        :param parent: The NXclass name for the h5py Group object that just finished being processed
259        """
260
261        if self.parent_class == u'SASprocess':
262            self.current_datainfo.process.append(self.process)
263            self.process = Process()
264        elif self.parent_class == u'SASdetector':
265            self.current_datainfo.detector.append(self.detector)
266            self.detector = Detector()
267        elif self.parent_class == u'SAStransmission_spectrum':
268            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
269            self.trans_spectrum = TransmissionSpectrum()
270        elif self.parent_class == u'SAScollimation':
271            self.current_datainfo.collimation.append(self.collimation)
272            self.collimation = Collimation()
273        elif self.parent_class == u'SASaperture':
274            self.collimation.aperture.append(self.aperture)
275            self.aperture = Aperture()
276        elif self.parent_class == u'SASdata':
277            if type(self.current_dataset) is plottable_2D:
278                self.data2d.append(self.current_dataset)
279            elif type(self.current_dataset) is plottable_1D:
280                self.data1d.append(self.current_dataset)
281
282    def final_data_cleanup(self):
283        """
284        Does some final cleanup and formatting on self.current_datainfo and all data1D and data2D objects and then
285        combines the data and info into Data1D and Data2D objects
286        """
287
288        ## Type cast data arrays to float64
289        if len(self.current_datainfo.trans_spectrum) > 0:
290            spectrum_list = []
291            for spectrum in self.current_datainfo.trans_spectrum:
292                spectrum.transmission = np.delete(spectrum.transmission, [0])
293                spectrum.transmission = spectrum.transmission.astype(np.float64)
294                spectrum.transmission_deviation = np.delete(spectrum.transmission_deviation, [0])
295                spectrum.transmission_deviation = spectrum.transmission_deviation.astype(np.float64)
296                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
297                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
298                if len(spectrum.transmission) > 0:
299                    spectrum_list.append(spectrum)
300            self.current_datainfo.trans_spectrum = spectrum_list
301
302        ## Append errors to dataset and reset class errors
303        self.current_datainfo.errors = self.errors
304        self.errors.clear()
305
306        ## Combine all plottables with datainfo and append each to output
307        ## Type cast data arrays to float64 and find min/max as appropriate
308        for dataset in self.data2d:
309            dataset.data = dataset.data.astype(np.float64)
310            dataset.err_data = dataset.err_data.astype(np.float64)
311            if dataset.qx_data is not None:
312                dataset.xmin = np.min(dataset.qx_data)
313                dataset.xmax = np.max(dataset.qx_data)
314                dataset.qx_data = dataset.qx_data.astype(np.float64)
315            if dataset.dqx_data is not None:
316                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
317            if dataset.qy_data is not None:
318                dataset.ymin = np.min(dataset.qy_data)
319                dataset.ymax = np.max(dataset.qy_data)
320                dataset.qy_data = dataset.qy_data.astype(np.float64)
321            if dataset.dqy_data is not None:
322                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
323            if dataset.q_data is not None:
324                dataset.q_data = dataset.q_data.astype(np.float64)
325            zeros = np.ones(dataset.data.size, dtype=bool)
326            try:
327                for i in range (0, dataset.mask.size - 1):
328                    zeros[i] = dataset.mask[i]
329            except:
330                self.errors.add(sys.exc_value)
331            dataset.mask = zeros
332            ## Calculate the actual Q matrix
333            try:
334                if dataset.q_data.size <= 1:
335                    dataset.q_data = np.sqrt(dataset.qx_data * dataset.qx_data + dataset.qy_data * dataset.qy_data)
336            except:
337                dataset.q_data = None
338
339            if dataset.data.ndim == 2:
340                (n_rows, n_cols) = dataset.data.shape
341                dataset.y_bins = dataset.qy_data[0::n_rows]
342                dataset.x_bins = dataset.qx_data[:n_cols]
343                dataset.data = dataset.data.flatten()
344
345            final_dataset = combine_data_info_with_plottable(dataset, self.current_datainfo)
346            self.output.append(final_dataset)
347
348        for dataset in self.data1d:
349            if dataset.x is not None:
350                dataset.x = dataset.x.astype(np.float64)
351                dataset.xmin = np.min(dataset.x)
352                dataset.xmax = np.max(dataset.x)
353            if dataset.y is not None:
354                dataset.y = dataset.y.astype(np.float64)
355                dataset.ymin = np.min(dataset.y)
356                dataset.ymax = np.max(dataset.y)
357            if dataset.dx is not None:
358                dataset.dx = dataset.dx.astype(np.float64)
359            if dataset.dxl is not None:
360                dataset.dxl = dataset.dxl.astype(np.float64)
361            if dataset.dxw is not None:
362                dataset.dxw = dataset.dxw.astype(np.float64)
363            if dataset.dy is not None:
364                dataset.dy = dataset.dy.astype(np.float64)
365            final_dataset = combine_data_info_with_plottable(dataset, self.current_datainfo)
366            self.output.append(final_dataset)
367
368    def add_data_set(self, key=""):
369        """
370        Adds the current_dataset to the list of outputs after preforming final processing on the data and then calls a
371        private method to generate a new data set.
372
373        :param key: NeXus group name for current tree level
374        """
375
376        if self.current_datainfo and self.current_dataset:
377            self.final_data_cleanup()
378        self.data1d = []
379        self.data2d = []
380        self.current_datainfo = DataInfo()
381
382
383    def _initialize_new_data_set(self, parent_list = None):
384        """
385        A private class method to generate a new 1D or 2D data object based on the type of data within the set.
386        Outside methods should call add_data_set() to be sure any existing data is stored properly.
387
388        :param parent_list: List of names of parent elements
389        """
390
391        if parent_list is None:
392            parent_list = []
393        if self._find_intermediate(parent_list, "Qx"):
394            self.current_dataset = plottable_2D()
395        else:
396            x = np.array(0)
397            y = np.array(0)
398            self.current_dataset = plottable_1D(x, y)
399        self.current_datainfo.filename = self.raw_data.filename
400
401    def _find_intermediate(self, parent_list, basename=""):
402        """
403        A private class used to find an entry by either using a direct key or knowing the approximate basename.
404
405        :param parent_list: List of parents to the current level in the HDF5 file
406        :param basename: Approximate name of an entry to search for
407        :return:
408        """
409
410        entry = False
411        key_prog = re.compile(basename)
412        top = self.raw_data
413        for parent in parent_list:
414            top = top.get(parent)
415        for key in top.keys():
416            if (key_prog.match(key)):
417                entry = True
418                break
419        return entry
420
421    def _create_unique_key(self, dictionary, name, numb=0):
422        """
423        Create a unique key value for any dictionary to prevent overwriting
424        Recurses until a unique key value is found.
425
426        :param dictionary: A dictionary with any number of entries
427        :param name: The index of the item to be added to dictionary
428        :param numb: The number to be appended to the name, starts at 0
429        :return: The new name for the dictionary entry
430        """
431        if dictionary.get(name) is not None:
432            numb += 1
433            name = name.split("_")[0]
434            name += "_{0}".format(numb)
435            name = self._create_unique_key(dictionary, name, numb)
436        return name
437
438    def _get_unit(self, value):
439        """
440        Find the unit for a particular value within the h5py dictionary
441
442        :param value: attribute dictionary for a particular value set
443        :return: unit for the value passed to the method
444        """
445        unit = value.attrs.get(u'units')
446        if unit == None:
447            unit = value.attrs.get(u'unit')
448        ## Convert the unit formats
449        if unit == "1/A":
450            unit = "A^{-1}"
451        elif unit == "1/cm":
452            unit = "cm^{-1}"
453        return unit
Note: See TracBrowser for help on using the repository browser.