source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 3bab401

Branches/tags containing this file: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since 3bab401 was 3bab401, checked in by krzywon, 6 years ago

Create a universal unit converter for all data loaders. refs #1111

  • Property mode set to 100644
File size: 25.9 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
def h5attr(node, key, default=None):
    """Return attribute *key* of an h5py node, decoded to text.

    Falls back to *default* when the attribute is absent.
    """
    raw = node.attrs.get(key, default)
    return decode(raw)
20
class Reader(FileReader):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens
    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    # CanSAS version this reader targets
    cansas_version = 2.0
    # Human-readable data type name
    type_name = "CanSAS 2.0"
    # Wildcard patterns for file dialogs
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check (any extension will be attempted)
    allow_all = True
49    def get_file_contents(self):
50        """
51        This is the general read method that all SasView data_loaders must have.
52
53        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
54        :return: List of Data1D/2D objects and/or a list of errors.
55        """
56        # Reinitialize when loading a new data file to reset all class variables
57        self.reset_state()
58
59        filename = self.f_open.name
60        self.f_open.close() # IO handled by h5py
61
62        # Check that the file exists
63        if os.path.isfile(filename):
64            basename = os.path.basename(filename)
65            _, extension = os.path.splitext(basename)
66            # If the file type is not allowed, return empty list
67            if extension in self.ext or self.allow_all:
68                # Load the data file
69                try:
70                    self.raw_data = h5py.File(filename, 'r')
71                except Exception as e:
72                    if extension not in self.ext:
73                        msg = "CanSAS2.0 HDF5 Reader could not load file {}".format(basename + extension)
74                        raise DefaultReaderException(msg)
75                    raise FileContentsException(e.message)
76                try:
77                    # Read in all child elements of top level SASroot
78                    self.read_children(self.raw_data, [])
79                    # Add the last data set to the list of outputs
80                    self.add_data_set()
81                except Exception as exc:
82                    raise FileContentsException(exc.message)
83                finally:
84                    # Close the data file
85                    self.raw_data.close()
86
87                for dataset in self.output:
88                    if isinstance(dataset, Data1D):
89                        if dataset.x.size < 5:
90                            self.output = []
91                            raise FileContentsException("Fewer than 5 data points found.")
92
93    def reset_state(self):
94        """
95        Create the reader object and define initial states for class variables
96        """
97        super(Reader, self).reset_state()
98        self.data1d = []
99        self.data2d = []
100        self.raw_data = None
101        self.errors = set()
102        self.logging = []
103        self.q_name = []
104        self.mask_name = u''
105        self.i_name = u''
106        self.i_node = u''
107        self.q_uncertainties = u''
108        self.q_resolutions = u''
109        self.i_uncertainties = u''
110        self.parent_class = u''
111        self.detector = Detector()
112        self.collimation = Collimation()
113        self.aperture = Aperture()
114        self.process = Process()
115        self.trans_spectrum = TransmissionSpectrum()
116
    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        :param data: h5py Group (or File) object whose children are walked
        :param parent_list: list of group names from the root down to *data*
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            class_name = h5attr(value, u'canSAS_class')
            if class_name is None:
                # Fall back to the NeXus class attribute (NXcanSAS files)
                class_name = h5attr(value, u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                # Set parent class before recursion
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    # Determine which datasets hold Q/I/mask before recursing
                    self._find_data_attributes(value)
                    self._initialize_new_data_set(parent_list)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                # Reset parent class when returning from recursive method
                self.parent_class = class_name
                self.add_intermediate()
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    if isinstance(data_point, np.ndarray):
                        if data_point.dtype.char == 'S':
                            # Byte-string array element: decode to text
                            data_point = decode(bytes(data_point))
                    else:
                        data_point = decode(data_point)
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = h5attr(value, 'name')
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # A missing/odd 'name' attribute is not fatal
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # SASdata: hand over the whole array at once, then stop
                    # iterating its elements
                    elif self.parent_class == u'SASdata':
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    # Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # Neither Group nor Dataset; not expected to be reachable
                self.errors.add("ShouldNeverHappenException")
221
    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method: store one dataset of the current SASdata
        group onto the current plottable_1D/plottable_2D object.

        :param data_set: full numpy array read from the HDF5 file
        :param key: name of the dataset within the SASdata group
        :param unit: unit attribute of the dataset
        """
        if key == self.i_name:
            # Intensity: 2D keeps the matrix, 1D flattens to a vector
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        elif key == self.i_uncertainties:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        elif key in self.q_name:
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
        # NOTE(review): q_resolutions may be a plain string, in which case
        # `in` is a substring test rather than list membership -- confirm
        elif key in self.q_resolutions:
            if key == u'dQw':
                self.current_dataset.dxw = data_set.flatten()
            elif key == u'dQl':
                self.current_dataset.dxl = data_set.flatten()
            else:
                self.current_dataset.dx = data_set.flatten()
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        elif key == self.mask_name:
            self.current_dataset.mask = data_set.flatten()
        elif key == u'wavelength':
            # Wavelength stored inside SASdata; first element only
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit
269
270    def process_trans_spectrum(self, data_set, key):
271        """
272        SAStransmission_spectrum processor
273        :param data_set: data from HDF5 file
274        :param key: canSAS_class attribute
275        """
276        if key == u'T':
277            self.trans_spectrum.transmission = data_set.flatten()
278        elif key == u'Tdev':
279            self.trans_spectrum.transmission_deviation = data_set.flatten()
280        elif key == u'lambda':
281            self.trans_spectrum.wavelength = data_set.flatten()
282
283    def process_sample(self, data_point, key):
284        """
285        SASsample processor
286        :param data_point: Single point from an HDF5 data file
287        :param key: class name data_point was taken from
288        """
289        if key == u'Title':
290            self.current_datainfo.sample.name = data_point
291        elif key == u'name':
292            self.current_datainfo.sample.name = data_point
293        elif key == u'ID':
294            self.current_datainfo.sample.name = data_point
295        elif key == u'thickness':
296            self.current_datainfo.sample.thickness = data_point
297        elif key == u'temperature':
298            self.current_datainfo.sample.temperature = data_point
299        elif key == u'transmission':
300            self.current_datainfo.sample.transmission = data_point
301        elif key == u'x_position':
302            self.current_datainfo.sample.position.x = data_point
303        elif key == u'y_position':
304            self.current_datainfo.sample.position.y = data_point
305        elif key == u'pitch':
306            self.current_datainfo.sample.orientation.x = data_point
307        elif key == u'yaw':
308            self.current_datainfo.sample.orientation.y = data_point
309        elif key == u'roll':
310            self.current_datainfo.sample.orientation.z = data_point
311        elif key == u'details':
312            self.current_datainfo.sample.details.append(data_point)
313
314    def process_detector(self, data_point, key, unit):
315        """
316        SASdetector processor
317        :param data_point: Single point from an HDF5 data file
318        :param key: class name data_point was taken from
319        :param unit: unit attribute from data set
320        """
321        if key == u'name':
322            self.detector.name = data_point
323        elif key == u'SDD':
324            self.detector.distance = float(data_point)
325            self.detector.distance_unit = unit
326        elif key == u'slit_length':
327            self.detector.slit_length = float(data_point)
328            self.detector.slit_length_unit = unit
329        elif key == u'x_position':
330            self.detector.offset.x = float(data_point)
331            self.detector.offset_unit = unit
332        elif key == u'y_position':
333            self.detector.offset.y = float(data_point)
334            self.detector.offset_unit = unit
335        elif key == u'pitch':
336            self.detector.orientation.x = float(data_point)
337            self.detector.orientation_unit = unit
338        elif key == u'roll':
339            self.detector.orientation.z = float(data_point)
340            self.detector.orientation_unit = unit
341        elif key == u'yaw':
342            self.detector.orientation.y = float(data_point)
343            self.detector.orientation_unit = unit
344        elif key == u'beam_center_x':
345            self.detector.beam_center.x = float(data_point)
346            self.detector.beam_center_unit = unit
347        elif key == u'beam_center_y':
348            self.detector.beam_center.y = float(data_point)
349            self.detector.beam_center_unit = unit
350        elif key == u'x_pixel_size':
351            self.detector.pixel_size.x = float(data_point)
352            self.detector.pixel_size_unit = unit
353        elif key == u'y_pixel_size':
354            self.detector.pixel_size.y = float(data_point)
355            self.detector.pixel_size_unit = unit
356
357    def process_collimation(self, data_point, key, unit):
358        """
359        SAScollimation processor
360        :param data_point: Single point from an HDF5 data file
361        :param key: class name data_point was taken from
362        :param unit: unit attribute from data set
363        """
364        if key == u'distance':
365            self.collimation.length = data_point
366            self.collimation.length_unit = unit
367        elif key == u'name':
368            self.collimation.name = data_point
369
370    def process_aperture(self, data_point, key):
371        """
372        SASaperture processor
373        :param data_point: Single point from an HDF5 data file
374        :param key: class name data_point was taken from
375        """
376        if key == u'shape':
377            self.aperture.shape = data_point
378        elif key == u'x_gap':
379            self.aperture.size.x = data_point
380        elif key == u'y_gap':
381            self.aperture.size.y = data_point
382
383    def process_source(self, data_point, key, unit):
384        """
385        SASsource processor
386        :param data_point: Single point from an HDF5 data file
387        :param key: class name data_point was taken from
388        :param unit: unit attribute from data set
389        """
390        if key == u'incident_wavelength':
391            self.current_datainfo.source.wavelength = data_point
392            self.current_datainfo.source.wavelength_unit = unit
393        elif key == u'wavelength_max':
394            self.current_datainfo.source.wavelength_max = data_point
395            self.current_datainfo.source.wavelength_max_unit = unit
396        elif key == u'wavelength_min':
397            self.current_datainfo.source.wavelength_min = data_point
398            self.current_datainfo.source.wavelength_min_unit = unit
399        elif key == u'incident_wavelength_spread':
400            self.current_datainfo.source.wavelength_spread = data_point
401            self.current_datainfo.source.wavelength_spread_unit = unit
402        elif key == u'beam_size_x':
403            self.current_datainfo.source.beam_size.x = data_point
404            self.current_datainfo.source.beam_size_unit = unit
405        elif key == u'beam_size_y':
406            self.current_datainfo.source.beam_size.y = data_point
407            self.current_datainfo.source.beam_size_unit = unit
408        elif key == u'beam_shape':
409            self.current_datainfo.source.beam_shape = data_point
410        elif key == u'radiation':
411            self.current_datainfo.source.radiation = data_point
412
413    def process_process(self, data_point, key):
414        """
415        SASprocess processor
416        :param data_point: Single point from an HDF5 data file
417        :param key: class name data_point was taken from
418        """
419        if key == u'Title':  # CanSAS 2.0
420            self.process.name = data_point
421        elif key == u'name':  # NXcanSAS
422            self.process.name = data_point
423        elif key == u'description':
424            self.process.description = data_point
425        elif key == u'date':
426            self.process.date = data_point
427        elif key == u'term':
428            self.process.term = data_point
429        else:
430            self.process.notes.append(data_point)
431
432    def add_intermediate(self):
433        """
434        This method stores any intermediate objects within the final data set
435        after fully reading the set.
436
437        :param parent: The NXclass name for the h5py Group object that just
438                       finished being processed
439        """
440
441        if self.parent_class == u'SASprocess':
442            self.current_datainfo.process.append(self.process)
443            self.process = Process()
444        elif self.parent_class == u'SASdetector':
445            self.current_datainfo.detector.append(self.detector)
446            self.detector = Detector()
447        elif self.parent_class == u'SAStransmission_spectrum':
448            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
449            self.trans_spectrum = TransmissionSpectrum()
450        elif self.parent_class == u'SAScollimation':
451            self.current_datainfo.collimation.append(self.collimation)
452            self.collimation = Collimation()
453        elif self.parent_class == u'SASaperture':
454            self.collimation.aperture.append(self.aperture)
455            self.aperture = Aperture()
456        elif self.parent_class == u'SASdata':
457            if isinstance(self.current_dataset, plottable_2D):
458                self.data2d.append(self.current_dataset)
459            elif isinstance(self.current_dataset, plottable_1D):
460                self.data1d.append(self.current_dataset)
461
462    def final_data_cleanup(self):
463        """
464        Does some final cleanup and formatting on self.current_datainfo and
465        all data1D and data2D objects and then combines the data and info into
466        Data1D and Data2D objects
467        """
468        # Type cast data arrays to float64
469        if len(self.current_datainfo.trans_spectrum) > 0:
470            spectrum_list = []
471            for spectrum in self.current_datainfo.trans_spectrum:
472                spectrum.transmission = np.delete(spectrum.transmission, [0])
473                spectrum.transmission = spectrum.transmission.astype(np.float64)
474                spectrum.transmission_deviation = np.delete(
475                    spectrum.transmission_deviation, [0])
476                spectrum.transmission_deviation = \
477                    spectrum.transmission_deviation.astype(np.float64)
478                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
479                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
480                if len(spectrum.transmission) > 0:
481                    spectrum_list.append(spectrum)
482            self.current_datainfo.trans_spectrum = spectrum_list
483
484        # Append errors to dataset and reset class errors
485        self.current_datainfo.errors = self.errors
486        self.errors.clear()
487
488        # Combine all plottables with datainfo and append each to output
489        # Type cast data arrays to float64 and find min/max as appropriate
490        for dataset in self.data2d:
491            zeros = np.ones(dataset.data.size, dtype=bool)
492            try:
493                for i in range(0, dataset.mask.size - 1):
494                    zeros[i] = dataset.mask[i]
495            except:
496                self.errors.add(sys.exc_value)
497            dataset.mask = zeros
498            # Calculate the actual Q matrix
499            try:
500                if dataset.q_data.size <= 1:
501                    dataset.q_data = np.sqrt(dataset.qx_data
502                                             * dataset.qx_data
503                                             + dataset.qy_data
504                                             * dataset.qy_data)
505            except:
506                dataset.q_data = None
507
508            if dataset.data.ndim == 2:
509                (n_rows, n_cols) = dataset.data.shape
510                dataset.y_bins = dataset.qy_data[0::n_cols]
511                dataset.x_bins = dataset.qx_data[:n_cols]
512                dataset.data = dataset.data.flatten()
513            self.current_dataset = dataset
514            self.send_to_output()
515
516        for dataset in self.data1d:
517            self.current_dataset = dataset
518            self.send_to_output()
519
520    def add_data_set(self, key=""):
521        """
522        Adds the current_dataset to the list of outputs after preforming final
523        processing on the data and then calls a private method to generate a
524        new data set.
525
526        :param key: NeXus group name for current tree level
527        """
528
529        if self.current_datainfo and self.current_dataset:
530            self.final_data_cleanup()
531        self.data1d = []
532        self.data2d = []
533        self.current_datainfo = DataInfo()
534
535    def _initialize_new_data_set(self, parent_list=None):
536        """
537        A private class method to generate a new 1D or 2D data object based on
538        the type of data within the set. Outside methods should call
539        add_data_set() to be sure any existing data is stored properly.
540
541        :param parent_list: List of names of parent elements
542        """
543
544        if parent_list is None:
545            parent_list = []
546        if self._find_intermediate(parent_list, "Qx"):
547            self.current_dataset = plottable_2D()
548        else:
549            x = np.array(0)
550            y = np.array(0)
551            self.current_dataset = plottable_1D(x, y)
552        self.current_datainfo.filename = self.raw_data.filename
553        self.mask_name = ""
554        self.i_name = ""
555        self.i_node = ""
556        self.q_name = []
557        self.q_uncertainties = ""
558        self.q_resolutions = ""
559        self.i_uncertainties = ""
560
    def _find_data_attributes(self, value):
        """
        Record the names of the Q, I, mask, and uncertainty datasets declared
        by the NXcanSAS attributes of a SASdata/NXdata group.

        :param value: SASdata/NXdata HDF5 Group
        """
        attrs = value.attrs
        # 'signal' names the intensity dataset; 'I_axes' lists axis dataset
        # names and 'Q_indices' selects which of them are Q axes
        signal = attrs.get("signal")
        # NOTE(review): str() on a bytes attribute yields "b'...'" under
        # Python 3 -- confirm the type h5py returns for I_axes here
        i_axes = np.array(str(attrs.get("I_axes")).split(","))
        # NOTE(review): assumes Q_indices is always present and splittable;
        # a missing attribute would raise AttributeError -- TODO confirm
        q_indices = np.int_(attrs.get("Q_indices").split(","))
        keys = value.keys()
        self.mask_name = attrs.get("mask")
        for val in q_indices:
            self.q_name.append(i_axes[val])
        self.i_name = signal
        self.i_node = value.get(self.i_name)
        # Pull the uncertainty/resolution dataset names off the Q datasets
        for item in self.q_name:
            if item in keys:
                q_vals = value.get(item)
                self.q_uncertainties = q_vals.attrs.get("uncertainty")
                self.q_resolutions = q_vals.attrs.get("resolution")
        # And the uncertainty dataset name off the intensity dataset
        if self.i_name in keys:
            i_vals = value.get(self.i_name)
            self.i_uncertainties = i_vals.attrs.get("uncertainty")
585
586    def _find_intermediate(self, parent_list, basename=""):
587        """
588        A private class used to find an entry by either using a direct key or
589        knowing the approximate basename.
590
591        :param parent_list: List of parents nodes in the HDF5 file
592        :param basename: Approximate name of an entry to search for
593        :return:
594        """
595
596        entry = False
597        key_prog = re.compile(basename)
598        top = self.raw_data
599        for parent in parent_list:
600            top = top.get(parent)
601        for key in top.keys():
602            if key_prog.match(key):
603                entry = True
604                break
605        return entry
606
607    def _create_unique_key(self, dictionary, name, numb=0):
608        """
609        Create a unique key value for any dictionary to prevent overwriting
610        Recurses until a unique key value is found.
611
612        :param dictionary: A dictionary with any number of entries
613        :param name: The index of the item to be added to dictionary
614        :param numb: The number to be appended to the name, starts at 0
615        :return: The new name for the dictionary entry
616        """
617        if dictionary.get(name) is not None:
618            numb += 1
619            name = name.split("_")[0]
620            name += "_{0}".format(numb)
621            name = self._create_unique_key(dictionary, name, numb)
622        return name
623
624    def _get_unit(self, value):
625        """
626        Find the unit for a particular value within the h5py dictionary
627
628        :param value: attribute dictionary for a particular value set
629        :return: unit for the value passed to the method
630        """
631        unit = h5attr(value, u'units')
632        if unit is None:
633            unit = h5attr(value, u'unit')
634        return unit
Note: See TracBrowser for help on using the repository browser.