source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ cf820f5

Branches containing this revision: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since cf820f5 was cf820f5, checked in by krzywon, 6 years ago

Give proper label to NXcanSAS data files in file browser window. refs #976

  • Property mode set to 100644
File size: 26.1 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
def h5attr(node, key, default=None):
    """Fetch HDF5 attribute *key* from *node* and decode it to a native str.

    Returns *default* (decoded) when the attribute is absent.
    """
    raw = node.attrs.get(key, default)
    return decode(raw)
20
class Reader(FileReader):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens
    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    # CanSAS version this reader targets
    cansas_version = 2.0
    # Data type name shown in the file browser window
    type_name = "NXcanSAS"
    # Wildcard filter string used by the file dialog
    type = ["NXcanSAS HDF5 Files (*.h5)|*.h5|"]
    # List of allowed file extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check (attempt to read any file)
    allow_all = True
48
49    def get_file_contents(self):
50        """
51        This is the general read method that all SasView data_loaders must have.
52
53        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
54        :return: List of Data1D/2D objects and/or a list of errors.
55        """
56        # Reinitialize when loading a new data file to reset all class variables
57        self.reset_state()
58
59        filename = self.f_open.name
60        self.f_open.close() # IO handled by h5py
61
62        # Check that the file exists
63        if os.path.isfile(filename):
64            basename = os.path.basename(filename)
65            _, extension = os.path.splitext(basename)
66            # If the file type is not allowed, return empty list
67            if extension in self.ext or self.allow_all:
68                # Load the data file
69                try:
70                    self.raw_data = h5py.File(filename, 'r')
71                except Exception as e:
72                    if extension not in self.ext:
73                        msg = "NXcanSAS HDF5 Reader could not load file {}".format(basename + extension)
74                        raise DefaultReaderException(msg)
75                    raise FileContentsException(e.message)
76                try:
77                    # Read in all child elements of top level SASroot
78                    self.read_children(self.raw_data, [])
79                    # Add the last data set to the list of outputs
80                    self.add_data_set()
81                except Exception as exc:
82                    raise FileContentsException(exc.message)
83                finally:
84                    # Close the data file
85                    self.raw_data.close()
86
87                for dataset in self.output:
88                    if isinstance(dataset, Data1D):
89                        if dataset.x.size < 5:
90                            self.output = []
91                            raise FileContentsException("Fewer than 5 data points found.")
92
93    def reset_state(self):
94        """
95        Create the reader object and define initial states for class variables
96        """
97        super(Reader, self).reset_state()
98        self.data1d = []
99        self.data2d = []
100        self.raw_data = None
101        self.errors = set()
102        self.logging = []
103        self.q_name = []
104        self.mask_name = u''
105        self.i_name = u''
106        self.i_node = u''
107        self.q_uncertainties = u''
108        self.q_resolutions = u''
109        self.i_uncertainties = u''
110        self.parent_class = u''
111        self.detector = Detector()
112        self.collimation = Collimation()
113        self.aperture = Aperture()
114        self.process = Process()
115        self.trans_spectrum = TransmissionSpectrum()
116
    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        Groups trigger recursion (with SASentry/SASdata groups creating fresh
        data objects first); datasets are dispatched to the per-class
        ``process_*`` helpers based on ``self.parent_class`` and the key name.

        :param data: h5py Group object of any kind
        :param parent_list: list of group names from the root down to ``data``
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            # canSAS_class takes precedence; fall back to the NeXus NX_class
            class_name = h5attr(value, u'canSAS_class')
            if class_name is None:
                class_name = h5attr(value, u'NX_class')
            # The class name is used as a regex so e.g. "SASentry01" still
            # matches u'SASentry' below
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                # Set parent class before recursion
                last_parent_class = self.parent_class
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(value)
                    self._find_data_attributes(value)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                # Store finished intermediate objects (detector, process, ...)
                self.add_intermediate()
                # Reset parent class when returning from recursive method
                self.parent_class = last_parent_class
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    # Byte strings are decoded to native str before use
                    if isinstance(data_point, np.ndarray):
                        if data_point.dtype.char == 'S':
                            data_point = decode(bytes(data_point))
                    else:
                        data_point = decode(data_point)
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = h5attr(value, 'name')
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # best-effort: a run entry without a usable name
                            # attribute is simply recorded without one
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # SASdata and transmission spectra consume the whole
                    # array at once, so stop iterating over points
                    elif self.parent_class == u'SASdata':
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    # Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # Defensive: h5py nodes should always be Group or Dataset
                self.errors.add("ShouldNeverHappenException")
222
    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method: route one dataset from a SASdata group into
        the current plottable (1D or 2D) or datainfo object.

        :param data_set: full numpy array read from the HDF5 dataset
        :param key: dataset name within the SASdata group
        :param unit: unit attribute of the dataset (may be None)
        """
        # Intensity: 2D keeps the matrix shape; 1D is flattened into y
        if key == self.i_name:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        # Intensity uncertainty
        elif key == self.i_uncertainties:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        # Q axis (self.q_name is a list of axis names)
        elif key in self.q_name:
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
        # Q resolution.
        # NOTE(review): self.q_resolutions is initialized as a string and may
        # hold an attribute value rather than a list, so this `in` test can be
        # a *substring* match (e.g. u'dQw' in u'dQw,dQl') — confirm this is
        # the intended matching behavior.
        elif key in self.q_resolutions:
            if key == u'dQw':
                self.current_dataset.dxw = data_set.flatten()
            elif key == u'dQl':
                self.current_dataset.dxl = data_set.flatten()
            else:
                self.current_dataset.dx = data_set.flatten()
        # Explicit 2D axes and their deviations
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        # Mask dataset named by the SASdata "mask" attribute
        elif key == self.mask_name:
            self.current_dataset.mask = data_set.flatten()
        # Wavelength stored alongside the data maps onto the source
        elif key == u'wavelength':
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit
270
271    def process_trans_spectrum(self, data_set, key):
272        """
273        SAStransmission_spectrum processor
274        :param data_set: data from HDF5 file
275        :param key: canSAS_class attribute
276        """
277        if key == u'T':
278            self.trans_spectrum.transmission = data_set.flatten()
279        elif key == u'Tdev':
280            self.trans_spectrum.transmission_deviation = data_set.flatten()
281        elif key == u'lambda':
282            self.trans_spectrum.wavelength = data_set.flatten()
283
284    def process_sample(self, data_point, key):
285        """
286        SASsample processor
287        :param data_point: Single point from an HDF5 data file
288        :param key: class name data_point was taken from
289        """
290        if key == u'Title':
291            self.current_datainfo.sample.name = data_point
292        elif key == u'name':
293            self.current_datainfo.sample.name = data_point
294        elif key == u'ID':
295            self.current_datainfo.sample.name = data_point
296        elif key == u'thickness':
297            self.current_datainfo.sample.thickness = data_point
298        elif key == u'temperature':
299            self.current_datainfo.sample.temperature = data_point
300        elif key == u'transmission':
301            self.current_datainfo.sample.transmission = data_point
302        elif key == u'x_position':
303            self.current_datainfo.sample.position.x = data_point
304        elif key == u'y_position':
305            self.current_datainfo.sample.position.y = data_point
306        elif key == u'pitch':
307            self.current_datainfo.sample.orientation.x = data_point
308        elif key == u'yaw':
309            self.current_datainfo.sample.orientation.y = data_point
310        elif key == u'roll':
311            self.current_datainfo.sample.orientation.z = data_point
312        elif key == u'details':
313            self.current_datainfo.sample.details.append(data_point)
314
315    def process_detector(self, data_point, key, unit):
316        """
317        SASdetector processor
318        :param data_point: Single point from an HDF5 data file
319        :param key: class name data_point was taken from
320        :param unit: unit attribute from data set
321        """
322        if key == u'name':
323            self.detector.name = data_point
324        elif key == u'SDD':
325            self.detector.distance = float(data_point)
326            self.detector.distance_unit = unit
327        elif key == u'slit_length':
328            self.detector.slit_length = float(data_point)
329            self.detector.slit_length_unit = unit
330        elif key == u'x_position':
331            self.detector.offset.x = float(data_point)
332            self.detector.offset_unit = unit
333        elif key == u'y_position':
334            self.detector.offset.y = float(data_point)
335            self.detector.offset_unit = unit
336        elif key == u'pitch':
337            self.detector.orientation.x = float(data_point)
338            self.detector.orientation_unit = unit
339        elif key == u'roll':
340            self.detector.orientation.z = float(data_point)
341            self.detector.orientation_unit = unit
342        elif key == u'yaw':
343            self.detector.orientation.y = float(data_point)
344            self.detector.orientation_unit = unit
345        elif key == u'beam_center_x':
346            self.detector.beam_center.x = float(data_point)
347            self.detector.beam_center_unit = unit
348        elif key == u'beam_center_y':
349            self.detector.beam_center.y = float(data_point)
350            self.detector.beam_center_unit = unit
351        elif key == u'x_pixel_size':
352            self.detector.pixel_size.x = float(data_point)
353            self.detector.pixel_size_unit = unit
354        elif key == u'y_pixel_size':
355            self.detector.pixel_size.y = float(data_point)
356            self.detector.pixel_size_unit = unit
357
358    def process_collimation(self, data_point, key, unit):
359        """
360        SAScollimation processor
361        :param data_point: Single point from an HDF5 data file
362        :param key: class name data_point was taken from
363        :param unit: unit attribute from data set
364        """
365        if key == u'distance':
366            self.collimation.length = data_point
367            self.collimation.length_unit = unit
368        elif key == u'name':
369            self.collimation.name = data_point
370
371    def process_aperture(self, data_point, key):
372        """
373        SASaperture processor
374        :param data_point: Single point from an HDF5 data file
375        :param key: class name data_point was taken from
376        """
377        if key == u'shape':
378            self.aperture.shape = data_point
379        elif key == u'x_gap':
380            self.aperture.size.x = data_point
381        elif key == u'y_gap':
382            self.aperture.size.y = data_point
383
384    def process_source(self, data_point, key, unit):
385        """
386        SASsource processor
387        :param data_point: Single point from an HDF5 data file
388        :param key: class name data_point was taken from
389        :param unit: unit attribute from data set
390        """
391        if key == u'incident_wavelength':
392            self.current_datainfo.source.wavelength = data_point
393            self.current_datainfo.source.wavelength_unit = unit
394        elif key == u'wavelength_max':
395            self.current_datainfo.source.wavelength_max = data_point
396            self.current_datainfo.source.wavelength_max_unit = unit
397        elif key == u'wavelength_min':
398            self.current_datainfo.source.wavelength_min = data_point
399            self.current_datainfo.source.wavelength_min_unit = unit
400        elif key == u'incident_wavelength_spread':
401            self.current_datainfo.source.wavelength_spread = data_point
402            self.current_datainfo.source.wavelength_spread_unit = unit
403        elif key == u'beam_size_x':
404            self.current_datainfo.source.beam_size.x = data_point
405            self.current_datainfo.source.beam_size_unit = unit
406        elif key == u'beam_size_y':
407            self.current_datainfo.source.beam_size.y = data_point
408            self.current_datainfo.source.beam_size_unit = unit
409        elif key == u'beam_shape':
410            self.current_datainfo.source.beam_shape = data_point
411        elif key == u'radiation':
412            self.current_datainfo.source.radiation = data_point
413
414    def process_process(self, data_point, key):
415        """
416        SASprocess processor
417        :param data_point: Single point from an HDF5 data file
418        :param key: class name data_point was taken from
419        """
420        if key == u'Title':  # CanSAS 2.0
421            self.process.name = data_point
422        elif key == u'name':  # NXcanSAS
423            self.process.name = data_point
424        elif key == u'description':
425            self.process.description = data_point
426        elif key == u'date':
427            self.process.date = data_point
428        elif key == u'term':
429            self.process.term = data_point
430        else:
431            self.process.notes.append(data_point)
432
433    def add_intermediate(self):
434        """
435        This method stores any intermediate objects within the final data set
436        after fully reading the set.
437
438        :param parent: The NXclass name for the h5py Group object that just
439                       finished being processed
440        """
441
442        if self.parent_class == u'SASprocess':
443            self.current_datainfo.process.append(self.process)
444            self.process = Process()
445        elif self.parent_class == u'SASdetector':
446            self.current_datainfo.detector.append(self.detector)
447            self.detector = Detector()
448        elif self.parent_class == u'SAStransmission_spectrum':
449            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
450            self.trans_spectrum = TransmissionSpectrum()
451        elif self.parent_class == u'SAScollimation':
452            self.current_datainfo.collimation.append(self.collimation)
453            self.collimation = Collimation()
454        elif self.parent_class == u'SASaperture':
455            self.collimation.aperture.append(self.aperture)
456            self.aperture = Aperture()
457        elif self.parent_class == u'SASdata':
458            if isinstance(self.current_dataset, plottable_2D):
459                self.data2d.append(self.current_dataset)
460            elif isinstance(self.current_dataset, plottable_1D):
461                self.data1d.append(self.current_dataset)
462
463    def final_data_cleanup(self):
464        """
465        Does some final cleanup and formatting on self.current_datainfo and
466        all data1D and data2D objects and then combines the data and info into
467        Data1D and Data2D objects
468        """
469        # Type cast data arrays to float64
470        if len(self.current_datainfo.trans_spectrum) > 0:
471            spectrum_list = []
472            for spectrum in self.current_datainfo.trans_spectrum:
473                spectrum.transmission = np.delete(spectrum.transmission, [0])
474                spectrum.transmission = spectrum.transmission.astype(np.float64)
475                spectrum.transmission_deviation = np.delete(
476                    spectrum.transmission_deviation, [0])
477                spectrum.transmission_deviation = \
478                    spectrum.transmission_deviation.astype(np.float64)
479                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
480                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
481                if len(spectrum.transmission) > 0:
482                    spectrum_list.append(spectrum)
483            self.current_datainfo.trans_spectrum = spectrum_list
484
485        # Append errors to dataset and reset class errors
486        self.current_datainfo.errors = self.errors
487        self.errors.clear()
488
489        # Combine all plottables with datainfo and append each to output
490        # Type cast data arrays to float64 and find min/max as appropriate
491        for dataset in self.data2d:
492            zeros = np.ones(dataset.data.size, dtype=bool)
493            try:
494                for i in range(0, dataset.mask.size - 1):
495                    zeros[i] = dataset.mask[i]
496            except:
497                self.errors.add(sys.exc_value)
498            dataset.mask = zeros
499            # Calculate the actual Q matrix
500            try:
501                if dataset.q_data.size <= 1:
502                    dataset.q_data = np.sqrt(dataset.qx_data
503                                             * dataset.qx_data
504                                             + dataset.qy_data
505                                             * dataset.qy_data)
506            except:
507                dataset.q_data = None
508
509            if dataset.data.ndim == 2:
510                (n_rows, n_cols) = dataset.data.shape
511                dataset.y_bins = dataset.qy_data[0::n_cols]
512                dataset.x_bins = dataset.qx_data[:n_cols]
513                dataset.data = dataset.data.flatten()
514            self.current_dataset = dataset
515            self.send_to_output()
516
517        for dataset in self.data1d:
518            self.current_dataset = dataset
519            self.send_to_output()
520
521    def add_data_set(self, key=""):
522        """
523        Adds the current_dataset to the list of outputs after preforming final
524        processing on the data and then calls a private method to generate a
525        new data set.
526
527        :param key: NeXus group name for current tree level
528        """
529
530        if self.current_datainfo and self.current_dataset:
531            self.final_data_cleanup()
532        self.data1d = []
533        self.data2d = []
534        self.current_datainfo = DataInfo()
535
536    def _initialize_new_data_set(self, value=None):
537        """
538        A private class method to generate a new 1D or 2D data object based on
539        the type of data within the set. Outside methods should call
540        add_data_set() to be sure any existing data is stored properly.
541
542        :param parent_list: List of names of parent elements
543        """
544        if self._is2d(value):
545            self.current_dataset = plottable_2D()
546        else:
547            x = np.array(0)
548            y = np.array(0)
549            self.current_dataset = plottable_1D(x, y)
550        self.current_datainfo.filename = self.raw_data.filename
551        self.mask_name = ""
552        self.i_name = ""
553        self.i_node = ""
554        self.q_name = []
555        self.q_uncertainties = ""
556        self.q_resolutions = ""
557        self.i_uncertainties = ""
558
559    def _find_data_attributes(self, value):
560        """
561        A class to find the indices for Q, the name of the Qdev and Idev, and
562        the name of the mask.
563        :param value: SASdata/NXdata HDF5 Group
564        """
565        signal = "I"
566        i_axes = ["Q"]
567        q_indices = [0]
568        attrs = value.attrs
569        if hasattr(attrs, "signal"):
570            signal = attrs.get("signal")
571        if hasattr(attrs, "I_axes"):
572            i_axes = np.array(str(attrs.get("I_axes")).split(","))
573        if hasattr(attrs, "Q_indices"):
574            q_indices = np.int_(attrs.get("Q_indices").split(","))
575        keys = value.keys()
576        self.mask_name = attrs.get("mask")
577        for val in q_indices:
578            self.q_name.append(i_axes[val])
579        self.i_name = signal
580        self.i_node = value.get(self.i_name)
581        for item in self.q_name:
582            if item in keys:
583                q_vals = value.get(item)
584                self.q_uncertainties = q_vals.attrs.get("uncertainties")
585                if self.q_uncertainties is None:
586                    self.q_uncertainties = q_vals.attrs.get("uncertainty")
587                self.q_resolutions = q_vals.attrs.get("resolutions")
588        if self.i_name in keys:
589            i_vals = value.get(self.i_name)
590            self.i_uncertainties = i_vals.attrs.get("uncertainties")
591            if self.i_uncertainties is None:
592                self.i_uncertainties = i_vals.attrs.get("uncertainty")
593
594    def _is2d(self, value, basename="I"):
595        """
596        A private class to determine if the data set is 1d or 2d.
597
598        :param parent_list: List of parents nodes in the HDF5 file
599        :param basename: Approximate name of an entry to search for
600        :return: True if 2D, otherwise false
601        """
602
603        vals = value.get(basename)
604        return (vals is not None and vals.shape is not None
605                and len(vals.shape) != 1)
606
607    def _create_unique_key(self, dictionary, name, numb=0):
608        """
609        Create a unique key value for any dictionary to prevent overwriting
610        Recurses until a unique key value is found.
611
612        :param dictionary: A dictionary with any number of entries
613        :param name: The index of the item to be added to dictionary
614        :param numb: The number to be appended to the name, starts at 0
615        :return: The new name for the dictionary entry
616        """
617        if dictionary.get(name) is not None:
618            numb += 1
619            name = name.split("_")[0]
620            name += "_{0}".format(numb)
621            name = self._create_unique_key(dictionary, name, numb)
622        return name
623
624    def _get_unit(self, value):
625        """
626        Find the unit for a particular value within the h5py dictionary
627
628        :param value: attribute dictionary for a particular value set
629        :return: unit for the value passed to the method
630        """
631        unit = h5attr(value, u'units')
632        if unit is None:
633            unit = h5attr(value, u'unit')
634        return unit
Note: See TracBrowser for help on using the repository browser.