source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ b799f09

magnetic_scattrelease-4.2.2ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249unittest-saveload
Last change on this file since b799f09 was 02c1608e, checked in by krzywon, 6 years ago

Allow for multiple Q resolutions/uncertainties for USANS data. refs #976

  • Property mode set to 100644
File size: 26.6 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
18def h5attr(node, key, default=None):
19    return decode(node.attrs.get(key, default))
20
21class Reader(FileReader):
22    """
23    A class for reading in CanSAS v2.0 data files. The existing iteration opens
24    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
25    number of data sets may be present within the file and any dimensionality
26    of data may be used. Currently 1D and 2D SAS data sets are supported, but
27    future implementations will include 1D and 2D SESANS data.
28
29    Any number of SASdata sets may be present in a SASentry and the data within
30    can be either 1D I(Q) or 2D I(Qx, Qy).
31
32    Also supports reading NXcanSAS formatted HDF5 files
33
34    :Dependencies:
35        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
36    """
37
38    # CanSAS version
39    cansas_version = 2.0
40    # Data type name
41    type_name = "NXcanSAS"
42    # Wildcards
43    type = ["NXcanSAS HDF5 Files (*.h5)|*.h5|"]
44    # List of allowed extensions
45    ext = ['.h5', '.H5']
46    # Flag to bypass extension check
47    allow_all = True
48
49    def get_file_contents(self):
50        """
51        This is the general read method that all SasView data_loaders must have.
52
53        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
54        :return: List of Data1D/2D objects and/or a list of errors.
55        """
56        # Reinitialize when loading a new data file to reset all class variables
57        self.reset_state()
58
59        filename = self.f_open.name
60        self.f_open.close() # IO handled by h5py
61
62        # Check that the file exists
63        if os.path.isfile(filename):
64            basename = os.path.basename(filename)
65            _, extension = os.path.splitext(basename)
66            # If the file type is not allowed, return empty list
67            if extension in self.ext or self.allow_all:
68                # Load the data file
69                try:
70                    self.raw_data = h5py.File(filename, 'r')
71                except Exception as e:
72                    if extension not in self.ext:
73                        msg = "NXcanSAS HDF5 Reader could not load file {}".format(basename + extension)
74                        raise DefaultReaderException(msg)
75                    raise FileContentsException(e.message)
76                try:
77                    # Read in all child elements of top level SASroot
78                    self.read_children(self.raw_data, [])
79                    # Add the last data set to the list of outputs
80                    self.add_data_set()
81                except Exception as exc:
82                    raise FileContentsException(exc.message)
83                finally:
84                    # Close the data file
85                    self.raw_data.close()
86
87                for dataset in self.output:
88                    if isinstance(dataset, Data1D):
89                        if dataset.x.size < 5:
90                            self.output = []
91                            raise FileContentsException("Fewer than 5 data points found.")
92
93    def reset_state(self):
94        """
95        Create the reader object and define initial states for class variables
96        """
97        super(Reader, self).reset_state()
98        self.data1d = []
99        self.data2d = []
100        self.raw_data = None
101        self.errors = set()
102        self.logging = []
103        self.q_name = []
104        self.mask_name = u''
105        self.i_name = u''
106        self.i_node = u''
107        self.q_uncertainties = []
108        self.q_resolutions = []
109        self.i_uncertainties = u''
110        self.parent_class = u''
111        self.detector = Detector()
112        self.collimation = Collimation()
113        self.aperture = Aperture()
114        self.process = Process()
115        self.trans_spectrum = TransmissionSpectrum()
116
117    def read_children(self, data, parent_list):
118        """
119        A recursive method for stepping through the hierarchical data file.
120
121        :param data: h5py Group object of any kind
122        :param parent: h5py Group parent name
123        """
124
125        # Loop through each element of the parent and process accordingly
126        for key in data.keys():
127            # Get all information for the current key
128            value = data.get(key)
129            class_name = h5attr(value, u'canSAS_class')
130            if class_name is None:
131                class_name = h5attr(value, u'NX_class')
132            if class_name is not None:
133                class_prog = re.compile(class_name)
134            else:
135                class_prog = re.compile(value.name)
136
137            if isinstance(value, h5py.Group):
138                # Set parent class before recursion
139                last_parent_class = self.parent_class
140                self.parent_class = class_name
141                parent_list.append(key)
142                # If a new sasentry, store the current data sets and create
143                # a fresh Data1D/2D object
144                if class_prog.match(u'SASentry'):
145                    self.add_data_set(key)
146                elif class_prog.match(u'SASdata'):
147                    self._initialize_new_data_set(value)
148                    self._find_data_attributes(value)
149                # Recursion step to access data within the group
150                self.read_children(value, parent_list)
151                self.add_intermediate()
152                # Reset parent class when returning from recursive method
153                self.parent_class = last_parent_class
154                parent_list.remove(key)
155
156            elif isinstance(value, h5py.Dataset):
157                # If this is a dataset, store the data appropriately
158                data_set = data[key][:]
159                unit = self._get_unit(value)
160
161                for data_point in data_set:
162                    if isinstance(data_point, np.ndarray):
163                        if data_point.dtype.char == 'S':
164                            data_point = decode(bytes(data_point))
165                    else:
166                        data_point = decode(data_point)
167                    # Top Level Meta Data
168                    if key == u'definition':
169                        self.current_datainfo.meta_data['reader'] = data_point
170                    # Run
171                    elif key == u'run':
172                        self.current_datainfo.run.append(data_point)
173                        try:
174                            run_name = h5attr(value, 'name')
175                            run_dict = {data_point: run_name}
176                            self.current_datainfo.run_name = run_dict
177                        except Exception:
178                            pass
179                    # Title
180                    elif key == u'title':
181                        self.current_datainfo.title = data_point
182                    # Note
183                    elif key == u'SASnote':
184                        self.current_datainfo.notes.append(data_point)
185                    # Sample Information
186                    elif self.parent_class == u'SASsample':
187                        self.process_sample(data_point, key)
188                    # Instrumental Information
189                    elif (key == u'name'
190                          and self.parent_class == u'SASinstrument'):
191                        self.current_datainfo.instrument = data_point
192                    # Detector
193                    elif self.parent_class == u'SASdetector':
194                        self.process_detector(data_point, key, unit)
195                    # Collimation
196                    elif self.parent_class == u'SAScollimation':
197                        self.process_collimation(data_point, key, unit)
198                    # Aperture
199                    elif self.parent_class == u'SASaperture':
200                        self.process_aperture(data_point, key)
201                    # Process Information
202                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
203                        self.process_process(data_point, key)
204                    # Source
205                    elif self.parent_class == u'SASsource':
206                        self.process_source(data_point, key, unit)
207                    # Everything else goes in meta_data
208                    elif self.parent_class == u'SASdata':
209                        self.process_data_object(data_set, key, unit)
210                        break
211                    elif self.parent_class == u'SAStransmission_spectrum':
212                        self.process_trans_spectrum(data_set, key)
213                        break
214                    else:
215                        new_key = self._create_unique_key(
216                            self.current_datainfo.meta_data, key)
217                        self.current_datainfo.meta_data[new_key] = data_point
218
219            else:
220                # I don't know if this reachable code
221                self.errors.add("ShouldNeverHappenException")
222
223    def process_data_object(self, data_set, key, unit):
224        """
225        SASdata processor method
226        :param data_set: data from HDF5 file
227        :param key: canSAS_class attribute
228        :param unit: unit attribute
229        """
230        if key == self.i_name:
231            if isinstance(self.current_dataset, plottable_2D):
232                self.current_dataset.data = data_set
233                self.current_dataset.zaxis("Intensity", unit)
234            else:
235                self.current_dataset.y = data_set.flatten()
236                self.current_dataset.yaxis("Intensity", unit)
237        elif key == self.i_uncertainties:
238            if isinstance(self.current_dataset, plottable_2D):
239                self.current_dataset.err_data = data_set.flatten()
240            else:
241                self.current_dataset.dy = data_set.flatten()
242        elif key in self.q_name:
243            self.current_dataset.xaxis("Q", unit)
244            if isinstance(self.current_dataset, plottable_2D):
245                self.current_dataset.q = data_set.flatten()
246            else:
247                self.current_dataset.x = data_set.flatten()
248        elif key in self.q_uncertainties or key in self.q_resolutions:
249            if (len(self.q_resolutions) > 1
250                    and np.where(self.q_resolutions == key)[0] == 0):
251                self.current_dataset.dxw = data_set.flatten()
252            elif (len(self.q_resolutions) > 1
253                  and np.where(self.q_resolutions == key)[0] == 1):
254                self.current_dataset.dxl = data_set.flatten()
255            else:
256                self.current_dataset.dx = data_set.flatten()
257        elif key == u'Qy':
258            self.current_dataset.yaxis("Q_y", unit)
259            self.current_dataset.qy_data = data_set.flatten()
260        elif key == u'Qydev':
261            self.current_dataset.dqy_data = data_set.flatten()
262        elif key == u'Qx':
263            self.current_dataset.xaxis("Q_x", unit)
264            self.current_dataset.qx_data = data_set.flatten()
265        elif key == u'Qxdev':
266            self.current_dataset.dqx_data = data_set.flatten()
267        elif key == self.mask_name:
268            self.current_dataset.mask = data_set.flatten()
269        elif key == u'wavelength':
270            self.current_datainfo.source.wavelength = data_set[0]
271            self.current_datainfo.source.wavelength_unit = unit
272
273    def process_trans_spectrum(self, data_set, key):
274        """
275        SAStransmission_spectrum processor
276        :param data_set: data from HDF5 file
277        :param key: canSAS_class attribute
278        """
279        if key == u'T':
280            self.trans_spectrum.transmission = data_set.flatten()
281        elif key == u'Tdev':
282            self.trans_spectrum.transmission_deviation = data_set.flatten()
283        elif key == u'lambda':
284            self.trans_spectrum.wavelength = data_set.flatten()
285
286    def process_sample(self, data_point, key):
287        """
288        SASsample processor
289        :param data_point: Single point from an HDF5 data file
290        :param key: class name data_point was taken from
291        """
292        if key == u'Title':
293            self.current_datainfo.sample.name = data_point
294        elif key == u'name':
295            self.current_datainfo.sample.name = data_point
296        elif key == u'ID':
297            self.current_datainfo.sample.name = data_point
298        elif key == u'thickness':
299            self.current_datainfo.sample.thickness = data_point
300        elif key == u'temperature':
301            self.current_datainfo.sample.temperature = data_point
302        elif key == u'transmission':
303            self.current_datainfo.sample.transmission = data_point
304        elif key == u'x_position':
305            self.current_datainfo.sample.position.x = data_point
306        elif key == u'y_position':
307            self.current_datainfo.sample.position.y = data_point
308        elif key == u'pitch':
309            self.current_datainfo.sample.orientation.x = data_point
310        elif key == u'yaw':
311            self.current_datainfo.sample.orientation.y = data_point
312        elif key == u'roll':
313            self.current_datainfo.sample.orientation.z = data_point
314        elif key == u'details':
315            self.current_datainfo.sample.details.append(data_point)
316
317    def process_detector(self, data_point, key, unit):
318        """
319        SASdetector processor
320        :param data_point: Single point from an HDF5 data file
321        :param key: class name data_point was taken from
322        :param unit: unit attribute from data set
323        """
324        if key == u'name':
325            self.detector.name = data_point
326        elif key == u'SDD':
327            self.detector.distance = float(data_point)
328            self.detector.distance_unit = unit
329        elif key == u'slit_length':
330            self.detector.slit_length = float(data_point)
331            self.detector.slit_length_unit = unit
332        elif key == u'x_position':
333            self.detector.offset.x = float(data_point)
334            self.detector.offset_unit = unit
335        elif key == u'y_position':
336            self.detector.offset.y = float(data_point)
337            self.detector.offset_unit = unit
338        elif key == u'pitch':
339            self.detector.orientation.x = float(data_point)
340            self.detector.orientation_unit = unit
341        elif key == u'roll':
342            self.detector.orientation.z = float(data_point)
343            self.detector.orientation_unit = unit
344        elif key == u'yaw':
345            self.detector.orientation.y = float(data_point)
346            self.detector.orientation_unit = unit
347        elif key == u'beam_center_x':
348            self.detector.beam_center.x = float(data_point)
349            self.detector.beam_center_unit = unit
350        elif key == u'beam_center_y':
351            self.detector.beam_center.y = float(data_point)
352            self.detector.beam_center_unit = unit
353        elif key == u'x_pixel_size':
354            self.detector.pixel_size.x = float(data_point)
355            self.detector.pixel_size_unit = unit
356        elif key == u'y_pixel_size':
357            self.detector.pixel_size.y = float(data_point)
358            self.detector.pixel_size_unit = unit
359
360    def process_collimation(self, data_point, key, unit):
361        """
362        SAScollimation processor
363        :param data_point: Single point from an HDF5 data file
364        :param key: class name data_point was taken from
365        :param unit: unit attribute from data set
366        """
367        if key == u'distance':
368            self.collimation.length = data_point
369            self.collimation.length_unit = unit
370        elif key == u'name':
371            self.collimation.name = data_point
372
373    def process_aperture(self, data_point, key):
374        """
375        SASaperture processor
376        :param data_point: Single point from an HDF5 data file
377        :param key: class name data_point was taken from
378        """
379        if key == u'shape':
380            self.aperture.shape = data_point
381        elif key == u'x_gap':
382            self.aperture.size.x = data_point
383        elif key == u'y_gap':
384            self.aperture.size.y = data_point
385
386    def process_source(self, data_point, key, unit):
387        """
388        SASsource processor
389        :param data_point: Single point from an HDF5 data file
390        :param key: class name data_point was taken from
391        :param unit: unit attribute from data set
392        """
393        if key == u'incident_wavelength':
394            self.current_datainfo.source.wavelength = data_point
395            self.current_datainfo.source.wavelength_unit = unit
396        elif key == u'wavelength_max':
397            self.current_datainfo.source.wavelength_max = data_point
398            self.current_datainfo.source.wavelength_max_unit = unit
399        elif key == u'wavelength_min':
400            self.current_datainfo.source.wavelength_min = data_point
401            self.current_datainfo.source.wavelength_min_unit = unit
402        elif key == u'incident_wavelength_spread':
403            self.current_datainfo.source.wavelength_spread = data_point
404            self.current_datainfo.source.wavelength_spread_unit = unit
405        elif key == u'beam_size_x':
406            self.current_datainfo.source.beam_size.x = data_point
407            self.current_datainfo.source.beam_size_unit = unit
408        elif key == u'beam_size_y':
409            self.current_datainfo.source.beam_size.y = data_point
410            self.current_datainfo.source.beam_size_unit = unit
411        elif key == u'beam_shape':
412            self.current_datainfo.source.beam_shape = data_point
413        elif key == u'radiation':
414            self.current_datainfo.source.radiation = data_point
415
416    def process_process(self, data_point, key):
417        """
418        SASprocess processor
419        :param data_point: Single point from an HDF5 data file
420        :param key: class name data_point was taken from
421        """
422        if key == u'Title':  # CanSAS 2.0
423            self.process.name = data_point
424        elif key == u'name':  # NXcanSAS
425            self.process.name = data_point
426        elif key == u'description':
427            self.process.description = data_point
428        elif key == u'date':
429            self.process.date = data_point
430        elif key == u'term':
431            self.process.term = data_point
432        else:
433            self.process.notes.append(data_point)
434
435    def add_intermediate(self):
436        """
437        This method stores any intermediate objects within the final data set
438        after fully reading the set.
439
440        :param parent: The NXclass name for the h5py Group object that just
441                       finished being processed
442        """
443
444        if self.parent_class == u'SASprocess':
445            self.current_datainfo.process.append(self.process)
446            self.process = Process()
447        elif self.parent_class == u'SASdetector':
448            self.current_datainfo.detector.append(self.detector)
449            self.detector = Detector()
450        elif self.parent_class == u'SAStransmission_spectrum':
451            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
452            self.trans_spectrum = TransmissionSpectrum()
453        elif self.parent_class == u'SAScollimation':
454            self.current_datainfo.collimation.append(self.collimation)
455            self.collimation = Collimation()
456        elif self.parent_class == u'SASaperture':
457            self.collimation.aperture.append(self.aperture)
458            self.aperture = Aperture()
459        elif self.parent_class == u'SASdata':
460            if isinstance(self.current_dataset, plottable_2D):
461                self.data2d.append(self.current_dataset)
462            elif isinstance(self.current_dataset, plottable_1D):
463                self.data1d.append(self.current_dataset)
464
465    def final_data_cleanup(self):
466        """
467        Does some final cleanup and formatting on self.current_datainfo and
468        all data1D and data2D objects and then combines the data and info into
469        Data1D and Data2D objects
470        """
471        # Type cast data arrays to float64
472        if len(self.current_datainfo.trans_spectrum) > 0:
473            spectrum_list = []
474            for spectrum in self.current_datainfo.trans_spectrum:
475                spectrum.transmission = np.delete(spectrum.transmission, [0])
476                spectrum.transmission = spectrum.transmission.astype(np.float64)
477                spectrum.transmission_deviation = np.delete(
478                    spectrum.transmission_deviation, [0])
479                spectrum.transmission_deviation = \
480                    spectrum.transmission_deviation.astype(np.float64)
481                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
482                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
483                if len(spectrum.transmission) > 0:
484                    spectrum_list.append(spectrum)
485            self.current_datainfo.trans_spectrum = spectrum_list
486
487        # Append errors to dataset and reset class errors
488        self.current_datainfo.errors = self.errors
489        self.errors.clear()
490
491        # Combine all plottables with datainfo and append each to output
492        # Type cast data arrays to float64 and find min/max as appropriate
493        for dataset in self.data2d:
494            zeros = np.ones(dataset.data.size, dtype=bool)
495            try:
496                for i in range(0, dataset.mask.size - 1):
497                    zeros[i] = dataset.mask[i]
498            except:
499                self.errors.add(sys.exc_value)
500            dataset.mask = zeros
501            # Calculate the actual Q matrix
502            try:
503                if dataset.q_data.size <= 1:
504                    dataset.q_data = np.sqrt(dataset.qx_data
505                                             * dataset.qx_data
506                                             + dataset.qy_data
507                                             * dataset.qy_data)
508            except:
509                dataset.q_data = None
510
511            if dataset.data.ndim == 2:
512                (n_rows, n_cols) = dataset.data.shape
513                dataset.y_bins = dataset.qy_data[0::n_cols]
514                dataset.x_bins = dataset.qx_data[:n_cols]
515                dataset.data = dataset.data.flatten()
516            self.current_dataset = dataset
517            self.send_to_output()
518
519        for dataset in self.data1d:
520            self.current_dataset = dataset
521            self.send_to_output()
522
523    def add_data_set(self, key=""):
524        """
525        Adds the current_dataset to the list of outputs after preforming final
526        processing on the data and then calls a private method to generate a
527        new data set.
528
529        :param key: NeXus group name for current tree level
530        """
531
532        if self.current_datainfo and self.current_dataset:
533            self.final_data_cleanup()
534        self.data1d = []
535        self.data2d = []
536        self.current_datainfo = DataInfo()
537
538    def _initialize_new_data_set(self, value=None):
539        """
540        A private class method to generate a new 1D or 2D data object based on
541        the type of data within the set. Outside methods should call
542        add_data_set() to be sure any existing data is stored properly.
543
544        :param parent_list: List of names of parent elements
545        """
546        if self._is2d(value):
547            self.current_dataset = plottable_2D()
548        else:
549            x = np.array(0)
550            y = np.array(0)
551            self.current_dataset = plottable_1D(x, y)
552        self.current_datainfo.filename = self.raw_data.filename
553        self.mask_name = ""
554        self.i_name = ""
555        self.i_node = ""
556        self.q_name = []
557        self.q_uncertainties = []
558        self.q_resolutions = []
559        self.i_uncertainties = ""
560
561    def _find_data_attributes(self, value):
562        """
563        A class to find the indices for Q, the name of the Qdev and Idev, and
564        the name of the mask.
565        :param value: SASdata/NXdata HDF5 Group
566        """
567        signal = "I"
568        i_axes = ["Q"]
569        q_indices = [0]
570        attrs = value.attrs
571        if hasattr(attrs, "signal"):
572            signal = attrs.get("signal")
573        if hasattr(attrs, "I_axes"):
574            i_axes = np.array(str(attrs.get("I_axes")).split(","))
575        if hasattr(attrs, "Q_indices"):
576            q_indices = np.int_(attrs.get("Q_indices").split(","))
577        keys = value.keys()
578        self.mask_name = attrs.get("mask")
579        for val in q_indices:
580            self.q_name.append(i_axes[val])
581        self.i_name = signal
582        self.i_node = value.get(self.i_name)
583        for item in self.q_name:
584            if item in keys:
585                q_vals = value.get(item)
586                if q_vals.attrs.get("uncertainties"):
587                    self.q_uncertainties = q_vals.attrs.get("uncertainties")
588                elif q_vals.attrs.get("uncertainty"):
589                    self.q_uncertainties = q_vals.attrs.get("uncertainty")
590                if isinstance(self.q_uncertainties, str):
591                    self.q_uncertainties = [self.q_uncertainties]
592                if q_vals.attrs.get("resolutions"):
593                    self.q_resolutions = q_vals.attrs.get("resolutions")
594                if isinstance(self.q_resolutions, str):
595                    self.q_resolutions = [self.q_resolutions]
596        if self.i_name in keys:
597            i_vals = value.get(self.i_name)
598            self.i_uncertainties = i_vals.attrs.get("uncertainties")
599            if self.i_uncertainties is None:
600                self.i_uncertainties = i_vals.attrs.get("uncertainty")
601
602    def _is2d(self, value, basename="I"):
603        """
604        A private class to determine if the data set is 1d or 2d.
605
606        :param parent_list: List of parents nodes in the HDF5 file
607        :param basename: Approximate name of an entry to search for
608        :return: True if 2D, otherwise false
609        """
610
611        vals = value.get(basename)
612        return (vals is not None and vals.shape is not None
613                and len(vals.shape) != 1)
614
615    def _create_unique_key(self, dictionary, name, numb=0):
616        """
617        Create a unique key value for any dictionary to prevent overwriting
618        Recurses until a unique key value is found.
619
620        :param dictionary: A dictionary with any number of entries
621        :param name: The index of the item to be added to dictionary
622        :param numb: The number to be appended to the name, starts at 0
623        :return: The new name for the dictionary entry
624        """
625        if dictionary.get(name) is not None:
626            numb += 1
627            name = name.split("_")[0]
628            name += "_{0}".format(numb)
629            name = self._create_unique_key(dictionary, name, numb)
630        return name
631
632    def _get_unit(self, value):
633        """
634        Find the unit for a particular value within the h5py dictionary
635
636        :param value: attribute dictionary for a particular value set
637        :return: unit for the value passed to the method
638        """
639        unit = h5attr(value, u'units')
640        if unit is None:
641            unit = h5attr(value, u'unit')
642        return unit
Note: See TracBrowser for help on using the repository browser.