source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 9e0dd49

Last change on this file: 9e0dd49, checked in by krzywon, 7 years ago

New method to read in dq and di data names.

1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable
15

class Reader():
    """
    A class for reading in CanSAS v2.0 data files. The current iteration opens
    Mantid-generated HDF5 formatted files with the file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files.

    :Dependencies:
        The CanSAS HDF5 reader requires h5py v2.5.0 or later.
    """
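
    # A minimal usage sketch (the file name below is hypothetical; within
    # SasView the generic Loader in sas.sascalc.dataloader.loader normally
    # dispatches to this reader based on the file extension):
    #
    #     reader = Reader()
    #     output = reader.read("example_nxcansas.h5")
    #     for data in output:  # list of Data1D/Data2D objects
    #         print(data.filename, data.title)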

    # CanSAS version
    cansas_version = 2.0
    # Logged warnings or messages
    logging = None
    # List of errors for the current data set
    errors = None
    # Raw file contents to be processed
    raw_data = None
    # Data info currently being read in
    current_datainfo = None
    # SASdata set currently being read in
    current_dataset = None
    # List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    # List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    # Data type name
    type_name = "CanSAS 2.0"
    # Wildcards
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check
    allow_all = True
    # List of files to return
    output = None

    def read(self, filename):
        """
        This is the general read method that all SasView data_loaders must have.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects and/or a list of errors.
        """
        # Reinitialize when loading a new data file to reset all class variables
        self.reset_class_variables()
        # Check that the file exists
        if os.path.isfile(filename):
            basename = os.path.basename(filename)
            _, extension = os.path.splitext(basename)
            # Process only files with an allowed extension, or any file when
            # extension checking is bypassed
            if extension in self.ext or self.allow_all:
                # Load the data file
                self.raw_data = h5py.File(filename, 'r')
                # Read in all child elements of top level SASroot
                self.read_children(self.raw_data, [])
                # Add the last data set to the list of outputs
                self.add_data_set()
                # Close the data file
                self.raw_data.close()
        # Return data set(s)
        return self.output

    def reset_class_variables(self):
        """
        Reset the reader to its initial state by re-initializing all class
        variables.
        """
        self.current_datainfo = None
        self.current_dataset = None
        self.data1d = []
        self.data2d = []
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.output = []
        self.parent_class = u''
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()

    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        :param data: h5py Group object of any kind
        :param parent_list: List of names of the parent h5py Groups
        """

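        # A sketch of the hierarchy this method walks in a typical
        # NXcanSAS/CanSAS-HDF5 file (group names are illustrative; the class
        # of each group comes from its canSAS_class or NX_class attribute):
        #
        #   sasentry01              (SASentry)
        #   +-- sasdata01           (SASdata: I, Q, Idev, ... datasets)
        #   +-- instrument          (SASinstrument)
        #   |   +-- detector01      (SASdetector)
        #   |   +-- collimator01    (SAScollimation)
        #   +-- sample              (SASsample)
        #   +-- process01           (SASprocess)
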
        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            if value.attrs.get(u'canSAS_class') is not None:
                class_name = value.attrs.get(u'canSAS_class')
            elif value.attrs.get(u'NX_class') is not None:
                class_name = value.attrs.get(u'NX_class')
            else:
                class_name = key
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(parent_list)
                    self._find_data_attributes(value)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                self.add_intermediate()
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = value.attrs['name']
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # The run name attribute is optional
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # Data sets are handled as whole arrays, so stop iterating
                    # over individual points once one is processed
                    elif self.parent_class == u'SASdata':
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    # Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # Should be unreachable: h5py values are Groups or Datasets
                self.errors.add("ShouldNeverHappenException")

    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method

        :param data_set: data from HDF5 file
        :param key: name of the dataset within the SASdata group
        :param unit: unit attribute
        """
        # FIXME: check attributes of SASdata for Q, dQ, Mask, etc.
        if key == u'I':
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        elif key == u'Idev':
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        elif key == u'Q':
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
        elif key == u'Qdev':
            self.current_dataset.dx = data_set.flatten()
        elif key == u'dQw':
            self.current_dataset.dxw = data_set.flatten()
        elif key == u'dQl':
            self.current_dataset.dxl = data_set.flatten()
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        elif key == u'Mask':
            self.current_dataset.mask = data_set.flatten()
        elif key == u'wavelength':
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit

    def process_trans_spectrum(self, data_set, key):
        """
        SAStransmission_spectrum processor

        :param data_set: data from HDF5 file
        :param key: name of the dataset within the group
        """
        if key == u'T':
            self.trans_spectrum.transmission = data_set.flatten()
        elif key == u'Tdev':
            self.trans_spectrum.transmission_deviation = data_set.flatten()
        elif key == u'lambda':
            self.trans_spectrum.wavelength = data_set.flatten()

    def process_sample(self, data_point, key):
        """
        SASsample processor

        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the point was taken from
        """
        if key == u'Title':
            self.current_datainfo.sample.name = data_point
        elif key == u'name':
            self.current_datainfo.sample.name = data_point
        elif key == u'ID':
            self.current_datainfo.sample.name = data_point
        elif key == u'thickness':
            self.current_datainfo.sample.thickness = data_point
        elif key == u'temperature':
            self.current_datainfo.sample.temperature = data_point
        elif key == u'transmission':
            self.current_datainfo.sample.transmission = data_point
        elif key == u'x_position':
            self.current_datainfo.sample.position.x = data_point
        elif key == u'y_position':
            self.current_datainfo.sample.position.y = data_point
        elif key == u'pitch':
            self.current_datainfo.sample.orientation.x = data_point
        elif key == u'yaw':
            self.current_datainfo.sample.orientation.y = data_point
        elif key == u'roll':
            self.current_datainfo.sample.orientation.z = data_point
        elif key == u'details':
            self.current_datainfo.sample.details.append(data_point)

    def process_detector(self, data_point, key, unit):
        """
        SASdetector processor

        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the point was taken from
        :param unit: unit attribute from data set
        """
        if key == u'name':
            self.detector.name = data_point
        elif key == u'SDD':
            self.detector.distance = float(data_point)
            self.detector.distance_unit = unit
        elif key == u'slit_length':
            self.detector.slit_length = float(data_point)
            self.detector.slit_length_unit = unit
        elif key == u'x_position':
            self.detector.offset.x = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'y_position':
            self.detector.offset.y = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'pitch':
            self.detector.orientation.x = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'roll':
            self.detector.orientation.z = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'yaw':
            self.detector.orientation.y = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'beam_center_x':
            self.detector.beam_center.x = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'beam_center_y':
            self.detector.beam_center.y = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'x_pixel_size':
            self.detector.pixel_size.x = float(data_point)
            self.detector.pixel_size_unit = unit
        elif key == u'y_pixel_size':
            self.detector.pixel_size.y = float(data_point)
            self.detector.pixel_size_unit = unit

    def process_collimation(self, data_point, key, unit):
        """
        SAScollimation processor

        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the point was taken from
        :param unit: unit attribute from data set
        """
        if key == u'distance':
            self.collimation.length = data_point
            self.collimation.length_unit = unit
        elif key == u'name':
            self.collimation.name = data_point

    def process_aperture(self, data_point, key):
        """
        SASaperture processor

        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the point was taken from
        """
        if key == u'shape':
            self.aperture.shape = data_point
        elif key == u'x_gap':
            self.aperture.size.x = data_point
        elif key == u'y_gap':
            self.aperture.size.y = data_point

    def process_source(self, data_point, key, unit):
        """
        SASsource processor

        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the point was taken from
        :param unit: unit attribute from data set
        """
        if key == u'incident_wavelength':
            self.current_datainfo.source.wavelength = data_point
            self.current_datainfo.source.wavelength_unit = unit
        elif key == u'wavelength_max':
            self.current_datainfo.source.wavelength_max = data_point
            self.current_datainfo.source.wavelength_max_unit = unit
        elif key == u'wavelength_min':
            self.current_datainfo.source.wavelength_min = data_point
            self.current_datainfo.source.wavelength_min_unit = unit
        elif key == u'incident_wavelength_spread':
            self.current_datainfo.source.wavelength_spread = data_point
            self.current_datainfo.source.wavelength_spread_unit = unit
        elif key == u'beam_size_x':
            self.current_datainfo.source.beam_size.x = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_size_y':
            self.current_datainfo.source.beam_size.y = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_shape':
            self.current_datainfo.source.beam_shape = data_point
        elif key == u'radiation':
            self.current_datainfo.source.radiation = data_point

    def process_process(self, data_point, key):
        """
        SASprocess processor

        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the point was taken from
        """
        if key == u'Title':  # CanSAS 2.0
            self.process.name = data_point
        elif key == u'name':  # NXcanSAS
            self.process.name = data_point
        elif key == u'description':
            self.process.description = data_point
        elif key == u'date':
            self.process.date = data_point
        elif key == u'term':
            self.process.term = data_point
        else:
            self.process.notes.append(data_point)

    def add_intermediate(self):
        """
        This method stores any intermediate objects within the final data set
        after fully reading the set.

        Uses self.parent_class, the NXclass name of the h5py Group that has
        just finished being processed, to decide where each object belongs.
        """

        if self.parent_class == u'SASprocess':
            self.current_datainfo.process.append(self.process)
            self.process = Process()
        elif self.parent_class == u'SASdetector':
            self.current_datainfo.detector.append(self.detector)
            self.detector = Detector()
        elif self.parent_class == u'SAStransmission_spectrum':
            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
            self.trans_spectrum = TransmissionSpectrum()
        elif self.parent_class == u'SAScollimation':
            self.current_datainfo.collimation.append(self.collimation)
            self.collimation = Collimation()
        elif self.parent_class == u'SASaperture':
            self.collimation.aperture.append(self.aperture)
            self.aperture = Aperture()
        elif self.parent_class == u'SASdata':
            if isinstance(self.current_dataset, plottable_2D):
                self.data2d.append(self.current_dataset)
            elif isinstance(self.current_dataset, plottable_1D):
                self.data1d.append(self.current_dataset)

    def final_data_cleanup(self):
        """
        Do the final cleanup and formatting on self.current_datainfo and all
        plottable 1D and 2D objects, then combine each plottable with the
        metadata to produce the final Data1D and Data2D objects.
        """

        # Type cast data arrays to float64
        if len(self.current_datainfo.trans_spectrum) > 0:
            spectrum_list = []
            for spectrum in self.current_datainfo.trans_spectrum:
                spectrum.transmission = np.delete(spectrum.transmission, [0])
                spectrum.transmission = spectrum.transmission.astype(np.float64)
                spectrum.transmission_deviation = np.delete(
                    spectrum.transmission_deviation, [0])
                spectrum.transmission_deviation = \
                    spectrum.transmission_deviation.astype(np.float64)
                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
                if len(spectrum.transmission) > 0:
                    spectrum_list.append(spectrum)
            self.current_datainfo.trans_spectrum = spectrum_list

        # Append errors to dataset and reset class errors
        self.current_datainfo.errors = self.errors
        self.errors.clear()

        # Combine all plottables with datainfo and append each to output
        # Type cast data arrays to float64 and find min/max as appropriate
        for dataset in self.data2d:
            dataset.data = dataset.data.astype(np.float64)
            dataset.err_data = dataset.err_data.astype(np.float64)
            if dataset.qx_data is not None:
                dataset.xmin = np.min(dataset.qx_data)
                dataset.xmax = np.max(dataset.qx_data)
                dataset.qx_data = dataset.qx_data.astype(np.float64)
            if dataset.dqx_data is not None:
                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
            if dataset.qy_data is not None:
                dataset.ymin = np.min(dataset.qy_data)
                dataset.ymax = np.max(dataset.qy_data)
                dataset.qy_data = dataset.qy_data.astype(np.float64)
            if dataset.dqy_data is not None:
                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
            if dataset.q_data is not None:
                dataset.q_data = dataset.q_data.astype(np.float64)
            # Default to a fully unmasked data set, then copy in any stored
            # mask values
            default_mask = np.ones(dataset.data.size, dtype=bool)
            try:
                for i in range(dataset.mask.size):
                    default_mask[i] = dataset.mask[i]
            except Exception:
                self.errors.add(str(sys.exc_info()[1]))
            dataset.mask = default_mask
            # Calculate the actual Q matrix
            try:
                if dataset.q_data.size <= 1:
                    dataset.q_data = np.sqrt(dataset.qx_data
                                             * dataset.qx_data
                                             + dataset.qy_data
                                             * dataset.qy_data)
            except Exception:
                dataset.q_data = None

            if dataset.data.ndim == 2:
                (n_rows, n_cols) = dataset.data.shape
                dataset.y_bins = dataset.qy_data[0::n_cols]
                dataset.x_bins = dataset.qx_data[:n_cols]
                dataset.data = dataset.data.flatten()

            final_dataset = combine_data_info_with_plottable(
                dataset, self.current_datainfo)
            self.output.append(final_dataset)

        for dataset in self.data1d:
            if dataset.x is not None:
                dataset.x = dataset.x.astype(np.float64)
                dataset.xmin = np.min(dataset.x)
                dataset.xmax = np.max(dataset.x)
            if dataset.y is not None:
                dataset.y = dataset.y.astype(np.float64)
                dataset.ymin = np.min(dataset.y)
                dataset.ymax = np.max(dataset.y)
            if dataset.dx is not None:
                dataset.dx = dataset.dx.astype(np.float64)
            if dataset.dxl is not None:
                dataset.dxl = dataset.dxl.astype(np.float64)
            if dataset.dxw is not None:
                dataset.dxw = dataset.dxw.astype(np.float64)
            if dataset.dy is not None:
                dataset.dy = dataset.dy.astype(np.float64)
            final_dataset = combine_data_info_with_plottable(
                dataset, self.current_datainfo)
            self.output.append(final_dataset)

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final
        processing on the data and then calls a private method to generate a
        new data set.

        :param key: NeXus group name for current tree level
        """

        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()

    def _initialize_new_data_set(self, parent_list=None):
        """
        A private class method to generate a new 1D or 2D data object based on
        the type of data within the set. Outside methods should call
        add_data_set() to be sure any existing data is stored properly.

        :param parent_list: List of names of parent elements
        """

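        # The presence of a "Qx" entry under the current SASdata group is
        # taken as the marker for 2D I(Qx, Qy) data; otherwise the set is
        # treated as 1D I(Q).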
        if parent_list is None:
            parent_list = []
        if self._find_intermediate(parent_list, "Qx"):
            self.current_dataset = plottable_2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = plottable_1D(x, y)
        self.current_datainfo.filename = self.raw_data.filename

    def _find_data_attributes(self, value):
        """
        Parse the NXcanSAS attributes on a SASdata group to find the names of
        the signal, Q, and mask datasets, plus any uncertainty and resolution
        dataset names attached to them. The names are stored on the reader
        for use when the SASdata entries are read.

        :param value: h5py Group object for a SASdata element
        """
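        # For reference, a typical NXcanSAS SASdata group carries attributes
        # along these lines (illustrative values):
        #   @signal = "I"
        #   @I_axes = ["Q"] for 1D data, or ["Qy", "Qx"] for 2D data
        #   @Q_indices = [0]
        #   @mask = "mask"
        # and the Q dataset itself may carry @uncertainties / @resolutions.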
        attrs = value.attrs
        signal = attrs.get("signal")
        i_axes = attrs.get("I_axes")
        q_indices = attrs.get("Q_indices")
        mask = attrs.get("mask")
        mask_indices = attrs.get("Mask_indices")
        keys = value.keys()
        self.q_name = i_axes[q_indices]
        self.mask_name = mask[mask_indices]
        self.i_name = signal
        if self.q_name in keys:
            q_vals = value.get(self.q_name)
            self.q_uncertainties = q_vals.attrs.get("uncertainties")
            self.q_resolutions = q_vals.attrs.get("resolutions")
        if self.i_name in keys:
            i_vals = value.get(self.i_name)
            self.i_uncertainties = i_vals.attrs.get("uncertainties")

    def _find_intermediate(self, parent_list, basename=""):
        """
        A private class method used to find an entry by either using a direct
        key or knowing the approximate basename.

        :param parent_list: List of parent nodes in the HDF5 file
        :param basename: Approximate name of an entry to search for
        :return: True if an entry matching basename exists at this level
        """

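        # e.g. parent_list=["sasentry01", "sasdata01"] with basename="Qx"
        # checks whether any key under /sasentry01/sasdata01 matches "Qx"
        # (group names here are illustrative).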
        entry = False
        key_prog = re.compile(basename)
        top = self.raw_data
        for parent in parent_list:
            top = top.get(parent)
        for key in top.keys():
            if key_prog.match(key):
                entry = True
                break
        return entry

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The name of the item to be added to the dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: The new name for the dictionary entry
        """
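        # For example: if "run" is already a key, the result is "run_1";
        # if "run_1" is also taken, recursion produces "run_2", and so on.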
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name

    def _get_unit(self, value):
        """
        Find the unit for a particular value within the h5py dictionary

        :param value: h5py Dataset whose attributes carry the unit
        :return: unit for the value passed to the method
        """
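        # e.g. a dataset stored with attrs = {"units": "1/A"} is returned
        # as "A^{-1}", the convention used elsewhere in the loaders.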
        unit = value.attrs.get(u'units')
        if unit is None:
            unit = value.attrs.get(u'unit')
        # Convert the unit formats
        if unit == "1/A":
            unit = "A^{-1}"
        elif unit == "1/cm":
            unit = "cm^{-1}"
        return unit