source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ d7fd7be

Branches/tags containing this revision: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since d7fd7be was d7fd7be, checked in by krzywon, 6 years ago

Merge branch 'master' into ticket-976

  • Property mode set to 100644
File size: 26.3 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
def h5attr(node, key, default=None):
    """Read HDF5 attribute *key* from *node*, decoded to a native string.

    Returns *default* when the attribute is not present.
    """
    raw = node.attrs.get(key, default)
    return decode(raw)
class Reader(FileReader):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens
    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    # CanSAS version this reader targets
    cansas_version = 2.0
    # Logged warnings or messages (class-level placeholder; set to a fresh
    # list per file in reset_state)
    logging = None
    # List of errors for the current data set (fresh set per file)
    errors = None
    # Raw file contents to be processed (h5py.File handle while reading)
    raw_data = None
    # List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    # List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    # Data type name shown to the loader framework
    type_name = "CanSAS 2.0"
    # Wildcards for the file-open dialog
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check
    allow_all = True
58
59    def get_file_contents(self):
60        """
61        This is the general read method that all SasView data_loaders must have.
62
63        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
64        :return: List of Data1D/2D objects and/or a list of errors.
65        """
66        # Reinitialize when loading a new data file to reset all class variables
67        self.reset_state()
68
69        filename = self.f_open.name
70        self.f_open.close() # IO handled by h5py
71
72        # Check that the file exists
73        if os.path.isfile(filename):
74            basename = os.path.basename(filename)
75            _, extension = os.path.splitext(basename)
76            # If the file type is not allowed, return empty list
77            if extension in self.ext or self.allow_all:
78                # Load the data file
79                try:
80                    self.raw_data = h5py.File(filename, 'r')
81                except Exception as e:
82                    if extension not in self.ext:
83                        msg = "CanSAS2.0 HDF5 Reader could not load file {}".format(basename + extension)
84                        raise DefaultReaderException(msg)
85                    raise FileContentsException(e.message)
86                try:
87                    # Read in all child elements of top level SASroot
88                    self.read_children(self.raw_data, [])
89                    # Add the last data set to the list of outputs
90                    self.add_data_set()
91                except Exception as exc:
92                    raise FileContentsException(exc.message)
93                finally:
94                    # Close the data file
95                    self.raw_data.close()
96
97                for dataset in self.output:
98                    if isinstance(dataset, Data1D):
99                        if dataset.x.size < 5:
100                            self.output = []
101                            raise FileContentsException("Fewer than 5 data points found.")
102
103    def reset_state(self):
104        """
105        Create the reader object and define initial states for class variables
106        """
107        super(Reader, self).reset_state()
108        self.data1d = []
109        self.data2d = []
110        self.raw_data = None
111        self.errors = set()
112        self.logging = []
113        self.output = []
114        self.q_name = []
115        self.mask_name = u''
116        self.i_name = u''
117        self.i_node = u''
118        self.q_uncertainties = u''
119        self.q_resolutions = u''
120        self.i_uncertainties = u''
121        self.parent_class = u''
122        self.detector = Detector()
123        self.collimation = Collimation()
124        self.aperture = Aperture()
125        self.process = Process()
126        self.trans_spectrum = TransmissionSpectrum()
127
    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        Groups are classified by their canSAS_class/NX_class attribute and
        recursed into; datasets are dispatched to the matching process_*
        handler based on the enclosing group's class.

        :param data: h5py Group object of any kind
        :param parent_list: list of group names traversed to reach *data*
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            # Prefer canSAS_class, fall back to NX_class, and finally to the
            # node's own HDF5 path when neither attribute is present
            class_name = h5attr(value, u'canSAS_class')
            if class_name is None:
                class_name = h5attr(value, u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                # Set parent class before recursion
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._find_data_attributes(value)
                    self._initialize_new_data_set(parent_list)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                # Restore this group's class: the recursive call above has
                # overwritten self.parent_class with deeper groups' values
                self.parent_class = class_name
                self.add_intermediate()
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    # Byte-string values are decoded to native str first
                    if isinstance(data_point, np.ndarray):
                        if data_point.dtype.char == 'S':
                            data_point = decode(bytes(data_point))
                    else:
                        data_point = decode(data_point)
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = h5attr(value, 'name')
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # best effort: a run may legitimately lack a name
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # SASdata and transmission spectra consume the whole
                    # array at once, so break out of the per-point loop
                    elif self.parent_class == u'SASdata':
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    # Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # I don't know if this reachable code
                self.errors.add("ShouldNeverHappenException")
232
    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method: routes a dataset to the I/Q/mask slot of
        the plottable currently being built.

        :param data_set: data from HDF5 file
        :param key: canSAS_class attribute
        :param unit: unit attribute
        """
        # FIXME: check attributes of SASdata for Q, dQ, Mask, etc.
        # NOTE(review): self.q_uncertainties is populated by
        # _find_data_attributes but never consulted here, so dQ named via the
        # "uncertainty" attribute is not stored -- confirm intended.
        if key == self.i_name:
            # Intensity: 2D keeps the matrix; 1D flattens into y
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        elif key == self.i_uncertainties:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        elif key in self.q_name:
            # q_name is a list of axis names collected from I_axes/Q_indices
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
        elif key in self.q_resolutions:
            # NOTE(review): q_resolutions may be a plain string, so `in` does
            # substring matching here rather than list membership -- verify.
            if key == u'dQw':
                self.current_dataset.dxw = data_set.flatten()
            elif key == u'dQl':
                self.current_dataset.dxl = data_set.flatten()
            else:
                self.current_dataset.dx = data_set.flatten()
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        elif key == self.mask_name:
            self.current_dataset.mask = data_set.flatten()
        elif key == u'wavelength':
            # Wavelength attached to the data group belongs to the source
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit
281
282    def process_trans_spectrum(self, data_set, key):
283        """
284        SAStransmission_spectrum processor
285        :param data_set: data from HDF5 file
286        :param key: canSAS_class attribute
287        """
288        if key == u'T':
289            self.trans_spectrum.transmission = data_set.flatten()
290        elif key == u'Tdev':
291            self.trans_spectrum.transmission_deviation = data_set.flatten()
292        elif key == u'lambda':
293            self.trans_spectrum.wavelength = data_set.flatten()
294
295    def process_sample(self, data_point, key):
296        """
297        SASsample processor
298        :param data_point: Single point from an HDF5 data file
299        :param key: class name data_point was taken from
300        """
301        if key == u'Title':
302            self.current_datainfo.sample.name = data_point
303        elif key == u'name':
304            self.current_datainfo.sample.name = data_point
305        elif key == u'ID':
306            self.current_datainfo.sample.name = data_point
307        elif key == u'thickness':
308            self.current_datainfo.sample.thickness = data_point
309        elif key == u'temperature':
310            self.current_datainfo.sample.temperature = data_point
311        elif key == u'transmission':
312            self.current_datainfo.sample.transmission = data_point
313        elif key == u'x_position':
314            self.current_datainfo.sample.position.x = data_point
315        elif key == u'y_position':
316            self.current_datainfo.sample.position.y = data_point
317        elif key == u'pitch':
318            self.current_datainfo.sample.orientation.x = data_point
319        elif key == u'yaw':
320            self.current_datainfo.sample.orientation.y = data_point
321        elif key == u'roll':
322            self.current_datainfo.sample.orientation.z = data_point
323        elif key == u'details':
324            self.current_datainfo.sample.details.append(data_point)
325
326    def process_detector(self, data_point, key, unit):
327        """
328        SASdetector processor
329        :param data_point: Single point from an HDF5 data file
330        :param key: class name data_point was taken from
331        :param unit: unit attribute from data set
332        """
333        if key == u'name':
334            self.detector.name = data_point
335        elif key == u'SDD':
336            self.detector.distance = float(data_point)
337            self.detector.distance_unit = unit
338        elif key == u'slit_length':
339            self.detector.slit_length = float(data_point)
340            self.detector.slit_length_unit = unit
341        elif key == u'x_position':
342            self.detector.offset.x = float(data_point)
343            self.detector.offset_unit = unit
344        elif key == u'y_position':
345            self.detector.offset.y = float(data_point)
346            self.detector.offset_unit = unit
347        elif key == u'pitch':
348            self.detector.orientation.x = float(data_point)
349            self.detector.orientation_unit = unit
350        elif key == u'roll':
351            self.detector.orientation.z = float(data_point)
352            self.detector.orientation_unit = unit
353        elif key == u'yaw':
354            self.detector.orientation.y = float(data_point)
355            self.detector.orientation_unit = unit
356        elif key == u'beam_center_x':
357            self.detector.beam_center.x = float(data_point)
358            self.detector.beam_center_unit = unit
359        elif key == u'beam_center_y':
360            self.detector.beam_center.y = float(data_point)
361            self.detector.beam_center_unit = unit
362        elif key == u'x_pixel_size':
363            self.detector.pixel_size.x = float(data_point)
364            self.detector.pixel_size_unit = unit
365        elif key == u'y_pixel_size':
366            self.detector.pixel_size.y = float(data_point)
367            self.detector.pixel_size_unit = unit
368
369    def process_collimation(self, data_point, key, unit):
370        """
371        SAScollimation processor
372        :param data_point: Single point from an HDF5 data file
373        :param key: class name data_point was taken from
374        :param unit: unit attribute from data set
375        """
376        if key == u'distance':
377            self.collimation.length = data_point
378            self.collimation.length_unit = unit
379        elif key == u'name':
380            self.collimation.name = data_point
381
382    def process_aperture(self, data_point, key):
383        """
384        SASaperture processor
385        :param data_point: Single point from an HDF5 data file
386        :param key: class name data_point was taken from
387        """
388        if key == u'shape':
389            self.aperture.shape = data_point
390        elif key == u'x_gap':
391            self.aperture.size.x = data_point
392        elif key == u'y_gap':
393            self.aperture.size.y = data_point
394
395    def process_source(self, data_point, key, unit):
396        """
397        SASsource processor
398        :param data_point: Single point from an HDF5 data file
399        :param key: class name data_point was taken from
400        :param unit: unit attribute from data set
401        """
402        if key == u'incident_wavelength':
403            self.current_datainfo.source.wavelength = data_point
404            self.current_datainfo.source.wavelength_unit = unit
405        elif key == u'wavelength_max':
406            self.current_datainfo.source.wavelength_max = data_point
407            self.current_datainfo.source.wavelength_max_unit = unit
408        elif key == u'wavelength_min':
409            self.current_datainfo.source.wavelength_min = data_point
410            self.current_datainfo.source.wavelength_min_unit = unit
411        elif key == u'incident_wavelength_spread':
412            self.current_datainfo.source.wavelength_spread = data_point
413            self.current_datainfo.source.wavelength_spread_unit = unit
414        elif key == u'beam_size_x':
415            self.current_datainfo.source.beam_size.x = data_point
416            self.current_datainfo.source.beam_size_unit = unit
417        elif key == u'beam_size_y':
418            self.current_datainfo.source.beam_size.y = data_point
419            self.current_datainfo.source.beam_size_unit = unit
420        elif key == u'beam_shape':
421            self.current_datainfo.source.beam_shape = data_point
422        elif key == u'radiation':
423            self.current_datainfo.source.radiation = data_point
424
425    def process_process(self, data_point, key):
426        """
427        SASprocess processor
428        :param data_point: Single point from an HDF5 data file
429        :param key: class name data_point was taken from
430        """
431        if key == u'Title':  # CanSAS 2.0
432            self.process.name = data_point
433        elif key == u'name':  # NXcanSAS
434            self.process.name = data_point
435        elif key == u'description':
436            self.process.description = data_point
437        elif key == u'date':
438            self.process.date = data_point
439        elif key == u'term':
440            self.process.term = data_point
441        else:
442            self.process.notes.append(data_point)
443
444    def add_intermediate(self):
445        """
446        This method stores any intermediate objects within the final data set
447        after fully reading the set.
448
449        :param parent: The NXclass name for the h5py Group object that just
450                       finished being processed
451        """
452
453        if self.parent_class == u'SASprocess':
454            self.current_datainfo.process.append(self.process)
455            self.process = Process()
456        elif self.parent_class == u'SASdetector':
457            self.current_datainfo.detector.append(self.detector)
458            self.detector = Detector()
459        elif self.parent_class == u'SAStransmission_spectrum':
460            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
461            self.trans_spectrum = TransmissionSpectrum()
462        elif self.parent_class == u'SAScollimation':
463            self.current_datainfo.collimation.append(self.collimation)
464            self.collimation = Collimation()
465        elif self.parent_class == u'SASaperture':
466            self.collimation.aperture.append(self.aperture)
467            self.aperture = Aperture()
468        elif self.parent_class == u'SASdata':
469            if isinstance(self.current_dataset, plottable_2D):
470                self.data2d.append(self.current_dataset)
471            elif isinstance(self.current_dataset, plottable_1D):
472                self.data1d.append(self.current_dataset)
473
474    def final_data_cleanup(self):
475        """
476        Does some final cleanup and formatting on self.current_datainfo and
477        all data1D and data2D objects and then combines the data and info into
478        Data1D and Data2D objects
479        """
480        # Type cast data arrays to float64
481        if len(self.current_datainfo.trans_spectrum) > 0:
482            spectrum_list = []
483            for spectrum in self.current_datainfo.trans_spectrum:
484                spectrum.transmission = np.delete(spectrum.transmission, [0])
485                spectrum.transmission = spectrum.transmission.astype(np.float64)
486                spectrum.transmission_deviation = np.delete(
487                    spectrum.transmission_deviation, [0])
488                spectrum.transmission_deviation = \
489                    spectrum.transmission_deviation.astype(np.float64)
490                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
491                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
492                if len(spectrum.transmission) > 0:
493                    spectrum_list.append(spectrum)
494            self.current_datainfo.trans_spectrum = spectrum_list
495
496        # Append errors to dataset and reset class errors
497        self.current_datainfo.errors = self.errors
498        self.errors.clear()
499
500        # Combine all plottables with datainfo and append each to output
501        # Type cast data arrays to float64 and find min/max as appropriate
502        for dataset in self.data2d:
503            zeros = np.ones(dataset.data.size, dtype=bool)
504            try:
505                for i in range(0, dataset.mask.size - 1):
506                    zeros[i] = dataset.mask[i]
507            except:
508                self.errors.add(sys.exc_value)
509            dataset.mask = zeros
510            # Calculate the actual Q matrix
511            try:
512                if dataset.q_data.size <= 1:
513                    dataset.q_data = np.sqrt(dataset.qx_data
514                                             * dataset.qx_data
515                                             + dataset.qy_data
516                                             * dataset.qy_data)
517            except:
518                dataset.q_data = None
519
520            if dataset.data.ndim == 2:
521                (n_rows, n_cols) = dataset.data.shape
522                dataset.y_bins = dataset.qy_data[0::n_cols]
523                dataset.x_bins = dataset.qx_data[:n_cols]
524                dataset.data = dataset.data.flatten()
525            self.current_dataset = dataset
526            self.send_to_output()
527
528        for dataset in self.data1d:
529            self.current_dataset = dataset
530            self.send_to_output()
531
532    def add_data_set(self, key=""):
533        """
534        Adds the current_dataset to the list of outputs after preforming final
535        processing on the data and then calls a private method to generate a
536        new data set.
537
538        :param key: NeXus group name for current tree level
539        """
540
541        if self.current_datainfo and self.current_dataset:
542            self.final_data_cleanup()
543        self.data1d = []
544        self.data2d = []
545        self.current_datainfo = DataInfo()
546
547    def _initialize_new_data_set(self, parent_list=None):
548        """
549        A private class method to generate a new 1D or 2D data object based on
550        the type of data within the set. Outside methods should call
551        add_data_set() to be sure any existing data is stored properly.
552
553        :param parent_list: List of names of parent elements
554        """
555
556        if parent_list is None:
557            parent_list = []
558        if self._find_intermediate(parent_list, "Qx"):
559            self.current_dataset = plottable_2D()
560        else:
561            x = np.array(0)
562            y = np.array(0)
563            self.current_dataset = plottable_1D(x, y)
564        self.current_datainfo.filename = self.raw_data.filename
565
566    def _find_data_attributes(self, value):
567        """
568        A class to find the indices for Q, the name of the Qdev and Idev, and
569        the name of the mask.
570        :param value: SASdata/NXdata HDF5 Group
571        """
572        attrs = value.attrs
573        signal = attrs.get("signal")
574
575        i_axes = np.array(attrs.get("I_axes").split(","))
576        q_indices = np.array(attrs.get("Q_indices"))
577        self.mask_name = attrs.get("mask")
578        keys = value.keys()
579        for val in q_indices:
580            self.q_name.append(i_axes[val])
581        self.i_name = signal
582        self.i_node = value.get(self.i_name)
583        for item in self.q_name:
584            if item in keys:
585                q_vals = value.get(item)
586                self.q_uncertainties = q_vals.attrs.get("uncertainty")
587                self.q_resolutions = q_vals.attrs.get("resolution")
588        if self.i_name in keys:
589            i_vals = value.get(self.i_name)
590            self.i_uncertainties = i_vals.attrs.get("uncertainty")
591
592    def _find_intermediate(self, parent_list, basename=""):
593        """
594        A private class used to find an entry by either using a direct key or
595        knowing the approximate basename.
596
597        :param parent_list: List of parents nodes in the HDF5 file
598        :param basename: Approximate name of an entry to search for
599        :return:
600        """
601
602        entry = False
603        key_prog = re.compile(basename)
604        top = self.raw_data
605        for parent in parent_list:
606            top = top.get(parent)
607        for key in top.keys():
608            if key_prog.match(key):
609                entry = True
610                break
611        return entry
612
613    def _create_unique_key(self, dictionary, name, numb=0):
614        """
615        Create a unique key value for any dictionary to prevent overwriting
616        Recurses until a unique key value is found.
617
618        :param dictionary: A dictionary with any number of entries
619        :param name: The index of the item to be added to dictionary
620        :param numb: The number to be appended to the name, starts at 0
621        :return: The new name for the dictionary entry
622        """
623        if dictionary.get(name) is not None:
624            numb += 1
625            name = name.split("_")[0]
626            name += "_{0}".format(numb)
627            name = self._create_unique_key(dictionary, name, numb)
628        return name
629
630    def _get_unit(self, value):
631        """
632        Find the unit for a particular value within the h5py dictionary
633
634        :param value: attribute dictionary for a particular value set
635        :return: unit for the value passed to the method
636        """
637        unit = h5attr(value, u'units')
638        if unit is None:
639            unit = h5attr(value, u'unit')
640        # Convert the unit formats
641        if unit == "1/A":
642            unit = "A^{-1}"
643        elif unit == "1/cm":
644            unit = "cm^{-1}"
645        return unit
Note: See TracBrowser for help on using the repository browser.