source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ ac38ab4

Branches: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since ac38ab4 was ac38ab4, checked in by krzywon, 14 months ago

Save process and transmission spectrum and fix issues with loading them.

  • Property mode set to 100644
File size: 26.4 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
def h5attr(node, key, default=None):
    """Fetch attribute *key* from an h5py node, decoded to a native string.

    Falls back to *default* (also passed through ``decode``) when the
    attribute is absent.
    """
    raw = node.attrs.get(key, default)
    return decode(raw)
20
class Reader(FileReader):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens
    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    # CanSAS version
    cansas_version = 2.0
    # Data type name
    type_name = "NXcanSAS"
    # Wildcards
    type = ["NXcanSAS HDF5 Files (*.h5)|*.h5|"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check
    allow_all = True

    def get_file_contents(self):
        """
        This is the general read method that all SasView data_loaders must have.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects and/or a list of errors.
        """
        # Reinitialize when loading a new data file to reset all class variables
        self.reset_state()

        filename = self.f_open.name
        self.f_open.close()  # IO handled by h5py

        # Check that the file exists
        if os.path.isfile(filename):
            basename = os.path.basename(filename)
            _, extension = os.path.splitext(basename)
            # If the file type is not allowed, return empty list
            if extension in self.ext or self.allow_all:
                # Load the data file
                try:
                    self.raw_data = h5py.File(filename, 'r')
                except Exception as e:
                    if extension not in self.ext:
                        # Unrecognized extension and not an HDF5 file: let
                        # the other registered readers have a try.
                        # basename already includes the extension.
                        msg = "NXcanSAS HDF5 Reader could not load file {}".format(basename)
                        raise DefaultReaderException(msg)
                    # Python 3: exception objects have no .message attribute;
                    # str(e) is the portable way to get the message text.
                    raise FileContentsException(str(e))
                try:
                    # Read in all child elements of top level SASroot
                    self.read_children(self.raw_data, [])
                    # Add the last data set to the list of outputs
                    self.add_data_set()
                except Exception as exc:
                    raise FileContentsException(str(exc))
                finally:
                    # Close the data file
                    self.raw_data.close()

                # Reject 1D sets that are too small to be useful data
                for dataset in self.output:
                    if isinstance(dataset, Data1D):
                        if dataset.x.size < 5:
                            self.output = []
                            raise FileContentsException(
                                "Fewer than 5 data points found.")

    def reset_state(self):
        """
        Create the reader object and define initial states for class variables
        """
        super(Reader, self).reset_state()
        self.data1d = []
        self.data2d = []
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.q_name = []
        self.mask_name = u''
        self.i_name = u''
        self.i_node = u''
        self.q_uncertainties = []
        self.q_resolutions = []
        self.i_uncertainties = u''
        self.parent_class = u''
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()

    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        :param data: h5py Group object of any kind
        :param parent_list: list of group names from the root down to *data*
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            class_name = h5attr(value, u'canSAS_class')
            if class_name is None:
                class_name = h5attr(value, u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                # Set parent class before recursion
                last_parent_class = self.parent_class
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(value)
                    self._find_data_attributes(value)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                self.add_intermediate()
                # Reset parent class when returning from recursive method
                self.parent_class = last_parent_class
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    if isinstance(data_point, np.ndarray):
                        if data_point.dtype.char == 'S':
                            data_point = decode(bytes(data_point))
                    else:
                        data_point = decode(data_point)
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = h5attr(value, 'name')
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # The run name attribute is optional; ignore
                            # any failure to read it.
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # Everything else goes in meta_data
                    elif self.parent_class == u'SASdata':
                        # Whole-array handling; break out of the per-point loop
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # Neither a Group nor a Dataset: should be unreachable with
                # well-formed HDF5, but record it rather than crash.
                self.errors.add("ShouldNeverHappenException")

    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method
        :param data_set: data from HDF5 file
        :param key: canSAS_class attribute
        :param unit: unit attribute
        """
        if key == self.i_name:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        elif key == self.i_uncertainties:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        elif key in self.q_name:
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
        elif key in self.q_uncertainties or key in self.q_resolutions:
            # With two resolution columns, the first is the slit width and
            # the second the slit length; otherwise a plain dQ column.
            if (len(self.q_resolutions) > 1
                    and np.where(self.q_resolutions == key)[0] == 0):
                self.current_dataset.dxw = data_set.flatten()
            elif (len(self.q_resolutions) > 1
                  and np.where(self.q_resolutions == key)[0] == 1):
                self.current_dataset.dxl = data_set.flatten()
            else:
                self.current_dataset.dx = data_set.flatten()
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        elif key == self.mask_name:
            self.current_dataset.mask = data_set.flatten()
        elif key == u'wavelength':
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit

    def process_trans_spectrum(self, data_set, key):
        """
        SAStransmission_spectrum processor
        :param data_set: data from HDF5 file
        :param key: canSAS_class attribute
        """
        if key == u'T':
            self.trans_spectrum.transmission = data_set.flatten()
        elif key == u'Tdev':
            self.trans_spectrum.transmission_deviation = data_set.flatten()
        elif key == u'lambda':
            self.trans_spectrum.wavelength = data_set.flatten()

    def process_sample(self, data_point, key):
        """
        SASsample processor
        :param data_point: Single point from an HDF5 data file
        :param key: class name data_point was taken from
        """
        # Title/name/ID are alternate spellings of the sample name across
        # CanSAS 2.0 and NXcanSAS files; the last one seen wins.
        if key == u'Title':
            self.current_datainfo.sample.name = data_point
        elif key == u'name':
            self.current_datainfo.sample.name = data_point
        elif key == u'ID':
            self.current_datainfo.sample.name = data_point
        elif key == u'thickness':
            self.current_datainfo.sample.thickness = data_point
        elif key == u'temperature':
            self.current_datainfo.sample.temperature = data_point
        elif key == u'transmission':
            self.current_datainfo.sample.transmission = data_point
        elif key == u'x_position':
            self.current_datainfo.sample.position.x = data_point
        elif key == u'y_position':
            self.current_datainfo.sample.position.y = data_point
        elif key == u'pitch':
            self.current_datainfo.sample.orientation.x = data_point
        elif key == u'yaw':
            self.current_datainfo.sample.orientation.y = data_point
        elif key == u'roll':
            self.current_datainfo.sample.orientation.z = data_point
        elif key == u'details':
            self.current_datainfo.sample.details.append(data_point)

    def process_detector(self, data_point, key, unit):
        """
        SASdetector processor
        :param data_point: Single point from an HDF5 data file
        :param key: class name data_point was taken from
        :param unit: unit attribute from data set
        """
        if key == u'name':
            self.detector.name = data_point
        elif key == u'SDD':
            self.detector.distance = float(data_point)
            self.detector.distance_unit = unit
        elif key == u'slit_length':
            self.detector.slit_length = float(data_point)
            self.detector.slit_length_unit = unit
        elif key == u'x_position':
            self.detector.offset.x = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'y_position':
            self.detector.offset.y = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'pitch':
            self.detector.orientation.x = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'roll':
            self.detector.orientation.z = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'yaw':
            self.detector.orientation.y = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'beam_center_x':
            self.detector.beam_center.x = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'beam_center_y':
            self.detector.beam_center.y = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'x_pixel_size':
            self.detector.pixel_size.x = float(data_point)
            self.detector.pixel_size_unit = unit
        elif key == u'y_pixel_size':
            self.detector.pixel_size.y = float(data_point)
            self.detector.pixel_size_unit = unit

    def process_collimation(self, data_point, key, unit):
        """
        SAScollimation processor
        :param data_point: Single point from an HDF5 data file
        :param key: class name data_point was taken from
        :param unit: unit attribute from data set
        """
        if key == u'distance':
            self.collimation.length = data_point
            self.collimation.length_unit = unit
        elif key == u'name':
            self.collimation.name = data_point

    def process_aperture(self, data_point, key):
        """
        SASaperture processor
        :param data_point: Single point from an HDF5 data file
        :param key: class name data_point was taken from
        """
        if key == u'shape':
            self.aperture.shape = data_point
        elif key == u'x_gap':
            self.aperture.size.x = data_point
        elif key == u'y_gap':
            self.aperture.size.y = data_point

    def process_source(self, data_point, key, unit):
        """
        SASsource processor
        :param data_point: Single point from an HDF5 data file
        :param key: class name data_point was taken from
        :param unit: unit attribute from data set
        """
        if key == u'incident_wavelength':
            self.current_datainfo.source.wavelength = data_point
            self.current_datainfo.source.wavelength_unit = unit
        elif key == u'wavelength_max':
            self.current_datainfo.source.wavelength_max = data_point
            self.current_datainfo.source.wavelength_max_unit = unit
        elif key == u'wavelength_min':
            self.current_datainfo.source.wavelength_min = data_point
            self.current_datainfo.source.wavelength_min_unit = unit
        elif key == u'incident_wavelength_spread':
            self.current_datainfo.source.wavelength_spread = data_point
            self.current_datainfo.source.wavelength_spread_unit = unit
        elif key == u'beam_size_x':
            self.current_datainfo.source.beam_size.x = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_size_y':
            self.current_datainfo.source.beam_size.y = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_shape':
            self.current_datainfo.source.beam_shape = data_point
        elif key == u'radiation':
            self.current_datainfo.source.radiation = data_point

    def process_process(self, data_point, key):
        """
        SASprocess processor
        :param data_point: Single point from an HDF5 data file
        :param key: class name data_point was taken from
        """
        term_match = re.compile(u'^term[0-9]+$')
        if key == u'Title':  # CanSAS 2.0
            self.process.name = data_point
        elif key == u'name':  # NXcanSAS
            self.process.name = data_point
        elif key == u'description':
            self.process.description = data_point
        elif key == u'date':
            self.process.date = data_point
        elif term_match.match(key):
            self.process.term.append(data_point)
        else:
            self.process.notes.append(data_point)

    def add_intermediate(self):
        """
        This method stores any intermediate objects within the final data set
        after fully reading the set.

        Uses self.parent_class (the NXclass name for the h5py Group object
        that just finished being processed) to decide where the object goes.
        """

        if self.parent_class == u'SASprocess':
            self.current_datainfo.process.append(self.process)
            self.process = Process()
        elif self.parent_class == u'SASdetector':
            self.current_datainfo.detector.append(self.detector)
            self.detector = Detector()
        elif self.parent_class == u'SAStransmission_spectrum':
            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
            self.trans_spectrum = TransmissionSpectrum()
        elif self.parent_class == u'SAScollimation':
            self.current_datainfo.collimation.append(self.collimation)
            self.collimation = Collimation()
        elif self.parent_class == u'SASaperture':
            self.collimation.aperture.append(self.aperture)
            self.aperture = Aperture()
        elif self.parent_class == u'SASdata':
            if isinstance(self.current_dataset, plottable_2D):
                self.data2d.append(self.current_dataset)
            elif isinstance(self.current_dataset, plottable_1D):
                self.data1d.append(self.current_dataset)

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_datainfo and
        all data1D and data2D objects and then combines the data and info into
        Data1D and Data2D objects
        """
        # Type cast data arrays to float64
        if len(self.current_datainfo.trans_spectrum) > 0:
            spectrum_list = []
            for spectrum in self.current_datainfo.trans_spectrum:
                spectrum.transmission = spectrum.transmission.astype(np.float64)
                spectrum.transmission_deviation = \
                    spectrum.transmission_deviation.astype(np.float64)
                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
                if len(spectrum.transmission) > 0:
                    spectrum_list.append(spectrum)
            self.current_datainfo.trans_spectrum = spectrum_list

        # Append errors to dataset and reset class errors.
        # A copy is essential: assigning self.errors directly and then
        # clearing it would empty the very set just attached to the datainfo.
        self.current_datainfo.errors = self.errors.copy()
        self.errors.clear()

        # Combine all plottables with datainfo and append each to output
        # Type cast data arrays to float64 and find min/max as appropriate
        for dataset in self.data2d:
            default_mask = np.ones(dataset.data.size, dtype=bool)
            try:
                # Copy every mask element (range(size - 1) silently dropped
                # the final point)
                for i in range(dataset.mask.size):
                    default_mask[i] = dataset.mask[i]
            except Exception as exc:
                self.errors.add(str(exc))
            dataset.mask = default_mask
            # Calculate the actual Q matrix
            try:
                if dataset.q_data.size <= 1:
                    dataset.q_data = np.sqrt(dataset.qx_data
                                             * dataset.qx_data
                                             + dataset.qy_data
                                             * dataset.qy_data)
            except Exception:
                dataset.q_data = None

            if dataset.data.ndim == 2:
                (n_rows, n_cols) = dataset.data.shape
                dataset.y_bins = dataset.qy_data[0::n_cols]
                dataset.x_bins = dataset.qx_data[:n_cols]
                dataset.data = dataset.data.flatten()
            self.current_dataset = dataset
            self.send_to_output()

        for dataset in self.data1d:
            self.current_dataset = dataset
            self.send_to_output()

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final
        processing on the data and then calls a private method to generate a
        new data set.

        :param key: NeXus group name for current tree level
        """

        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()

    def _initialize_new_data_set(self, value=None):
        """
        A private class method to generate a new 1D or 2D data object based on
        the type of data within the set. Outside methods should call
        add_data_set() to be sure any existing data is stored properly.

        :param value: SASdata/NXdata HDF5 Group used to sniff dimensionality
        """
        if self._is2d(value):
            self.current_dataset = plottable_2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = plottable_1D(x, y)
        self.current_datainfo.filename = self.raw_data.filename
        # Reset per-SASdata bookkeeping
        self.mask_name = ""
        self.i_name = ""
        self.i_node = ""
        self.q_name = []
        self.q_uncertainties = []
        self.q_resolutions = []
        self.i_uncertainties = ""

    def _find_data_attributes(self, value):
        """
        A class to find the indices for Q, the name of the Qdev and Idev, and
        the name of the mask.
        :param value: SASdata/NXdata HDF5 Group
        """
        # NXcanSAS defaults when the attributes are absent
        signal = "I"
        i_axes = ["Q"]
        q_indices = [0]
        attrs = value.attrs
        # h5py's AttributeManager stores keys via the mapping interface, so
        # hasattr() never finds them; membership tests are required here.
        if "signal" in attrs:
            signal = attrs.get("signal")
        if "I_axes" in attrs:
            i_axes = np.array(str(attrs.get("I_axes")).split(","))
        if "Q_indices" in attrs:
            q_indices = np.int_(attrs.get("Q_indices").split(","))
        keys = value.keys()
        self.mask_name = attrs.get("mask")
        for val in q_indices:
            self.q_name.append(i_axes[val])
        self.i_name = signal
        self.i_node = value.get(self.i_name)
        for item in self.q_name:
            if item in keys:
                q_vals = value.get(item)
                # "uncertainties" (plural) is the NXcanSAS spelling;
                # "uncertainty" is accepted for older writers.
                if q_vals.attrs.get("uncertainties"):
                    self.q_uncertainties = q_vals.attrs.get("uncertainties")
                elif q_vals.attrs.get("uncertainty"):
                    self.q_uncertainties = q_vals.attrs.get("uncertainty")
                if isinstance(self.q_uncertainties, str):
                    self.q_uncertainties = [self.q_uncertainties]
                if q_vals.attrs.get("resolutions"):
                    self.q_resolutions = q_vals.attrs.get("resolutions")
                if isinstance(self.q_resolutions, str):
                    self.q_resolutions = [self.q_resolutions]
        if self.i_name in keys:
            i_vals = value.get(self.i_name)
            self.i_uncertainties = i_vals.attrs.get("uncertainties")
            if self.i_uncertainties is None:
                self.i_uncertainties = i_vals.attrs.get("uncertainty")

    def _is2d(self, value, basename="I"):
        """
        A private class to determine if the data set is 1d or 2d.

        :param value: h5py Group holding the intensity entry
        :param basename: Approximate name of an entry to search for
        :return: True if 2D, otherwise false
        """

        vals = value.get(basename)
        return (vals is not None and vals.shape is not None
                and len(vals.shape) != 1)

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: The new name for the dictionary entry
        """
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name

    def _get_unit(self, value):
        """
        Find the unit for a particular value within the h5py dictionary

        :param value: attribute dictionary for a particular value set
        :return: unit for the value passed to the method
        """
        # Accept both the NXcanSAS "units" and the legacy "unit" spelling
        unit = h5attr(value, u'units')
        if unit is None:
            unit = h5attr(value, u'unit')
        return unit
Note: See TracBrowser for help on using the repository browser.