source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 96d06a4

Branches containing this file: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since 96d06a4 was 96d06a4, checked in by krzywon, 6 years ago

Separate 1d and 2d data processing, and add new NXcanSAS compliant data files.

  • Property mode set to 100644
File size: 27.3 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
def h5attr(node, key, default=None):
    """Return the decoded value of attribute *key* on an h5py *node*, or *default* if absent."""
    raw = node.attrs.get(key, default)
    return decode(raw)
20
class Reader(FileReader):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens
    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    # CanSAS version this reader targets
    cansas_version = 2.0
    # Data type name reported to the loader framework
    type_name = "NXcanSAS"
    # Wildcards used by the file-open dialog
    type = ["NXcanSAS HDF5 Files (*.h5)|*.h5|"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check: attempt to load any file as NXcanSAS
    allow_all = True
48
49    def get_file_contents(self):
50        """
51        This is the general read method that all SasView data_loaders must have.
52
53        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
54        :return: List of Data1D/2D objects and/or a list of errors.
55        """
56        # Reinitialize when loading a new data file to reset all class variables
57        self.reset_state()
58
59        filename = self.f_open.name
60        self.f_open.close() # IO handled by h5py
61
62        # Check that the file exists
63        if os.path.isfile(filename):
64            basename = os.path.basename(filename)
65            _, extension = os.path.splitext(basename)
66            # If the file type is not allowed, return empty list
67            if extension in self.ext or self.allow_all:
68                # Load the data file
69                try:
70                    self.raw_data = h5py.File(filename, 'r')
71                except Exception as e:
72                    if extension not in self.ext:
73                        msg = "NXcanSAS HDF5 Reader could not load file {}".format(basename + extension)
74                        raise DefaultReaderException(msg)
75                    raise FileContentsException(e.message)
76                try:
77                    # Read in all child elements of top level SASroot
78                    self.read_children(self.raw_data, [])
79                    # Add the last data set to the list of outputs
80                    self.add_data_set()
81                except Exception as exc:
82                    raise FileContentsException(exc.message)
83                finally:
84                    # Close the data file
85                    self.raw_data.close()
86
87                for dataset in self.output:
88                    if isinstance(dataset, Data1D):
89                        if dataset.x.size < 5:
90                            self.output = []
91                            raise FileContentsException("Fewer than 5 data points found.")
92
    def reset_state(self):
        """
        Create the reader object and define initial states for class variables
        """
        super(Reader, self).reset_state()
        self.data1d = []            # plottable_1D objects collected for the current SASentry
        self.data2d = []            # plottable_2D objects collected for the current SASentry
        self.raw_data = None        # open h5py.File handle while reading
        self.errors = set()         # accumulated error strings for the current file
        self.logging = []
        self.q_name = []            # names of the Q datasets (from I_axes/Q_indices)
        self.mask_name = u''        # name of the mask dataset, if any
        self.i_name = u''           # name of the intensity (signal) dataset
        self.i_node = u''           # the intensity dataset node itself
        self.q_uncertainties = []   # names of Q uncertainty datasets
        self.q_resolutions = []     # names of Q resolution datasets
        self.i_uncertainties = u''  # name of the intensity uncertainty dataset
        self.parent_class = u''     # canSAS/NX class of the group currently being parsed
        # Working objects, swapped into current_datainfo by add_intermediate()
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()
116
    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        Groups are classified by their canSAS_class/NX_class attribute and
        recursed into; datasets are dispatched to the matching process_*
        handler based on the class of the enclosing group.

        :param data: h5py Group object of any kind
        :param parent_list: list of group keys on the path from the root
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            class_name = h5attr(value, u'canSAS_class')
            if class_name is None:
                # Fall back to the NeXus class attribute (NXcanSAS files)
                class_name = h5attr(value, u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                # No class attribute at all: match against the node's path
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                # Set parent class before recursion
                last_parent_class = self.parent_class
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(value)
                    self._find_data_attributes(value)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                self.add_intermediate()
                # Reset parent class when returning from recursive method
                self.parent_class = last_parent_class
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    if isinstance(data_point, np.ndarray):
                        # Byte-string arrays must be decoded to text first
                        if data_point.dtype.char == 'S':
                            data_point = decode(bytes(data_point))
                    else:
                        data_point = decode(data_point)
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = h5attr(value, 'name')
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # The run 'name' attribute is optional
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # Everything else goes in meta_data
                    elif self.parent_class == u'SASdata':
                        if isinstance(self.current_dataset, plottable_2D):
                            self.process_2d_data_object(data_set, key, unit)
                        else:
                            self.process_1d_data_object(data_set, key, unit)
                        # The handlers consume the whole data_set at once, so
                        # stop iterating over individual points
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # I don't know if this reachable code
                self.errors.add("ShouldNeverHappenException")
226
227    def process_1d_data_object(self, data_set, key, unit):
228        """
229        SASdata processor method for 1d data items
230        :param data_set: data from HDF5 file
231        :param key: canSAS_class attribute
232        :param unit: unit attribute
233        """
234        if key == self.i_name:
235            self.current_dataset.y = data_set.flatten()
236            self.current_dataset.yaxis("Intensity", unit)
237        elif key == self.i_uncertainties:
238            self.current_dataset.dy = data_set.flatten()
239        elif key in self.q_name:
240            self.current_dataset.xaxis("Q", unit)
241            self.current_dataset.x = data_set.flatten()
242        elif key in self.q_uncertainties or key in self.q_resolutions:
243            if (len(self.q_resolutions) > 1
244                    and np.where(self.q_resolutions == key)[0] == 0):
245                self.current_dataset.dxw = data_set.flatten()
246            elif (len(self.q_resolutions) > 1
247                  and np.where(self.q_resolutions == key)[0] == 1):
248                self.current_dataset.dxl = data_set.flatten()
249            else:
250                self.current_dataset.dx = data_set.flatten()
251        elif key == self.mask_name:
252            self.current_dataset.mask = data_set.flatten()
253        elif key == u'wavelength':
254            self.current_datainfo.source.wavelength = data_set[0]
255            self.current_datainfo.source.wavelength_unit = unit
256
    def process_2d_data_object(self, data_set, key, unit):
        """
        SASdata processor method for 2d data items

        :param data_set: data from HDF5 file
        :param key: canSAS_class attribute
        :param unit: unit attribute
        """
        if key == self.i_name:
            self.current_dataset.data = data_set
            self.current_dataset.zaxis("Intensity", unit)
        elif key == self.i_uncertainties:
            self.current_dataset.err_data = data_set.flatten()
        elif key in self.q_name:
            # Axis labels only; the Q values themselves are not stored here yet
            self.current_dataset.xaxis("Q", unit)
            self.current_dataset.yaxis("Q", unit)
            #FIXME: This is broken - need to properly handle 2D data
            # TODO: Check shape of array (2d - cash money, homey!)
            # TODO: 3D - check dims, etc.
            # TODO: Put data where it belongs
            pass
        elif key in self.q_uncertainties or key in self.q_resolutions:
            # FIXME: This isn't right either.
            # TODO: find resolution/uncertainty specific to q_name
            pass
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
285
286    def process_trans_spectrum(self, data_set, key):
287        """
288        SAStransmission_spectrum processor
289        :param data_set: data from HDF5 file
290        :param key: canSAS_class attribute
291        """
292        if key == u'T':
293            self.trans_spectrum.transmission = data_set.flatten()
294        elif key == u'Tdev':
295            self.trans_spectrum.transmission_deviation = data_set.flatten()
296        elif key == u'lambda':
297            self.trans_spectrum.wavelength = data_set.flatten()
298
299    def process_sample(self, data_point, key):
300        """
301        SASsample processor
302        :param data_point: Single point from an HDF5 data file
303        :param key: class name data_point was taken from
304        """
305        if key == u'Title':
306            self.current_datainfo.sample.name = data_point
307        elif key == u'name':
308            self.current_datainfo.sample.name = data_point
309        elif key == u'ID':
310            self.current_datainfo.sample.name = data_point
311        elif key == u'thickness':
312            self.current_datainfo.sample.thickness = data_point
313        elif key == u'temperature':
314            self.current_datainfo.sample.temperature = data_point
315        elif key == u'transmission':
316            self.current_datainfo.sample.transmission = data_point
317        elif key == u'x_position':
318            self.current_datainfo.sample.position.x = data_point
319        elif key == u'y_position':
320            self.current_datainfo.sample.position.y = data_point
321        elif key == u'pitch':
322            self.current_datainfo.sample.orientation.x = data_point
323        elif key == u'yaw':
324            self.current_datainfo.sample.orientation.y = data_point
325        elif key == u'roll':
326            self.current_datainfo.sample.orientation.z = data_point
327        elif key == u'details':
328            self.current_datainfo.sample.details.append(data_point)
329
330    def process_detector(self, data_point, key, unit):
331        """
332        SASdetector processor
333        :param data_point: Single point from an HDF5 data file
334        :param key: class name data_point was taken from
335        :param unit: unit attribute from data set
336        """
337        if key == u'name':
338            self.detector.name = data_point
339        elif key == u'SDD':
340            self.detector.distance = float(data_point)
341            self.detector.distance_unit = unit
342        elif key == u'slit_length':
343            self.detector.slit_length = float(data_point)
344            self.detector.slit_length_unit = unit
345        elif key == u'x_position':
346            self.detector.offset.x = float(data_point)
347            self.detector.offset_unit = unit
348        elif key == u'y_position':
349            self.detector.offset.y = float(data_point)
350            self.detector.offset_unit = unit
351        elif key == u'pitch':
352            self.detector.orientation.x = float(data_point)
353            self.detector.orientation_unit = unit
354        elif key == u'roll':
355            self.detector.orientation.z = float(data_point)
356            self.detector.orientation_unit = unit
357        elif key == u'yaw':
358            self.detector.orientation.y = float(data_point)
359            self.detector.orientation_unit = unit
360        elif key == u'beam_center_x':
361            self.detector.beam_center.x = float(data_point)
362            self.detector.beam_center_unit = unit
363        elif key == u'beam_center_y':
364            self.detector.beam_center.y = float(data_point)
365            self.detector.beam_center_unit = unit
366        elif key == u'x_pixel_size':
367            self.detector.pixel_size.x = float(data_point)
368            self.detector.pixel_size_unit = unit
369        elif key == u'y_pixel_size':
370            self.detector.pixel_size.y = float(data_point)
371            self.detector.pixel_size_unit = unit
372
373    def process_collimation(self, data_point, key, unit):
374        """
375        SAScollimation processor
376        :param data_point: Single point from an HDF5 data file
377        :param key: class name data_point was taken from
378        :param unit: unit attribute from data set
379        """
380        if key == u'distance':
381            self.collimation.length = data_point
382            self.collimation.length_unit = unit
383        elif key == u'name':
384            self.collimation.name = data_point
385
386    def process_aperture(self, data_point, key):
387        """
388        SASaperture processor
389        :param data_point: Single point from an HDF5 data file
390        :param key: class name data_point was taken from
391        """
392        if key == u'shape':
393            self.aperture.shape = data_point
394        elif key == u'x_gap':
395            self.aperture.size.x = data_point
396        elif key == u'y_gap':
397            self.aperture.size.y = data_point
398
399    def process_source(self, data_point, key, unit):
400        """
401        SASsource processor
402        :param data_point: Single point from an HDF5 data file
403        :param key: class name data_point was taken from
404        :param unit: unit attribute from data set
405        """
406        if key == u'incident_wavelength':
407            self.current_datainfo.source.wavelength = data_point
408            self.current_datainfo.source.wavelength_unit = unit
409        elif key == u'wavelength_max':
410            self.current_datainfo.source.wavelength_max = data_point
411            self.current_datainfo.source.wavelength_max_unit = unit
412        elif key == u'wavelength_min':
413            self.current_datainfo.source.wavelength_min = data_point
414            self.current_datainfo.source.wavelength_min_unit = unit
415        elif key == u'incident_wavelength_spread':
416            self.current_datainfo.source.wavelength_spread = data_point
417            self.current_datainfo.source.wavelength_spread_unit = unit
418        elif key == u'beam_size_x':
419            self.current_datainfo.source.beam_size.x = data_point
420            self.current_datainfo.source.beam_size_unit = unit
421        elif key == u'beam_size_y':
422            self.current_datainfo.source.beam_size.y = data_point
423            self.current_datainfo.source.beam_size_unit = unit
424        elif key == u'beam_shape':
425            self.current_datainfo.source.beam_shape = data_point
426        elif key == u'radiation':
427            self.current_datainfo.source.radiation = data_point
428
429    def process_process(self, data_point, key):
430        """
431        SASprocess processor
432        :param data_point: Single point from an HDF5 data file
433        :param key: class name data_point was taken from
434        """
435        term_match = re.compile(u'^term[0-9]+$')
436        if key == u'Title':  # CanSAS 2.0
437            self.process.name = data_point
438        elif key == u'name':  # NXcanSAS
439            self.process.name = data_point
440        elif key == u'description':
441            self.process.description = data_point
442        elif key == u'date':
443            self.process.date = data_point
444        elif term_match.match(key):
445            self.process.term.append(data_point)
446        else:
447            self.process.notes.append(data_point)
448
449    def add_intermediate(self):
450        """
451        This method stores any intermediate objects within the final data set
452        after fully reading the set.
453
454        :param parent: The NXclass name for the h5py Group object that just
455                       finished being processed
456        """
457
458        if self.parent_class == u'SASprocess':
459            self.current_datainfo.process.append(self.process)
460            self.process = Process()
461        elif self.parent_class == u'SASdetector':
462            self.current_datainfo.detector.append(self.detector)
463            self.detector = Detector()
464        elif self.parent_class == u'SAStransmission_spectrum':
465            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
466            self.trans_spectrum = TransmissionSpectrum()
467        elif self.parent_class == u'SAScollimation':
468            self.current_datainfo.collimation.append(self.collimation)
469            self.collimation = Collimation()
470        elif self.parent_class == u'SASaperture':
471            self.collimation.aperture.append(self.aperture)
472            self.aperture = Aperture()
473        elif self.parent_class == u'SASdata':
474            if isinstance(self.current_dataset, plottable_2D):
475                self.data2d.append(self.current_dataset)
476            elif isinstance(self.current_dataset, plottable_1D):
477                self.data1d.append(self.current_dataset)
478
479    def final_data_cleanup(self):
480        """
481        Does some final cleanup and formatting on self.current_datainfo and
482        all data1D and data2D objects and then combines the data and info into
483        Data1D and Data2D objects
484        """
485        # Type cast data arrays to float64
486        if len(self.current_datainfo.trans_spectrum) > 0:
487            spectrum_list = []
488            for spectrum in self.current_datainfo.trans_spectrum:
489                spectrum.transmission = spectrum.transmission.astype(np.float64)
490                spectrum.transmission_deviation = \
491                    spectrum.transmission_deviation.astype(np.float64)
492                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
493                if len(spectrum.transmission) > 0:
494                    spectrum_list.append(spectrum)
495            self.current_datainfo.trans_spectrum = spectrum_list
496
497        # Append errors to dataset and reset class errors
498        self.current_datainfo.errors = self.errors
499        self.errors.clear()
500
501        # Combine all plottables with datainfo and append each to output
502        # Type cast data arrays to float64 and find min/max as appropriate
503        for dataset in self.data2d:
504            zeros = np.ones(dataset.data.size, dtype=bool)
505            try:
506                for i in range(0, dataset.mask.size - 1):
507                    zeros[i] = dataset.mask[i]
508            except:
509                self.errors.add(sys.exc_value)
510            dataset.mask = zeros
511            # Calculate the actual Q matrix
512            try:
513                if dataset.q_data.size <= 1:
514                    dataset.q_data = np.sqrt(dataset.qx_data
515                                             * dataset.qx_data
516                                             + dataset.qy_data
517                                             * dataset.qy_data)
518            except:
519                dataset.q_data = None
520
521            if dataset.data.ndim == 2:
522                (n_rows, n_cols) = dataset.data.shape
523                dataset.y_bins = dataset.qy_data[0::n_cols]
524                dataset.x_bins = dataset.qx_data[0::n_rows]
525                dataset.data = dataset.data.flatten()
526            self.current_dataset = dataset
527            self.send_to_output()
528
529        for dataset in self.data1d:
530            self.current_dataset = dataset
531            self.send_to_output()
532
    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after preforming final
        processing on the data and then calls a private method to generate a
        new data set.

        :param key: NeXus group name for current tree level
        """

        # Only run cleanup when data was actually collected for this entry
        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        # Start fresh collections for the next SASentry
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()
547
548    def _initialize_new_data_set(self, value=None):
549        """
550        A private class method to generate a new 1D or 2D data object based on
551        the type of data within the set. Outside methods should call
552        add_data_set() to be sure any existing data is stored properly.
553
554        :param parent_list: List of names of parent elements
555        """
556        if self._is2d(value):
557            self.current_dataset = plottable_2D()
558        else:
559            x = np.array(0)
560            y = np.array(0)
561            self.current_dataset = plottable_1D(x, y)
562        self.current_datainfo.filename = self.raw_data.filename
563        self.mask_name = ""
564        self.i_name = ""
565        self.i_node = ""
566        self.q_name = []
567        self.q_uncertainties = []
568        self.q_resolutions = []
569        self.i_uncertainties = ""
570
571    @staticmethod
572    def check_is_list_or_array(iterable):
573        try:
574            iter(iterable)
575            if (not isinstance(iterable, np.ndarray)) or (isinstance(iterable, str)
576                    or isinstance(iterable, unicode)):
577                raise TypeError
578        except TypeError:
579            iterable = iterable.split(",")
580        return iterable
581
582    def _find_data_attributes(self, value):
583        """
584        A class to find the indices for Q, the name of the Qdev and Idev, and
585        the name of the mask.
586        :param value: SASdata/NXdata HDF5 Group
587        """
588        attrs = value.attrs
589        signal = attrs.get("signal", "I")
590        i_axes = attrs.get("I_axes", ["Q"])
591        q_indices = attrs.get("Q_indices", [0])
592        q_indices = map(int, self.check_is_list_or_array(q_indices))
593        i_axes = self.check_is_list_or_array(i_axes)
594        keys = value.keys()
595        self.mask_name = attrs.get("mask")
596        for val in q_indices:
597            self.q_name.append(i_axes[val])
598        self.i_name = signal
599        self.i_node = value.get(self.i_name)
600        for item in self.q_name:
601            if item in keys:
602                q_vals = value.get(item)
603                if q_vals.attrs.get("uncertainties") is not None:
604                    self.q_uncertainties = q_vals.attrs.get("uncertainties")
605                elif q_vals.attrs.get("uncertainty") is not None:
606                    self.q_uncertainties = q_vals.attrs.get("uncertainty")
607                if isinstance(self.q_uncertainties, str) is not None:
608                    self.q_uncertainties = [self.q_uncertainties]
609                if q_vals.attrs.get("resolutions") is not None:
610                    self.q_resolutions = q_vals.attrs.get("resolutions")
611                if isinstance(self.q_resolutions, str):
612                    self.q_resolutions = self.q_resolutions.split(",")
613        if self.i_name in keys:
614            i_vals = value.get(self.i_name)
615            self.i_uncertainties = i_vals.attrs.get("uncertainties")
616            if self.i_uncertainties is None:
617                self.i_uncertainties = i_vals.attrs.get("uncertainty")
618
619    def _is2d(self, value, basename="I"):
620        """
621        A private class to determine if the data set is 1d or 2d.
622
623        :param parent_list: List of parents nodes in the HDF5 file
624        :param basename: Approximate name of an entry to search for
625        :return: True if 2D, otherwise false
626        """
627
628        vals = value.get(basename)
629        return (vals is not None and vals.shape is not None
630                and len(vals.shape) != 1)
631
632    def _create_unique_key(self, dictionary, name, numb=0):
633        """
634        Create a unique key value for any dictionary to prevent overwriting
635        Recurses until a unique key value is found.
636
637        :param dictionary: A dictionary with any number of entries
638        :param name: The index of the item to be added to dictionary
639        :param numb: The number to be appended to the name, starts at 0
640        :return: The new name for the dictionary entry
641        """
642        if dictionary.get(name) is not None:
643            numb += 1
644            name = name.split("_")[0]
645            name += "_{0}".format(numb)
646            name = self._create_unique_key(dictionary, name, numb)
647        return name
648
649    def _get_unit(self, value):
650        """
651        Find the unit for a particular value within the h5py dictionary
652
653        :param value: attribute dictionary for a particular value set
654        :return: unit for the value passed to the method
655        """
656        unit = h5attr(value, u'units')
657        if unit is None:
658            unit = h5attr(value, u'unit')
659        return unit
Note: See TracBrowser for help on using the repository browser.