source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ cf29187

Branches: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since cf29187 was cf29187, checked in by krzywon, 6 years ago

Fix typo in writer, output new files, and fix reader to properly read in indices.

  • Property mode set to 100644
File size: 26.9 KB
Line 
1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from ..data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from ..data_info import combine_data_info_with_plottable
15from ..loader_exceptions import FileContentsException, DefaultReaderException
16from ..file_reader_base_class import FileReader, decode
17
def h5attr(node, key, default=None):
    """Read attribute *key* from an h5py node, decoded to a native string.

    Returns *default* (decoded) when the attribute is absent.
    """
    raw = node.attrs.get(key, default)
    return decode(raw)
20
class Reader(FileReader):
    """
    A class for reading in CanSAS v2.0 data files. The existing iteration opens
    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
    number of data sets may be present within the file and any dimensionality
    of data may be used. Currently 1D and 2D SAS data sets are supported, but
    future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry and the data within
    can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files

    :Dependencies:
        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
    """

    # CanSAS version this reader targets
    cansas_version = 2.0
    # Data type name shown to the user
    type_name = "NXcanSAS"
    # Wildcard string used by file-open dialogs
    type = ["NXcanSAS HDF5 Files (*.h5)|*.h5|"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check; when True any file may be attempted
    allow_all = True
48
49    def get_file_contents(self):
50        """
51        This is the general read method that all SasView data_loaders must have.
52
53        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
54        :return: List of Data1D/2D objects and/or a list of errors.
55        """
56        # Reinitialize when loading a new data file to reset all class variables
57        self.reset_state()
58
59        filename = self.f_open.name
60        self.f_open.close() # IO handled by h5py
61
62        # Check that the file exists
63        if os.path.isfile(filename):
64            basename = os.path.basename(filename)
65            _, extension = os.path.splitext(basename)
66            # If the file type is not allowed, return empty list
67            if extension in self.ext or self.allow_all:
68                # Load the data file
69                try:
70                    self.raw_data = h5py.File(filename, 'r')
71                except Exception as e:
72                    if extension not in self.ext:
73                        msg = "NXcanSAS HDF5 Reader could not load file {}".format(basename + extension)
74                        raise DefaultReaderException(msg)
75                    raise FileContentsException(e.message)
76                try:
77                    # Read in all child elements of top level SASroot
78                    self.read_children(self.raw_data, [])
79                    # Add the last data set to the list of outputs
80                    self.add_data_set()
81                except Exception as exc:
82                    raise FileContentsException(exc.message)
83                finally:
84                    # Close the data file
85                    self.raw_data.close()
86
87                for dataset in self.output:
88                    if isinstance(dataset, Data1D):
89                        if dataset.x.size < 5:
90                            self.output = []
91                            raise FileContentsException("Fewer than 5 data points found.")
92
    def reset_state(self):
        """
        Create the reader object and define initial states for class variables.

        Extends FileReader.reset_state() with the bookkeeping this reader
        accumulates while walking a file: per-entry plottable lists, the
        names of the signal/axis/uncertainty datasets discovered in SASdata
        attributes, and fresh intermediate metadata objects.
        """
        super(Reader, self).reset_state()
        # Plottables collected for the current SASentry, split by dimension
        self.data1d = []
        self.data2d = []
        # Open h5py.File handle while a file is being read
        self.raw_data = None
        # Errors accumulated during the read; attached to DataInfo at cleanup
        self.errors = set()
        self.logging = []
        # Names of the Q dataset(s) named by the SASdata I_axes/Q_indices attrs
        self.q_name = []
        # Name of the mask dataset, from the SASdata "mask" attribute
        self.mask_name = u''
        # Name and node of the intensity (signal) dataset
        self.i_name = u''
        self.i_node = u''
        # Names of Q uncertainty/resolution datasets, from Q dataset attrs
        self.q_uncertainties = []
        self.q_resolutions = []
        # Name of the intensity uncertainty dataset
        self.i_uncertainties = u''
        # canSAS class of the group currently being processed
        self.parent_class = u''
        # Intermediate metadata objects, flushed by add_intermediate()
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()
116
    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        Groups are classified by their canSAS_class/NX_class attribute and
        recursed into; datasets are dispatched to the matching process_*
        handler based on the enclosing group's class.

        :param data: h5py Group object of any kind
        :param parent_list: list of parent group names; mutated in place
            (key appended before recursing, removed after)
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            # canSAS_class takes precedence over NX_class when both exist
            class_name = h5attr(value, u'canSAS_class')
            if class_name is None:
                class_name = h5attr(value, u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                # Fall back to matching against the node's full HDF5 path
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                # Set parent class before recursion
                last_parent_class = self.parent_class
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(value)
                    self._find_data_attributes(value)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                # Flush the intermediate object built while inside the group
                self.add_intermediate()
                # Reset parent class when returning from recursive method
                self.parent_class = last_parent_class
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    # Byte strings are decoded to native strings before use
                    if isinstance(data_point, np.ndarray):
                        if data_point.dtype.char == 'S':
                            data_point = decode(bytes(data_point))
                    else:
                        data_point = decode(data_point)
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = h5attr(value, 'name')
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # The run "name" attribute is optional
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # Everything else goes in meta_data
                    elif self.parent_class == u'SASdata':
                        # The whole array is consumed at once, so stop
                        # iterating over individual points
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        # Same: whole-array handler, not per-point
                        self.process_trans_spectrum(data_set, key)
                        break
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # I don't know if this reachable code
                self.errors.add("ShouldNeverHappenException")
222
223    def process_data_object(self, data_set, key, unit):
224        """
225        SASdata processor method
226        :param data_set: data from HDF5 file
227        :param key: canSAS_class attribute
228        :param unit: unit attribute
229        """
230        if key == self.i_name:
231            if isinstance(self.current_dataset, plottable_2D):
232                self.current_dataset.data = data_set
233                self.current_dataset.zaxis("Intensity", unit)
234            else:
235                self.current_dataset.y = data_set.flatten()
236                self.current_dataset.yaxis("Intensity", unit)
237        elif key == self.i_uncertainties:
238            if isinstance(self.current_dataset, plottable_2D):
239                self.current_dataset.err_data = data_set.flatten()
240            else:
241                self.current_dataset.dy = data_set.flatten()
242        elif key in self.q_name:
243            self.current_dataset.xaxis("Q", unit)
244            if isinstance(self.current_dataset, plottable_2D):
245                #FIXME: This is broken - need to properly handle 2D data
246                self.current_dataset.q = data_set.flatten()
247            else:
248                self.current_dataset.x = data_set.flatten()
249        elif key in self.q_uncertainties or key in self.q_resolutions:
250            # FIXME: This isn't right either.
251            if (len(self.q_resolutions) > 1
252                    and np.where(self.q_resolutions == key)[0] == 0):
253                self.current_dataset.dxw = data_set.flatten()
254            elif (len(self.q_resolutions) > 1
255                  and np.where(self.q_resolutions == key)[0] == 1):
256                self.current_dataset.dxl = data_set.flatten()
257            else:
258                self.current_dataset.dx = data_set.flatten()
259        elif key == u'Qy':
260            self.current_dataset.yaxis("Q_y", unit)
261            self.current_dataset.qy_data = data_set.flatten()
262        elif key == u'Qydev':
263            self.current_dataset.dqy_data = data_set.flatten()
264        elif key == u'Qx':
265            self.current_dataset.xaxis("Q_x", unit)
266            self.current_dataset.qx_data = data_set.flatten()
267        elif key == u'Qxdev':
268            self.current_dataset.dqx_data = data_set.flatten()
269        elif key == self.mask_name:
270            self.current_dataset.mask = data_set.flatten()
271        elif key == u'wavelength':
272            self.current_datainfo.source.wavelength = data_set[0]
273            self.current_datainfo.source.wavelength_unit = unit
274
275    def process_trans_spectrum(self, data_set, key):
276        """
277        SAStransmission_spectrum processor
278        :param data_set: data from HDF5 file
279        :param key: canSAS_class attribute
280        """
281        if key == u'T':
282            self.trans_spectrum.transmission = data_set.flatten()
283        elif key == u'Tdev':
284            self.trans_spectrum.transmission_deviation = data_set.flatten()
285        elif key == u'lambda':
286            self.trans_spectrum.wavelength = data_set.flatten()
287
288    def process_sample(self, data_point, key):
289        """
290        SASsample processor
291        :param data_point: Single point from an HDF5 data file
292        :param key: class name data_point was taken from
293        """
294        if key == u'Title':
295            self.current_datainfo.sample.name = data_point
296        elif key == u'name':
297            self.current_datainfo.sample.name = data_point
298        elif key == u'ID':
299            self.current_datainfo.sample.name = data_point
300        elif key == u'thickness':
301            self.current_datainfo.sample.thickness = data_point
302        elif key == u'temperature':
303            self.current_datainfo.sample.temperature = data_point
304        elif key == u'transmission':
305            self.current_datainfo.sample.transmission = data_point
306        elif key == u'x_position':
307            self.current_datainfo.sample.position.x = data_point
308        elif key == u'y_position':
309            self.current_datainfo.sample.position.y = data_point
310        elif key == u'pitch':
311            self.current_datainfo.sample.orientation.x = data_point
312        elif key == u'yaw':
313            self.current_datainfo.sample.orientation.y = data_point
314        elif key == u'roll':
315            self.current_datainfo.sample.orientation.z = data_point
316        elif key == u'details':
317            self.current_datainfo.sample.details.append(data_point)
318
319    def process_detector(self, data_point, key, unit):
320        """
321        SASdetector processor
322        :param data_point: Single point from an HDF5 data file
323        :param key: class name data_point was taken from
324        :param unit: unit attribute from data set
325        """
326        if key == u'name':
327            self.detector.name = data_point
328        elif key == u'SDD':
329            self.detector.distance = float(data_point)
330            self.detector.distance_unit = unit
331        elif key == u'slit_length':
332            self.detector.slit_length = float(data_point)
333            self.detector.slit_length_unit = unit
334        elif key == u'x_position':
335            self.detector.offset.x = float(data_point)
336            self.detector.offset_unit = unit
337        elif key == u'y_position':
338            self.detector.offset.y = float(data_point)
339            self.detector.offset_unit = unit
340        elif key == u'pitch':
341            self.detector.orientation.x = float(data_point)
342            self.detector.orientation_unit = unit
343        elif key == u'roll':
344            self.detector.orientation.z = float(data_point)
345            self.detector.orientation_unit = unit
346        elif key == u'yaw':
347            self.detector.orientation.y = float(data_point)
348            self.detector.orientation_unit = unit
349        elif key == u'beam_center_x':
350            self.detector.beam_center.x = float(data_point)
351            self.detector.beam_center_unit = unit
352        elif key == u'beam_center_y':
353            self.detector.beam_center.y = float(data_point)
354            self.detector.beam_center_unit = unit
355        elif key == u'x_pixel_size':
356            self.detector.pixel_size.x = float(data_point)
357            self.detector.pixel_size_unit = unit
358        elif key == u'y_pixel_size':
359            self.detector.pixel_size.y = float(data_point)
360            self.detector.pixel_size_unit = unit
361
362    def process_collimation(self, data_point, key, unit):
363        """
364        SAScollimation processor
365        :param data_point: Single point from an HDF5 data file
366        :param key: class name data_point was taken from
367        :param unit: unit attribute from data set
368        """
369        if key == u'distance':
370            self.collimation.length = data_point
371            self.collimation.length_unit = unit
372        elif key == u'name':
373            self.collimation.name = data_point
374
375    def process_aperture(self, data_point, key):
376        """
377        SASaperture processor
378        :param data_point: Single point from an HDF5 data file
379        :param key: class name data_point was taken from
380        """
381        if key == u'shape':
382            self.aperture.shape = data_point
383        elif key == u'x_gap':
384            self.aperture.size.x = data_point
385        elif key == u'y_gap':
386            self.aperture.size.y = data_point
387
388    def process_source(self, data_point, key, unit):
389        """
390        SASsource processor
391        :param data_point: Single point from an HDF5 data file
392        :param key: class name data_point was taken from
393        :param unit: unit attribute from data set
394        """
395        if key == u'incident_wavelength':
396            self.current_datainfo.source.wavelength = data_point
397            self.current_datainfo.source.wavelength_unit = unit
398        elif key == u'wavelength_max':
399            self.current_datainfo.source.wavelength_max = data_point
400            self.current_datainfo.source.wavelength_max_unit = unit
401        elif key == u'wavelength_min':
402            self.current_datainfo.source.wavelength_min = data_point
403            self.current_datainfo.source.wavelength_min_unit = unit
404        elif key == u'incident_wavelength_spread':
405            self.current_datainfo.source.wavelength_spread = data_point
406            self.current_datainfo.source.wavelength_spread_unit = unit
407        elif key == u'beam_size_x':
408            self.current_datainfo.source.beam_size.x = data_point
409            self.current_datainfo.source.beam_size_unit = unit
410        elif key == u'beam_size_y':
411            self.current_datainfo.source.beam_size.y = data_point
412            self.current_datainfo.source.beam_size_unit = unit
413        elif key == u'beam_shape':
414            self.current_datainfo.source.beam_shape = data_point
415        elif key == u'radiation':
416            self.current_datainfo.source.radiation = data_point
417
418    def process_process(self, data_point, key):
419        """
420        SASprocess processor
421        :param data_point: Single point from an HDF5 data file
422        :param key: class name data_point was taken from
423        """
424        term_match = re.compile(u'^term[0-9]+$')
425        if key == u'Title':  # CanSAS 2.0
426            self.process.name = data_point
427        elif key == u'name':  # NXcanSAS
428            self.process.name = data_point
429        elif key == u'description':
430            self.process.description = data_point
431        elif key == u'date':
432            self.process.date = data_point
433        elif term_match.match(key):
434            self.process.term.append(data_point)
435        else:
436            self.process.notes.append(data_point)
437
438    def add_intermediate(self):
439        """
440        This method stores any intermediate objects within the final data set
441        after fully reading the set.
442
443        :param parent: The NXclass name for the h5py Group object that just
444                       finished being processed
445        """
446
447        if self.parent_class == u'SASprocess':
448            self.current_datainfo.process.append(self.process)
449            self.process = Process()
450        elif self.parent_class == u'SASdetector':
451            self.current_datainfo.detector.append(self.detector)
452            self.detector = Detector()
453        elif self.parent_class == u'SAStransmission_spectrum':
454            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
455            self.trans_spectrum = TransmissionSpectrum()
456        elif self.parent_class == u'SAScollimation':
457            self.current_datainfo.collimation.append(self.collimation)
458            self.collimation = Collimation()
459        elif self.parent_class == u'SASaperture':
460            self.collimation.aperture.append(self.aperture)
461            self.aperture = Aperture()
462        elif self.parent_class == u'SASdata':
463            if isinstance(self.current_dataset, plottable_2D):
464                self.data2d.append(self.current_dataset)
465            elif isinstance(self.current_dataset, plottable_1D):
466                self.data1d.append(self.current_dataset)
467
468    def final_data_cleanup(self):
469        """
470        Does some final cleanup and formatting on self.current_datainfo and
471        all data1D and data2D objects and then combines the data and info into
472        Data1D and Data2D objects
473        """
474        # Type cast data arrays to float64
475        if len(self.current_datainfo.trans_spectrum) > 0:
476            spectrum_list = []
477            for spectrum in self.current_datainfo.trans_spectrum:
478                spectrum.transmission = spectrum.transmission.astype(np.float64)
479                spectrum.transmission_deviation = \
480                    spectrum.transmission_deviation.astype(np.float64)
481                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
482                if len(spectrum.transmission) > 0:
483                    spectrum_list.append(spectrum)
484            self.current_datainfo.trans_spectrum = spectrum_list
485
486        # Append errors to dataset and reset class errors
487        self.current_datainfo.errors = self.errors
488        self.errors.clear()
489
490        # Combine all plottables with datainfo and append each to output
491        # Type cast data arrays to float64 and find min/max as appropriate
492        for dataset in self.data2d:
493            zeros = np.ones(dataset.data.size, dtype=bool)
494            try:
495                for i in range(0, dataset.mask.size - 1):
496                    zeros[i] = dataset.mask[i]
497            except:
498                self.errors.add(sys.exc_value)
499            dataset.mask = zeros
500            # Calculate the actual Q matrix
501            try:
502                if dataset.q_data.size <= 1:
503                    dataset.q_data = np.sqrt(dataset.qx_data
504                                             * dataset.qx_data
505                                             + dataset.qy_data
506                                             * dataset.qy_data)
507            except:
508                dataset.q_data = None
509
510            if dataset.data.ndim == 2:
511                (n_rows, n_cols) = dataset.data.shape
512                dataset.y_bins = dataset.qy_data[0::n_cols]
513                dataset.x_bins = dataset.qx_data[0::n_rows]
514                dataset.data = dataset.data.flatten()
515            self.current_dataset = dataset
516            self.send_to_output()
517
518        for dataset in self.data1d:
519            self.current_dataset = dataset
520            self.send_to_output()
521
522    def add_data_set(self, key=""):
523        """
524        Adds the current_dataset to the list of outputs after preforming final
525        processing on the data and then calls a private method to generate a
526        new data set.
527
528        :param key: NeXus group name for current tree level
529        """
530
531        if self.current_datainfo and self.current_dataset:
532            self.final_data_cleanup()
533        self.data1d = []
534        self.data2d = []
535        self.current_datainfo = DataInfo()
536
537    def _initialize_new_data_set(self, value=None):
538        """
539        A private class method to generate a new 1D or 2D data object based on
540        the type of data within the set. Outside methods should call
541        add_data_set() to be sure any existing data is stored properly.
542
543        :param parent_list: List of names of parent elements
544        """
545        if self._is2d(value):
546            self.current_dataset = plottable_2D()
547        else:
548            x = np.array(0)
549            y = np.array(0)
550            self.current_dataset = plottable_1D(x, y)
551        self.current_datainfo.filename = self.raw_data.filename
552        self.mask_name = ""
553        self.i_name = ""
554        self.i_node = ""
555        self.q_name = []
556        self.q_uncertainties = []
557        self.q_resolutions = []
558        self.i_uncertainties = ""
559
560    @staticmethod
561    def check_is_list_or_array(iterable):
562        try:
563            iter(iterable)
564            if (not isinstance(iterable, np.ndarray)) or (isinstance(iterable, str)
565                    or isinstance(iterable, unicode)):
566                raise TypeError
567        except TypeError:
568            iterable = iterable.split(",")
569        return iterable
570
571    def _find_data_attributes(self, value):
572        """
573        A class to find the indices for Q, the name of the Qdev and Idev, and
574        the name of the mask.
575        :param value: SASdata/NXdata HDF5 Group
576        """
577        attrs = value.attrs
578        signal = attrs.get("signal", "I")
579        i_axes = attrs.get("I_axes", ["Q"])
580        q_indices = attrs.get("Q_indices", [0])
581        q_indices = map(int, self.check_is_list_or_array(q_indices))
582        i_axes = self.check_is_list_or_array(i_axes)
583        keys = value.keys()
584        self.mask_name = attrs.get("mask")
585        for val in q_indices:
586            self.q_name.append(i_axes[val])
587        self.i_name = signal
588        self.i_node = value.get(self.i_name)
589        for item in self.q_name:
590            if item in keys:
591                q_vals = value.get(item)
592                if q_vals.attrs.get("uncertainties") is not None:
593                    self.q_uncertainties = q_vals.attrs.get("uncertainties")
594                elif q_vals.attrs.get("uncertainty") is not None:
595                    self.q_uncertainties = q_vals.attrs.get("uncertainty")
596                if isinstance(self.q_uncertainties, str) is not None:
597                    self.q_uncertainties = [self.q_uncertainties]
598                if q_vals.attrs.get("resolutions") is not None:
599                    self.q_resolutions = q_vals.attrs.get("resolutions")
600                if isinstance(self.q_resolutions, str):
601                    self.q_resolutions = self.q_resolutions.split(",")
602        if self.i_name in keys:
603            i_vals = value.get(self.i_name)
604            self.i_uncertainties = i_vals.attrs.get("uncertainties")
605            if self.i_uncertainties is None:
606                self.i_uncertainties = i_vals.attrs.get("uncertainty")
607
608    def _is2d(self, value, basename="I"):
609        """
610        A private class to determine if the data set is 1d or 2d.
611
612        :param parent_list: List of parents nodes in the HDF5 file
613        :param basename: Approximate name of an entry to search for
614        :return: True if 2D, otherwise false
615        """
616
617        vals = value.get(basename)
618        return (vals is not None and vals.shape is not None
619                and len(vals.shape) != 1)
620
621    def _create_unique_key(self, dictionary, name, numb=0):
622        """
623        Create a unique key value for any dictionary to prevent overwriting
624        Recurses until a unique key value is found.
625
626        :param dictionary: A dictionary with any number of entries
627        :param name: The index of the item to be added to dictionary
628        :param numb: The number to be appended to the name, starts at 0
629        :return: The new name for the dictionary entry
630        """
631        if dictionary.get(name) is not None:
632            numb += 1
633            name = name.split("_")[0]
634            name += "_{0}".format(numb)
635            name = self._create_unique_key(dictionary, name, numb)
636        return name
637
638    def _get_unit(self, value):
639        """
640        Find the unit for a particular value within the h5py dictionary
641
642        :param value: attribute dictionary for a particular value set
643        :return: unit for the value passed to the method
644        """
645        unit = h5attr(value, u'units')
646        if unit is None:
647            unit = h5attr(value, u'unit')
648        return unit
Note: See TracBrowser for help on using the repository browser.