source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 2651724

1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable
15
16
17class Reader():
18    """
19    A class for reading in CanSAS v2.0 data files. The existing iteration opens
20    Mantid generated HDF5 formatted files with file extension .h5/.H5. Any
21    number of data sets may be present within the file and any dimensionality
22    of data may be used. Currently 1D and 2D SAS data sets are supported, but
23    future implementations will include 1D and 2D SESANS data.
24
25    Any number of SASdata sets may be present in a SASentry and the data within
26    can be either 1D I(Q) or 2D I(Qx, Qy).
27
28    Also supports reading NXcanSAS formatted HDF5 files
29
30    :Dependencies:
31        The CanSAS HDF5 reader requires h5py => v2.5.0 or later.
32    """
33
34    # CanSAS version
35    cansas_version = 2.0
36    # Logged warnings or messages
37    logging = None
38    # List of errors for the current data set
39    errors = None
40    # Raw file contents to be processed
41    raw_data = None
42    # Data info currently being read in
43    current_datainfo = None
44    # SASdata set currently being read in
45    current_dataset = None
46    # List of plottable1D objects that should be linked to the current_datainfo
47    data1d = None
48    # List of plottable2D objects that should be linked to the current_datainfo
49    data2d = None
50    # Data type name
51    type_name = "CanSAS 2.0"
52    # Wildcards
53    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
54    # List of allowed extensions
55    ext = ['.h5', '.H5']
56    # Flag to bypass extension check
57    allow_all = True
58    # List of files to return
59    output = None
60
61    def read(self, filename):
62        """
63        This is the general read method that all SasView data_loaders must have.
64
65        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
66        :return: List of Data1D/2D objects and/or a list of errors.
67        """
68        # Reinitialize when loading a new data file to reset all class variables
69        self.reset_class_variables()
70        # Check that the file exists
71        if os.path.isfile(filename):
72            basename = os.path.basename(filename)
73            _, extension = os.path.splitext(basename)
74            # If the file type is not allowed, return empty list
75            if extension in self.ext or self.allow_all:
76                # Load the data file
77                self.raw_data = h5py.File(filename, 'r')
78                # Read in all child elements of top level SASroot
79                self.read_children(self.raw_data, [])
80                # Add the last data set to the list of outputs
81                self.add_data_set()
82                # Close the data file
83                self.raw_data.close()
84        # Return data set(s)
85        return self.output
86
87    def reset_class_variables(self):
88        """
89        Create the reader object and define initial states for class variables
90        """
91        self.current_datainfo = None
92        self.current_dataset = None
93        self.data1d = []
94        self.data2d = []
95        self.raw_data = None
96        self.errors = set()
97        self.logging = []
98        self.output = []
99        self.q_name = []
100        self.mask_name = u''
101        self.i_name = u''
102        self.i_node = u''
103        self.q_uncertainties = u''
104        self.q_resolutions = u''
105        self.i_uncertainties = u''
106        self.parent_class = u''
107        self.detector = Detector()
108        self.collimation = Collimation()
109        self.aperture = Aperture()
110        self.process = Process()
111        self.trans_spectrum = TransmissionSpectrum()
112
113    def read_children(self, data, parent_list):
114        """
115        A recursive method for stepping through the hierarchical data file.
116
117        :param data: h5py Group object of any kind
118        :param parent: h5py Group parent name
119        """
120
121        # Loop through each element of the parent and process accordingly
122        for key in data.keys():
123            # Get all information for the current key
124            value = data.get(key)
125            if value.attrs.get(u'canSAS_class') is not None:
126                class_name = value.attrs.get(u'canSAS_class')
127            elif value.attrs.get(u'NX_class') is not None:
128                class_name = value.attrs.get(u'NX_class')
129            else:
130                class_name = key
131            if class_name is not None:
132                class_prog = re.compile(class_name)
133            else:
134                class_prog = re.compile(value.name)
135
136            if isinstance(value, h5py.Group):
137                self.parent_class = class_name
138                parent_list.append(key)
139                # If a new sasentry, store the current data sets and create
140                # a fresh Data1D/2D object
141                if class_prog.match(u'SASentry'):
142                    self.add_data_set(key)
143                elif class_prog.match(u'SASdata'):
144                    self._find_data_attributes(value)
145                    self._initialize_new_data_set(parent_list)
146                # Recursion step to access data within the group
147                self.read_children(value, parent_list)
148                self.add_intermediate()
149                parent_list.remove(key)
150
151            elif isinstance(value, h5py.Dataset):
152                # If this is a dataset, store the data appropriately
153                data_set = data[key][:]
154                unit = self._get_unit(value)
155
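                # Datasets are consumed point-by-point below: scalar metadata
                # is dispatched on the enclosing group's canSAS class, while
                # SASdata and SAStransmission_spectrum arrays are handed over
                # whole and break out of the loop after the first pass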
                for data_point in data_set:
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = value.attrs['name']
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # The run has no name attribute; ignore it
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # Everything else goes in meta_data
                    elif self.parent_class == u'SASdata':
                        self.process_data_object(data_set, key, unit)
                        break
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # This code should be unreachable; log an error if it ever runs
                self.errors.add("ShouldNeverHappenException")

    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method
        :param data_set: data from the HDF5 file
        :param key: name of the dataset being processed
        :param unit: unit attribute of the dataset
        """
        # FIXME: check attributes of SASdata for Q, dQ, Mask, etc.
        if key == self.i_name:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        elif key == self.i_uncertainties:
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        elif key in self.q_name:
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
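        # dQw and dQl follow the canSAS slit-smearing convention (resolution
        # components along the slit width and slit length, respectively); any
        # other named resolution is stored as a pointwise dQ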
        elif key in self.q_resolutions:
            if key == u'dQw':
                self.current_dataset.dxw = data_set.flatten()
            elif key == u'dQl':
                self.current_dataset.dxl = data_set.flatten()
            else:
                self.current_dataset.dx = data_set.flatten()
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        elif key == self.mask_name:
            self.current_dataset.mask = data_set.flatten()
        elif key == u'wavelength':
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit

    def process_trans_spectrum(self, data_set, key):
        """
        SAStransmission_spectrum processor
        :param data_set: data from the HDF5 file
        :param key: name of the dataset the data was taken from
        """
        if key == u'T':
            self.trans_spectrum.transmission = data_set.flatten()
        elif key == u'Tdev':
            self.trans_spectrum.transmission_deviation = data_set.flatten()
        elif key == u'lambda':
            self.trans_spectrum.wavelength = data_set.flatten()

    def process_sample(self, data_point, key):
        """
        SASsample processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was taken from
        """
        if key == u'Title':
            self.current_datainfo.sample.name = data_point
        elif key == u'name':
            self.current_datainfo.sample.name = data_point
        elif key == u'ID':
            self.current_datainfo.sample.name = data_point
        elif key == u'thickness':
            self.current_datainfo.sample.thickness = data_point
        elif key == u'temperature':
            self.current_datainfo.sample.temperature = data_point
        elif key == u'transmission':
            self.current_datainfo.sample.transmission = data_point
        elif key == u'x_position':
            self.current_datainfo.sample.position.x = data_point
        elif key == u'y_position':
            self.current_datainfo.sample.position.y = data_point
        elif key == u'pitch':
            self.current_datainfo.sample.orientation.x = data_point
        elif key == u'yaw':
            self.current_datainfo.sample.orientation.y = data_point
        elif key == u'roll':
            self.current_datainfo.sample.orientation.z = data_point
        elif key == u'details':
            self.current_datainfo.sample.details.append(data_point)

    def process_detector(self, data_point, key, unit):
        """
        SASdetector processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was taken from
        :param unit: unit attribute from the data set
        """
        if key == u'name':
            self.detector.name = data_point
        elif key == u'SDD':
            self.detector.distance = float(data_point)
            self.detector.distance_unit = unit
        elif key == u'slit_length':
            self.detector.slit_length = float(data_point)
            self.detector.slit_length_unit = unit
        elif key == u'x_position':
            self.detector.offset.x = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'y_position':
            self.detector.offset.y = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'pitch':
            self.detector.orientation.x = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'roll':
            self.detector.orientation.z = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'yaw':
            self.detector.orientation.y = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'beam_center_x':
            self.detector.beam_center.x = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'beam_center_y':
            self.detector.beam_center.y = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'x_pixel_size':
            self.detector.pixel_size.x = float(data_point)
            self.detector.pixel_size_unit = unit
        elif key == u'y_pixel_size':
            self.detector.pixel_size.y = float(data_point)
            self.detector.pixel_size_unit = unit

    def process_collimation(self, data_point, key, unit):
        """
        SAScollimation processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was taken from
        :param unit: unit attribute from the data set
        """
        if key == u'distance':
            self.collimation.length = data_point
            self.collimation.length_unit = unit
        elif key == u'name':
            self.collimation.name = data_point

    def process_aperture(self, data_point, key):
        """
        SASaperture processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was taken from
        """
        if key == u'shape':
            self.aperture.shape = data_point
        elif key == u'x_gap':
            self.aperture.size.x = data_point
        elif key == u'y_gap':
            self.aperture.size.y = data_point

    def process_source(self, data_point, key, unit):
        """
        SASsource processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was taken from
        :param unit: unit attribute from the data set
        """
        if key == u'incident_wavelength':
            self.current_datainfo.source.wavelength = data_point
            self.current_datainfo.source.wavelength_unit = unit
        elif key == u'wavelength_max':
            self.current_datainfo.source.wavelength_max = data_point
            self.current_datainfo.source.wavelength_max_unit = unit
        elif key == u'wavelength_min':
            self.current_datainfo.source.wavelength_min = data_point
            self.current_datainfo.source.wavelength_min_unit = unit
        elif key == u'incident_wavelength_spread':
            self.current_datainfo.source.wavelength_spread = data_point
            self.current_datainfo.source.wavelength_spread_unit = unit
        elif key == u'beam_size_x':
            self.current_datainfo.source.beam_size.x = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_size_y':
            self.current_datainfo.source.beam_size.y = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_shape':
            self.current_datainfo.source.beam_shape = data_point
        elif key == u'radiation':
            self.current_datainfo.source.radiation = data_point

    def process_process(self, data_point, key):
        """
        SASprocess processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was taken from
        """
        if key == u'Title':  # CanSAS 2.0
            self.process.name = data_point
        elif key == u'name':  # NXcanSAS
            self.process.name = data_point
        elif key == u'description':
            self.process.description = data_point
        elif key == u'date':
            self.process.date = data_point
        elif key == u'term':
            self.process.term = data_point
        else:
            self.process.notes.append(data_point)

    def add_intermediate(self):
        """
        This method stores any intermediate objects within the final data set
        after fully reading the set, keyed on self.parent_class, the NXclass
        name of the h5py Group object that just finished being processed.
        """

        if self.parent_class == u'SASprocess':
            self.current_datainfo.process.append(self.process)
            self.process = Process()
        elif self.parent_class == u'SASdetector':
            self.current_datainfo.detector.append(self.detector)
            self.detector = Detector()
        elif self.parent_class == u'SAStransmission_spectrum':
            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
            self.trans_spectrum = TransmissionSpectrum()
        elif self.parent_class == u'SAScollimation':
            self.current_datainfo.collimation.append(self.collimation)
            self.collimation = Collimation()
        elif self.parent_class == u'SASaperture':
            self.collimation.aperture.append(self.aperture)
            self.aperture = Aperture()
        elif self.parent_class == u'SASdata':
            if isinstance(self.current_dataset, plottable_2D):
                self.data2d.append(self.current_dataset)
            elif isinstance(self.current_dataset, plottable_1D):
                self.data1d.append(self.current_dataset)

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_datainfo and
        all data1D and data2D objects, then combines the data and info into
        Data1D and Data2D objects
        """

        # Type cast data arrays to float64
        if len(self.current_datainfo.trans_spectrum) > 0:
            spectrum_list = []
            for spectrum in self.current_datainfo.trans_spectrum:
                # The leading element of each spectrum array is dropped; it
                # appears to be a placeholder left over from initialization
                spectrum.transmission = np.delete(spectrum.transmission, [0])
                spectrum.transmission = spectrum.transmission.astype(np.float64)
                spectrum.transmission_deviation = np.delete(
                    spectrum.transmission_deviation, [0])
                spectrum.transmission_deviation = \
                    spectrum.transmission_deviation.astype(np.float64)
                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
                if len(spectrum.transmission) > 0:
                    spectrum_list.append(spectrum)
            self.current_datainfo.trans_spectrum = spectrum_list

        # Append errors to dataset and reset class errors
        self.current_datainfo.errors = self.errors
        self.errors.clear()

        # Combine all plottables with datainfo and append each to output
        # Type cast data arrays to float64 and find min/max as appropriate
        for dataset in self.data2d:
            dataset.data = dataset.data.astype(np.float64)
            dataset.err_data = dataset.err_data.astype(np.float64)
            if dataset.qx_data is not None:
                dataset.xmin = np.min(dataset.qx_data)
                dataset.xmax = np.max(dataset.qx_data)
                dataset.qx_data = dataset.qx_data.astype(np.float64)
            if dataset.dqx_data is not None:
                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
            if dataset.qy_data is not None:
                dataset.ymin = np.min(dataset.qy_data)
                dataset.ymax = np.max(dataset.qy_data)
                dataset.qy_data = dataset.qy_data.astype(np.float64)
            if dataset.dqy_data is not None:
                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
            if dataset.q_data is not None:
                dataset.q_data = dataset.q_data.astype(np.float64)
            # Default mask: include every point, then copy in any mask values
            # read from the file
            default_mask = np.ones(dataset.data.size, dtype=bool)
            try:
                for i in range(dataset.mask.size):
                    default_mask[i] = dataset.mask[i]
            except Exception:
                self.errors.add(str(sys.exc_info()[1]))
            dataset.mask = default_mask
            # Calculate the actual Q matrix
            try:
                if dataset.q_data.size <= 1:
                    dataset.q_data = np.sqrt(dataset.qx_data
                                             * dataset.qx_data
                                             + dataset.qy_data
                                             * dataset.qy_data)
            except Exception:
                dataset.q_data = None

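            # For data on a regular grid, qy_data repeats each row value
            # n_cols times, so every n_cols-th entry gives the y bins, and
            # the first n_cols entries of qx_data give the x bins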
            if dataset.data.ndim == 2:
                (n_rows, n_cols) = dataset.data.shape
                dataset.y_bins = dataset.qy_data[0::n_cols]
                dataset.x_bins = dataset.qx_data[:n_cols]
                dataset.data = dataset.data.flatten()

            final_dataset = combine_data_info_with_plottable(
                dataset, self.current_datainfo)
            self.output.append(final_dataset)

        for dataset in self.data1d:
            if dataset.x is not None:
                dataset.x = dataset.x.astype(np.float64)
                dataset.xmin = np.min(dataset.x)
                dataset.xmax = np.max(dataset.x)
            if dataset.y is not None:
                dataset.y = dataset.y.astype(np.float64)
                dataset.ymin = np.min(dataset.y)
                dataset.ymax = np.max(dataset.y)
            if dataset.dx is not None:
                dataset.dx = dataset.dx.astype(np.float64)
            if dataset.dxl is not None:
                dataset.dxl = dataset.dxl.astype(np.float64)
            if dataset.dxw is not None:
                dataset.dxw = dataset.dxw.astype(np.float64)
            if dataset.dy is not None:
                dataset.dy = dataset.dy.astype(np.float64)
            final_dataset = combine_data_info_with_plottable(
                dataset, self.current_datainfo)
            self.output.append(final_dataset)

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final
        processing on the data, then calls a private method to generate a
        new data set.

        :param key: NeXus group name for the current tree level
        """

        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()

    def _initialize_new_data_set(self, parent_list=None):
        """
        A private class method to generate a new 1D or 2D data object based on
        the type of data within the set. Outside methods should call
        add_data_set() to be sure any existing data is stored properly.

        :param parent_list: List of names of parent elements
        """

        if parent_list is None:
            parent_list = []
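        # A Qx entry anywhere in the current branch implies 2D I(Qx, Qy)
        # data; otherwise default to a 1D I(Q) plottable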
        if self._find_intermediate(parent_list, "Qx"):
            self.current_dataset = plottable_2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = plottable_1D(x, y)
        self.current_datainfo.filename = self.raw_data.filename

    def _find_data_attributes(self, value):
        """
        A private method to find the names of the Q datasets, the Qdev and
        Idev uncertainty datasets, and the mask dataset.
        :param value: SASdata/NXdata HDF5 Group
        """
        attrs = value.attrs
        signal = attrs.get("signal")

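        # Per the NXcanSAS standard, the group's 'signal' attribute names the
        # intensity dataset, 'I_axes' lists the axis datasets for each
        # dimension of I, and 'Q_indices' identifies which of those axes
        # hold Q values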
        i_axes = np.array(attrs.get("I_axes").split(","))
        q_indices = np.array(attrs.get("Q_indices"))
        self.mask_name = attrs.get("mask")
        keys = value.keys()
        for val in q_indices:
            self.q_name.append(i_axes[val])
        self.i_name = signal
        self.i_node = value.get(self.i_name)
        for item in self.q_name:
            if item in keys:
                q_vals = value.get(item)
                self.q_uncertainties = q_vals.attrs.get("uncertainty")
                self.q_resolutions = q_vals.attrs.get("resolution")
        if self.i_name in keys:
            i_vals = value.get(self.i_name)
            self.i_uncertainties = i_vals.attrs.get("uncertainty")

    def _find_intermediate(self, parent_list, basename=""):
        """
        A private method used to find an entry by either using a direct key or
        knowing the approximate basename.

        :param parent_list: List of parent nodes in the HDF5 file
        :param basename: Approximate name of an entry to search for
        :return: True if an entry matching basename exists at this level of
                 the tree
        """

        entry = False
        key_prog = re.compile(basename)
        top = self.raw_data
        for parent in parent_list:
            top = top.get(parent)
        for key in top.keys():
            if key_prog.match(key):
                entry = True
                break
        return entry

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The candidate key for the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: The new name for the dictionary entry
        """
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name

    def _get_unit(self, value):
        """
        Find the unit for a particular value within the h5py dictionary

        :param value: attribute dictionary for a particular value set
        :return: unit for the value passed to the method
        """
        unit = value.attrs.get(u'units')
        if unit is None:
            unit = value.attrs.get(u'unit')
        # Convert the unit formats
        if unit == "1/A":
            unit = "A^{-1}"
        elif unit == "1/cm":
            unit = "cm^{-1}"
        return unit
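

# A minimal usage sketch, assuming an NXcanSAS/CanSAS 2.0 HDF5 file exists at
# the hypothetical path below; read() returns a list of Data1D/Data2D objects,
# each combining a plottable with its DataInfo metadata.
if __name__ == "__main__":
    reader = Reader()
    data_sets = reader.read("example_nxcansas.h5")  # hypothetical file name
    for data in data_sets:
        # Data1D exposes x/y (Q and intensity); Data2D exposes data along
        # with qx_data/qy_data
        print(type(data).__name__, data.filename, data.title)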