source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 0d93464

Last change on this file was 0d93464, checked in by krzywon, 7 years ago

Organize NXcanSAS reader to minimize if statements.

1"""
2    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3"""
4
5import h5py
6import numpy as np
7import re
8import os
9import sys
10
11from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D,\
12    Data1D, Data2D, DataInfo, Process, Aperture, Collimation, \
13    TransmissionSpectrum, Detector
14from sas.sascalc.dataloader.data_info import combine_data_info_with_plottable
15
16
class Reader():
    """
    A class for reading in CanSAS v2.0 data files. The current implementation
    opens Mantid-generated HDF5 files with the file extension .h5/.H5. Any
    number of data sets may be present within the file, and any
    dimensionality of data may be used. Currently 1D and 2D SAS data sets are
    supported, but future implementations will include 1D and 2D SESANS data.

    Any number of SASdata sets may be present in a SASentry, and the data
    within each can be either 1D I(Q) or 2D I(Qx, Qy).

    Also supports reading NXcanSAS formatted HDF5 files.

    :Dependencies:
        The CanSAS HDF5 reader requires h5py v2.5.0 or later.
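
    :Example:
        An illustrative usage sketch; it assumes a local NXcanSAS file named
        "example.h5" (a hypothetical filename)::

            reader = Reader()
            data_list = reader.read("example.h5")
            for data in data_list:
                print(data.title)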
    """

    # CanSAS version
    cansas_version = 2.0
    # Logged warnings or messages
    logging = None
    # List of errors for the current data set
    errors = None
    # Raw file contents to be processed
    raw_data = None
    # Data info currently being read in
    current_datainfo = None
    # SASdata set currently being read in
    current_dataset = None
    # List of plottable1D objects that should be linked to the current_datainfo
    data1d = None
    # List of plottable2D objects that should be linked to the current_datainfo
    data2d = None
    # Data type name
    type_name = "CanSAS 2.0"
    # Wildcards
    type = ["CanSAS 2.0 HDF5 Files (*.h5)|*.h5"]
    # List of allowed extensions
    ext = ['.h5', '.H5']
    # Flag to bypass extension check
    allow_all = True
    # List of data objects to return
    output = None

    def read(self, filename):
        """
        This is the general read method that all SasView data_loaders must have.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects and/or a list of errors.
        """
        # Reinitialize when loading a new data file to reset all class variables
        self.reset_class_variables()
        # Check that the file exists
        if os.path.isfile(filename):
            basename = os.path.basename(filename)
            _, extension = os.path.splitext(basename)
            # Process only files with an allowed extension, or any file when
            # the extension check is bypassed
            if extension in self.ext or self.allow_all:
                # Load the data file
                self.raw_data = h5py.File(filename, 'r')
                # Read in all child elements of top level SASroot
                self.read_children(self.raw_data, [])
                # Add the last data set to the list of outputs
                self.add_data_set()
                # Close the data file
                self.raw_data.close()
        # Return data set(s)
        return self.output

    def reset_class_variables(self):
        """
        Create the reader object and define initial states for class variables
        """
        self.current_datainfo = None
        self.current_dataset = None
        self.data1d = []
        self.data2d = []
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.output = []
        self.parent_class = u''
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.trans_spectrum = TransmissionSpectrum()

    def read_children(self, data, parent_list):
        """
        A recursive method for stepping through the hierarchical data file.

        :param data: h5py Group object of any kind
        :param parent_list: List of names of the parent h5py Groups
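
        A typical NXcanSAS hierarchy walked by this method (an illustrative
        sketch; actual group names vary between file writers)::

            sasentry01                 (canSAS_class: SASentry)
                sasdata01              (canSAS_class: SASdata)
                    I, Q, Idev, ...    (h5py.Dataset leaves)
                instrument             (canSAS_class: SASinstrument)
                    detector01         (canSAS_class: SASdetector)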
        """

        # Loop through each element of the parent and process accordingly
        for key in data.keys():
            # Get all information for the current key
            value = data.get(key)
            if value.attrs.get(u'canSAS_class') is not None:
                class_name = value.attrs.get(u'canSAS_class')
            elif value.attrs.get(u'NX_class') is not None:
                class_name = value.attrs.get(u'NX_class')
            else:
                class_name = key
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                self.parent_class = class_name
                parent_list.append(key)
                # If a new sasentry, store the current data sets and create
                # a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                elif class_prog.match(u'SASdata'):
                    self._initialize_new_data_set(parent_list)
                # Recursion step to access data within the group
                self.read_children(value, parent_list)
                self.add_intermediate()
                parent_list.remove(key)

            elif isinstance(value, h5py.Dataset):
                # If this is a dataset, store the data appropriately
                data_set = data[key][:]
                unit = self._get_unit(value)

                for data_point in data_set:
                    # Top Level Meta Data
                    if key == u'definition':
                        self.current_datainfo.meta_data['reader'] = data_point
                    # Run
                    elif key == u'run':
                        self.current_datainfo.run.append(data_point)
                        try:
                            run_name = value.attrs['name']
                            run_dict = {data_point: run_name}
                            self.current_datainfo.run_name = run_dict
                        except Exception:
                            # The run entry may lack a name attribute
                            pass
                    # Title
                    elif key == u'title':
                        self.current_datainfo.title = data_point
                    # Note
                    elif key == u'SASnote':
                        self.current_datainfo.notes.append(data_point)
                    # Sample Information
                    elif self.parent_class == u'SASsample':
                        self.process_sample(data_point, key)
                    # Instrumental Information
                    elif (key == u'name'
                          and self.parent_class == u'SASinstrument'):
                        self.current_datainfo.instrument = data_point
                    # Detector
                    elif self.parent_class == u'SASdetector':
                        self.process_detector(data_point, key, unit)
                    # Collimation
                    elif self.parent_class == u'SAScollimation':
                        self.process_collimation(data_point, key, unit)
                    # Aperture
                    elif self.parent_class == u'SASaperture':
                        self.process_aperture(data_point, key)
                    # Process Information
                    elif self.parent_class == u'SASprocess': # CanSAS 2.0
                        self.process_process(data_point, key)
                    # Source
                    elif self.parent_class == u'SASsource':
                        self.process_source(data_point, key, unit)
                    # SASdata arrays
                    elif self.parent_class == u'SASdata':
                        self.process_data_object(data_set, key, unit)
                        break
                    # Transmission spectrum arrays
                    elif self.parent_class == u'SAStransmission_spectrum':
                        self.process_trans_spectrum(data_set, key)
                        break
                    # Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(
                            self.current_datainfo.meta_data, key)
                        self.current_datainfo.meta_data[new_key] = data_point

            else:
                # This branch should be unreachable; record it if it happens
                self.errors.add("ShouldNeverHappenException")

    def process_data_object(self, data_set, key, unit):
        """
        SASdata processor method
        :param data_set: data array read from the HDF5 file
        :param key: name of the dataset within the SASdata group
        :param unit: unit attribute of the dataset
        """
        # FIXME: check attributes of SASdata for Q, dQ, Mask, etc.
        if key == u'I':
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.data = data_set
                self.current_dataset.zaxis("Intensity", unit)
            else:
                self.current_dataset.y = data_set.flatten()
                self.current_dataset.yaxis("Intensity", unit)
        elif key == u'Idev':
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.err_data = data_set.flatten()
            else:
                self.current_dataset.dy = data_set.flatten()
        elif key == u'Q':
            self.current_dataset.xaxis("Q", unit)
            if isinstance(self.current_dataset, plottable_2D):
                self.current_dataset.q = data_set.flatten()
            else:
                self.current_dataset.x = data_set.flatten()
        elif key == u'Qdev':
            self.current_dataset.dx = data_set.flatten()
        elif key == u'dQw':
            self.current_dataset.dxw = data_set.flatten()
        elif key == u'dQl':
            self.current_dataset.dxl = data_set.flatten()
        elif key == u'Qy':
            self.current_dataset.yaxis("Q_y", unit)
            self.current_dataset.qy_data = data_set.flatten()
        elif key == u'Qydev':
            self.current_dataset.dqy_data = data_set.flatten()
        elif key == u'Qx':
            self.current_dataset.xaxis("Q_x", unit)
            self.current_dataset.qx_data = data_set.flatten()
        elif key == u'Qxdev':
            self.current_dataset.dqx_data = data_set.flatten()
        elif key == u'Mask':
            self.current_dataset.mask = data_set.flatten()
        elif key == u'wavelength':
            self.current_datainfo.source.wavelength = data_set[0]
            self.current_datainfo.source.wavelength_unit = unit

    def process_trans_spectrum(self, data_set, key):
        """
        SAStransmission_spectrum processor
        :param data_set: data array read from the HDF5 file
        :param key: name of the dataset within the group
        """
        if key == u'T':
            self.trans_spectrum.transmission = data_set.flatten()
        elif key == u'Tdev':
            self.trans_spectrum.transmission_deviation = data_set.flatten()
        elif key == u'lambda':
            self.trans_spectrum.wavelength = data_set.flatten()

    def process_sample(self, data_point, key):
        """
        SASsample processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was read from
        """
        if key in (u'Title', u'name', u'ID'):
            self.current_datainfo.sample.name = data_point
        elif key == u'thickness':
            self.current_datainfo.sample.thickness = data_point
        elif key == u'temperature':
            self.current_datainfo.sample.temperature = data_point
        elif key == u'transmission':
            self.current_datainfo.sample.transmission = data_point
        elif key == u'x_position':
            self.current_datainfo.sample.position.x = data_point
        elif key == u'y_position':
            self.current_datainfo.sample.position.y = data_point
        elif key == u'pitch':
            self.current_datainfo.sample.orientation.x = data_point
        elif key == u'yaw':
            self.current_datainfo.sample.orientation.y = data_point
        elif key == u'roll':
            self.current_datainfo.sample.orientation.z = data_point
        elif key == u'details':
            self.current_datainfo.sample.details.append(data_point)

    def process_detector(self, data_point, key, unit):
        """
        SASdetector processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was read from
        :param unit: unit attribute from data set
        """
        if key == u'name':
            self.detector.name = data_point
        elif key == u'SDD':
            self.detector.distance = float(data_point)
            self.detector.distance_unit = unit
        elif key == u'slit_length':
            self.detector.slit_length = float(data_point)
            self.detector.slit_length_unit = unit
        elif key == u'x_position':
            self.detector.offset.x = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'y_position':
            self.detector.offset.y = float(data_point)
            self.detector.offset_unit = unit
        elif key == u'pitch':
            self.detector.orientation.x = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'roll':
            self.detector.orientation.z = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'yaw':
            self.detector.orientation.y = float(data_point)
            self.detector.orientation_unit = unit
        elif key == u'beam_center_x':
            self.detector.beam_center.x = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'beam_center_y':
            self.detector.beam_center.y = float(data_point)
            self.detector.beam_center_unit = unit
        elif key == u'x_pixel_size':
            self.detector.pixel_size.x = float(data_point)
            self.detector.pixel_size_unit = unit
        elif key == u'y_pixel_size':
            self.detector.pixel_size.y = float(data_point)
            self.detector.pixel_size_unit = unit

    def process_collimation(self, data_point, key, unit):
        """
        SAScollimation processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was read from
        :param unit: unit attribute from data set
        """
        if key == u'distance':
            self.collimation.length = data_point
            self.collimation.length_unit = unit
        elif key == u'name':
            self.collimation.name = data_point

    def process_aperture(self, data_point, key):
        """
        SASaperture processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was read from
        """
        if key == u'shape':
            self.aperture.shape = data_point
        elif key == u'x_gap':
            self.aperture.size.x = data_point
        elif key == u'y_gap':
            self.aperture.size.y = data_point

    def process_source(self, data_point, key, unit):
        """
        SASsource processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was read from
        :param unit: unit attribute from data set
        """
        if key == u'incident_wavelength':
            self.current_datainfo.source.wavelength = data_point
            self.current_datainfo.source.wavelength_unit = unit
        elif key == u'wavelength_max':
            self.current_datainfo.source.wavelength_max = data_point
            self.current_datainfo.source.wavelength_max_unit = unit
        elif key == u'wavelength_min':
            self.current_datainfo.source.wavelength_min = data_point
            self.current_datainfo.source.wavelength_min_unit = unit
        elif key == u'incident_wavelength_spread':
            self.current_datainfo.source.wavelength_spread = data_point
            self.current_datainfo.source.wavelength_spread_unit = unit
        elif key == u'beam_size_x':
            self.current_datainfo.source.beam_size.x = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_size_y':
            self.current_datainfo.source.beam_size.y = data_point
            self.current_datainfo.source.beam_size_unit = unit
        elif key == u'beam_shape':
            self.current_datainfo.source.beam_shape = data_point
        elif key == u'radiation':
            self.current_datainfo.source.radiation = data_point

    def process_process(self, data_point, key):
        """
        SASprocess processor
        :param data_point: Single point from an HDF5 data file
        :param key: name of the dataset the data_point was read from
        """
        if key in (u'Title', u'name'):  # Title in CanSAS 2.0, name in NXcanSAS
            self.process.name = data_point
        elif key == u'description':
            self.process.description = data_point
        elif key == u'date':
            self.process.date = data_point
        elif key == u'term':
            self.process.term = data_point
        else:
            self.process.notes.append(data_point)

    def add_intermediate(self):
        """
        This method stores any intermediate objects within the final data set
        after fully reading the set, based on self.parent_class (the NXclass
        name of the h5py Group that just finished being processed).
        """

        if self.parent_class == u'SASprocess':
            self.current_datainfo.process.append(self.process)
            self.process = Process()
        elif self.parent_class == u'SASdetector':
            self.current_datainfo.detector.append(self.detector)
            self.detector = Detector()
        elif self.parent_class == u'SAStransmission_spectrum':
            self.current_datainfo.trans_spectrum.append(self.trans_spectrum)
            self.trans_spectrum = TransmissionSpectrum()
        elif self.parent_class == u'SAScollimation':
            self.current_datainfo.collimation.append(self.collimation)
            self.collimation = Collimation()
        elif self.parent_class == u'SASaperture':
            self.collimation.aperture.append(self.aperture)
            self.aperture = Aperture()
        elif self.parent_class == u'SASdata':
            if isinstance(self.current_dataset, plottable_2D):
                self.data2d.append(self.current_dataset)
            elif isinstance(self.current_dataset, plottable_1D):
                self.data1d.append(self.current_dataset)

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_datainfo and
        all data1D and data2D objects and then combines the data and info into
        Data1D and Data2D objects
        """

        # Type cast the transmission spectrum arrays to float64
        if len(self.current_datainfo.trans_spectrum) > 0:
            spectrum_list = []
            for spectrum in self.current_datainfo.trans_spectrum:
                spectrum.transmission = np.delete(spectrum.transmission, [0])
                spectrum.transmission = spectrum.transmission.astype(np.float64)
                spectrum.transmission_deviation = np.delete(
                    spectrum.transmission_deviation, [0])
                spectrum.transmission_deviation = \
                    spectrum.transmission_deviation.astype(np.float64)
                spectrum.wavelength = np.delete(spectrum.wavelength, [0])
                spectrum.wavelength = spectrum.wavelength.astype(np.float64)
                if len(spectrum.transmission) > 0:
                    spectrum_list.append(spectrum)
            self.current_datainfo.trans_spectrum = spectrum_list

        # Append errors to dataset and reset class errors
        self.current_datainfo.errors = self.errors
        self.errors.clear()

        # Combine all plottables with datainfo and append each to output
        # Type cast data arrays to float64 and find min/max as appropriate
        for dataset in self.data2d:
            dataset.data = dataset.data.astype(np.float64)
            dataset.err_data = dataset.err_data.astype(np.float64)
            if dataset.qx_data is not None:
                dataset.xmin = np.min(dataset.qx_data)
                dataset.xmax = np.max(dataset.qx_data)
                dataset.qx_data = dataset.qx_data.astype(np.float64)
            if dataset.dqx_data is not None:
                dataset.dqx_data = dataset.dqx_data.astype(np.float64)
            if dataset.qy_data is not None:
                dataset.ymin = np.min(dataset.qy_data)
                dataset.ymax = np.max(dataset.qy_data)
                dataset.qy_data = dataset.qy_data.astype(np.float64)
            if dataset.dqy_data is not None:
                dataset.dqy_data = dataset.dqy_data.astype(np.float64)
            if dataset.q_data is not None:
                dataset.q_data = dataset.q_data.astype(np.float64)
            # Start from an all-True mask and copy in any mask values read
            # from the file
            mask = np.ones(dataset.data.size, dtype=bool)
            try:
                for i in range(dataset.mask.size):
                    mask[i] = dataset.mask[i]
            except Exception as exc:
                self.errors.add(str(exc))
            dataset.mask = mask
            # Calculate the actual Q matrix: when no Q array was supplied,
            # derive one per point as Q = sqrt(Qx^2 + Qy^2)
            try:
                if dataset.q_data.size <= 1:
                    dataset.q_data = np.sqrt(dataset.qx_data
                                             * dataset.qx_data
                                             + dataset.qy_data
                                             * dataset.qy_data)
            except Exception:
                dataset.q_data = None

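            # A sketch of the bin-recovery logic below, assuming row-major
            # (C-order) flattening of the detector grid: qy_data[0::n_cols]
            # picks the first entry of each row (one Qy per row) and
            # qx_data[:n_cols] is the first row (one Qx per column).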
            if dataset.data.ndim == 2:
                (n_rows, n_cols) = dataset.data.shape
                dataset.y_bins = dataset.qy_data[0::n_cols]
                dataset.x_bins = dataset.qx_data[:n_cols]
                dataset.data = dataset.data.flatten()

            final_dataset = combine_data_info_with_plottable(
                dataset, self.current_datainfo)
            self.output.append(final_dataset)

        for dataset in self.data1d:
            if dataset.x is not None:
                dataset.x = dataset.x.astype(np.float64)
                dataset.xmin = np.min(dataset.x)
                dataset.xmax = np.max(dataset.x)
            if dataset.y is not None:
                dataset.y = dataset.y.astype(np.float64)
                dataset.ymin = np.min(dataset.y)
                dataset.ymax = np.max(dataset.y)
            if dataset.dx is not None:
                dataset.dx = dataset.dx.astype(np.float64)
            if dataset.dxl is not None:
                dataset.dxl = dataset.dxl.astype(np.float64)
            if dataset.dxw is not None:
                dataset.dxw = dataset.dxw.astype(np.float64)
            if dataset.dy is not None:
                dataset.dy = dataset.dy.astype(np.float64)
            final_dataset = combine_data_info_with_plottable(
                dataset, self.current_datainfo)
            self.output.append(final_dataset)

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final
        processing on the data and then calls a private method to generate a
        new data set.

        :param key: NeXus group name for current tree level
        """

        if self.current_datainfo and self.current_dataset:
            self.final_data_cleanup()
        self.data1d = []
        self.data2d = []
        self.current_datainfo = DataInfo()

    def _initialize_new_data_set(self, parent_list=None):
        """
        A private class method to generate a new 1D or 2D data object based on
        the type of data within the set. Outside methods should call
        add_data_set() to be sure any existing data is stored properly.

        :param parent_list: List of names of parent elements
        """

        if parent_list is None:
            parent_list = []
        if self._find_intermediate(parent_list, "Qx"):
            self.current_dataset = plottable_2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = plottable_1D(x, y)
        self.current_datainfo.filename = self.raw_data.filename

    def _find_intermediate(self, parent_list, basename=""):
        """
        A private method used to find an entry by either using a direct key or
        knowing the approximate basename.

        :param parent_list: List of parent nodes in the HDF5 file
        :param basename: Approximate name of an entry to search for
        :return: True if an entry matching basename exists at this level
        """

        entry = False
        # re.match() anchors at the start of the key, so e.g. "Qx" also
        # matches keys such as "Qx_data"
        key_prog = re.compile(basename)
        top = self.raw_data
        for parent in parent_list:
            top = top.get(parent)
        for key in top.keys():
            if key_prog.match(key):
                entry = True
                break
        return entry

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The key of the item to be added to the dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: The new name for the dictionary entry
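
        For example (an illustrative case of the logic below): if "detector"
        is already a key in the dictionary, this returns "detector_1"; if
        "detector_1" is also taken, it returns "detector_2", and so on.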
        """
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name

    def _get_unit(self, value):
        """
        Find the unit for a particular value from an h5py Dataset's attributes

        :param value: attribute dictionary for a particular value set
        :return: unit for the value passed to the method
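
        For example (illustrative): a dataset whose "units" attribute is
        "1/A" is returned as "A^{-1}", the exponent notation used elsewhere
        in SasView.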
        """
        unit = value.attrs.get(u'units')
        if unit is None:
            unit = value.attrs.get(u'unit')
        # Convert the unit formats
        if unit == "1/A":
            unit = "A^{-1}"
        elif unit == "1/cm":
            unit = "cm^{-1}"
        return unit
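
# ---------------------------------------------------------------------------
# Illustrative note (not part of the original module): within SasView this
# reader is normally invoked through the generic loader rather than directly.
# A minimal sketch, assuming a local file "example.h5" and that
# sas.sascalc.dataloader.loader.Loader behaves as in contemporary SasView:
#
#     from sas.sascalc.dataloader.loader import Loader
#     data_list = Loader().load("example.h5")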