"""
    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
"""

import h5py
import numpy as np
import re
import os
import sys

from sas.sascalc.dataloader.data_info import Data1D, Data2D, Sample, Source
from sas.sascalc.dataloader.data_info import Process, Aperture, Collimation, TransmissionSpectrum, Detector


class Reader():
    """
    A class for reading HDF5 formatted CanSAS 2.0 data files. The file contents
    are parsed into SasView Data1D/Data2D objects, which are returned to the
    caller by the read() method.

    :Dependencies:
        The CanSAS HDF5 reader requires h5py v2.5.0 or later.
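
    :Example:
        A minimal direct-use sketch (the reader is normally invoked through the
        SasView data_loader; the file path shown here is a placeholder)::

            reader = Reader()
            data_sets = reader.read("/path/to/cansas_file.h5")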
    """

    ## CanSAS version
    cansas_version = 2.0
    ## Logged warnings or messages
    logging = None
    ## List of errors for the current data set
    errors = None
    ## Raw file contents to be processed
    raw_data = None
    ## Data set being modified
    current_dataset = None
    ## For recursion and saving purposes, remember parent objects
    parent_list = None
    ## Data type name
    type_name = "CanSAS 2D"
    ## Wildcards
    type = ["CanSAS 2D HDF5 Files (*.h5)|*.h5"]
    ## List of allowed extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass extension check
    allow_all = False
    ## List of files to return
    output = None

    def __init__(self):
        """
        Create the reader object and define initial states for certain class variables
        """
        self.current_dataset = None
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.parent_list = []
        self.output = []
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.sample = Sample()
        self.source = Source()
        self.trans_spectrum = TransmissionSpectrum()

    def read(self, filename):
        """
        General read method called by the top-level SasView data_loader.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects, or None if the file cannot be loaded.
        """

        ## Reinitialize the class when loading a new data file to reset all class variables
        self.__init__()
        ## Check that the file exists
        if os.path.isfile(filename):
            basename = os.path.basename(filename)
            _, extension = os.path.splitext(basename)
            # If the file type is not allowed, do not load the file
            if extension in self.ext or self.allow_all:
                ## Load the data file
                self.raw_data = h5py.File(filename, 'r')
                ## Read in all child elements of top level SASroot
                self.read_children(self.raw_data)
                self.add_data_set()
                ## Return data set(s)
                return self.output

    def read_children(self, data, parent=u'SASroot'):
        """
        Recursive method for stepping through the hierarchy. Stores the data
        in the appropriate SasView objects.

        :param data: h5py Group object of any kind
        :param parent: h5py Group parent name
        :return: None
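
        The walk assumes the canonical CanSAS 2.0 HDF5 layout, sketched below
        (group and dataset names are illustrative, not exhaustive)::

            SASroot
                sasentry (SASentry)
                    title, run, definition
                    sasdata (SASdata)
                        I, Idev, Q (or Qx, Qy), Mask, ...
                    SASsample, SASprocess, ...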
        """

        ## Create a regex for the parent group
        parent_prog = re.compile(parent)

        ## Loop through each element of the parent and process accordingly
        for key in data.keys():
            ## Get all information for the current key
            value = data.get(key)
            attr_keys = value.attrs.keys()
            attr_values = value.attrs.values()
            class_name = value.attrs.get(u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                ## If this is a new sasentry, store the current data set and create a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                ## If the value is a group of data, iterate
                ## TODO: If Process, Aperture, etc, store and renew
                ## Recursion step to access the data within the group
                self.read_children(data.get(key), class_name)

            elif isinstance(value, h5py.Dataset):
                ## If this is a dataset, store the data appropriately
                ## TODO: Add instrumental information
                data_set = data[key][:]

                for data_point in data_set:
                    ## Top Level Meta Data
                    if key == u'definition':
                        self.current_dataset.meta_data['reader'] = data_point
                    elif key == u'run':
                        self.current_dataset.run.append(data_point)
                    elif key == u'title':
                        self.current_dataset.title = data_point
                    elif key == u'SASnote':
                        self.current_dataset.notes.append(data_point)

                    ## I and Q Data
                    elif key == u'I':
                        i_unit = value.attrs.get(u'unit')
                        if type(self.current_dataset) is Data2D:
                            self.current_dataset.data = np.append(self.current_dataset.data, data_point)
                            self.current_dataset.zaxis("Intensity (%s)" % (i_unit), i_unit)
                        else:
                            self.current_dataset.y = np.append(self.current_dataset.y, data_point)
                            self.current_dataset.yaxis("Intensity (%s)" % (i_unit), i_unit)
                    elif key == u'Idev':
                        if type(self.current_dataset) is Data2D:
                            self.current_dataset.err_data = np.append(self.current_dataset.err_data, data_point)
                        else:
                            self.current_dataset.dy = np.append(self.current_dataset.dy, data_point)
                    elif key == u'Q':
                        q_unit = value.attrs.get(u'unit')
                        self.current_dataset.xaxis("Q (%s)" % (q_unit), q_unit)
                        if type(self.current_dataset) is Data2D:
                            self.current_dataset.q = np.append(self.current_dataset.q, data_point)
                        else:
                            self.current_dataset.x = np.append(self.current_dataset.x, data_point)
                    elif key == u'Qy':
                        q_unit = value.attrs.get(u'unit')
                        self.current_dataset.yaxis("Q (%s)" % (q_unit), q_unit)
                        self.current_dataset.qy_data = np.append(self.current_dataset.qy_data, data_point)
                    elif key == u'Qydev':
                        self.current_dataset.dqy_data = np.append(self.current_dataset.dqy_data, data_point)
                    elif key == u'Qx':
                        q_unit = value.attrs.get(u'unit')
                        self.current_dataset.xaxis("Q (%s)" % (q_unit), q_unit)
                        self.current_dataset.qx_data = np.append(self.current_dataset.qx_data, data_point)
                    elif key == u'Qxdev':
                        self.current_dataset.dqx_data = np.append(self.current_dataset.dqx_data, data_point)
                    elif key == u'Mask':
                        self.current_dataset.mask = np.append(self.current_dataset.mask, data_point)

                    ## Other Information
                    elif key == u'wavelength':
                        if data_set.size > 1:
                            self.trans_spectrum.wavelength.append(data_point)
                            self.source.wavelength = sum(self.trans_spectrum.wavelength) \
                                    / len(self.trans_spectrum.wavelength)
                        else:
                            self.source.wavelength = data_point
                    elif key == u'probe_type':
                        self.source.radiation = data_point
                    elif key == u'transmission':
                        if data_set.size > 1:
                            self.trans_spectrum.transmission.append(data_point)
                            self.sample.transmission = sum(self.trans_spectrum.transmission) \
                                    / len(self.trans_spectrum.transmission)
                        else:
                            self.sample.transmission = data_point

                    ## Sample Information
                    elif key == u'Title' and parent == u'SASsample':
                        self.sample.name = data_point
                    elif key == u'thickness' and parent == u'SASsample':
                        self.sample.thickness = data_point
                    elif key == u'temperature' and parent == u'SASsample':
                        self.sample.temperature = data_point

                    ## Process Information
                    elif key == u'name' and parent == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'Title' and parent == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'description' and parent == u'SASprocess':
                        self.process.description = data_point
                    elif key == u'date' and parent == u'SASprocess':
                        self.process.date = data_point

                    ## Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(self.current_dataset.meta_data, key)
                        self.current_dataset.meta_data[new_key] = data_point

            else:
                ## It is unclear whether this code is reachable
                self.errors.add("ShouldNeverHappenException")

        return

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_dataset
        """
        ## TODO: Add all cleanup items - NOT FINISHED
        ## TODO: All strings to float64
        ## TODO: All intermediates (self.sample, etc.) put in self.current_dataset

        ## Type cast data arrays to float64 and find min/max as appropriate
        if type(self.current_dataset) is Data2D:
            self.current_dataset.data = np.delete(self.current_dataset.data, [0])
            self.current_dataset.data = self.current_dataset.data.astype(np.float64)
            self.current_dataset.err_data = np.delete(self.current_dataset.err_data, [0])
            self.current_dataset.err_data = self.current_dataset.err_data.astype(np.float64)
            self.current_dataset.mask = np.delete(self.current_dataset.mask, [0])
            if self.current_dataset.qx_data is not None:
                self.current_dataset.qx_data = np.delete(self.current_dataset.qx_data, [0])
                self.current_dataset.xmin = np.min(self.current_dataset.qx_data)
                self.current_dataset.xmax = np.max(self.current_dataset.qx_data)
                self.current_dataset.qx_data = self.current_dataset.qx_data.astype(np.float64)
            if self.current_dataset.dqx_data is not None:
                self.current_dataset.dqx_data = np.delete(self.current_dataset.dqx_data, [0])
                self.current_dataset.dqx_data = self.current_dataset.dqx_data.astype(np.float64)
            if self.current_dataset.qy_data is not None:
                self.current_dataset.qy_data = np.delete(self.current_dataset.qy_data, [0])
                self.current_dataset.ymin = np.min(self.current_dataset.qy_data)
                self.current_dataset.ymax = np.max(self.current_dataset.qy_data)
                self.current_dataset.qy_data = self.current_dataset.qy_data.astype(np.float64)
            if self.current_dataset.dqy_data is not None:
                self.current_dataset.dqy_data = np.delete(self.current_dataset.dqy_data, [0])
                self.current_dataset.dqy_data = self.current_dataset.dqy_data.astype(np.float64)
            if self.current_dataset.q_data is not None:
                self.current_dataset.q_data = np.delete(self.current_dataset.q_data, [0])
                self.current_dataset.q_data = self.current_dataset.q_data.astype(np.float64)
            ## Copy the stored mask onto an array matching the data size (defaults to True)
            zeros = np.ones(self.current_dataset.data.size, dtype=bool)
            try:
                for i in range(0, self.current_dataset.mask.size):
                    zeros[i] = self.current_dataset.mask[i]
            except Exception:
                self.errors.add(sys.exc_info()[1])
            self.current_dataset.mask = zeros

            ## Calculate the actual Q matrix
            try:
                if self.current_dataset.q_data.size <= 1:
                    self.current_dataset.q_data = np.sqrt(self.current_dataset.qx_data * self.current_dataset.qx_data +
                            self.current_dataset.qy_data * self.current_dataset.qy_data)
            except Exception:
                self.current_dataset.q_data = None

        elif type(self.current_dataset) is Data1D:
            if self.current_dataset.x is not None:
                self.current_dataset.x = np.delete(self.current_dataset.x, [0])
                self.current_dataset.x = self.current_dataset.x.astype(np.float64)
                self.current_dataset.xmin = np.min(self.current_dataset.x)
                self.current_dataset.xmax = np.max(self.current_dataset.x)
            if self.current_dataset.y is not None:
                self.current_dataset.y = np.delete(self.current_dataset.y, [0])
                self.current_dataset.y = self.current_dataset.y.astype(np.float64)
                self.current_dataset.ymin = np.min(self.current_dataset.y)
                self.current_dataset.ymax = np.max(self.current_dataset.y)
            if self.current_dataset.dx is not None:
                self.current_dataset.dx = np.delete(self.current_dataset.dx, [0])
                self.current_dataset.dx = self.current_dataset.dx.astype(np.float64)
            if self.current_dataset.dxl is not None:
                self.current_dataset.dxl = np.delete(self.current_dataset.dxl, [0])
                self.current_dataset.dxl = self.current_dataset.dxl.astype(np.float64)
            if self.current_dataset.dxw is not None:
                self.current_dataset.dxw = np.delete(self.current_dataset.dxw, [0])
                self.current_dataset.dxw = self.current_dataset.dxw.astype(np.float64)
            if self.current_dataset.dy is not None:
                self.current_dataset.dy = np.delete(self.current_dataset.dy, [0])
                self.current_dataset.dy = self.current_dataset.dy.astype(np.float64)

        else:
            self.errors.add("ShouldNeverHappenException")

        ## Append intermediate objects to data
        self.current_dataset.sample = self.sample
        self.current_dataset.source = self.source
        self.current_dataset.collimation.append(self.collimation)

        ## Append errors to dataset and reset class errors
        ## (copy the set so clearing the reader's errors does not empty the dataset's copy)
        self.current_dataset.errors = self.errors.copy()
        self.errors.clear()

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final processing on the data, and then calls
        a private method to generate a new data set.

        :param key: NeXus group name for current tree level
        :return: None
        """
        if self.current_dataset is not None:
            self.final_data_cleanup()
            self.output.append(self.current_dataset)
        self._initialize_new_data_set(key)

    def _initialize_new_data_set(self, key=""):
        """
        A private class method to generate a new 1D or 2D data object based on the type of data within the set.
        Outside methods should call add_data_set() to be sure any existing data is stored properly.

        :param key: NeXus group name for current tree level
        :return: None
        """
        entry = []
        if key != "":
            entry = self.raw_data.get(key)
        else:
            key_prog = re.compile("sasentry*")
            for key in self.raw_data.keys():
                if key_prog.match(key):
                    entry = self.raw_data.get(key)
                    break
        data = entry.get("sasdata")
        if data.get("Qx") is not None:
            self.current_dataset = Data2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = Data1D(x, y)
        self.current_dataset.filename = self.raw_data.filename

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The key of the item to be added to the dictionary
        :param numb: The number to be appended to the name, starts at 0
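
        Example (illustrative): if ``wavelength`` already exists in the
        dictionary, the generated key is ``wavelength_1``; if that is also
        taken, ``wavelength_2``, and so on.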
        """
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name