cansas_reader_HDF5.py @ 7858575

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since 7858575 was ad52d31, checked in by krzywon, 9 years ago
Modified the CanSAS HDF5 reader to pull in more instrument, sample, and other information not required to analyze the data.
Property mode set to `100644`
File size: 20.5 KB

Line
1	"""
2	CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3	"""
4
5	import h5py
6	import numpy as np
7	import re
8	import os
9	import sys
10
11	from sas.sascalc.dataloader.data_info import plottable_1D, plottable_2D, Data1D, Data2D, Sample, Source
12	from sas.sascalc.dataloader.data_info import Process, Aperture, Collimation, TransmissionSpectrum, Detector
13
14
15	class Reader():
16	"""
17	A class for reading in CanSAS v2.0 data files. The existing iteration opens Mantid generated HDF5 formatted files
18	with file extension .h5/.H5. Any number of data sets may be present within the file and any dimensionality of data
19	may be used. Currently 1D and 2D SAS data sets are supported, but future implementations will include 1D and 2D
20	SESANS data. This class assumes a single data set for each sasentry.
21
22	:Dependencies:
23	The CanSAS HDF5 reader requires h5py v2.5.0 or later.
24	"""
25
26	## CanSAS version
27	cansas_version = 2.0
28	## Logged warnings or messages
29	logging = None
30	## List of errors for the current data set
31	errors = None
32	## Raw file contents to be processed
33	raw_data = None
34	## Data set being modified
35	current_dataset = None
36	## For recursion and saving purposes, remember parent objects
37	parent_list = None
38	## Data type name
39	type_name = "CanSAS 2.0"
40	## Wildcards
41	type = ["CanSAS 2.0 HDF5 Files (.h5)\|.h5"]
42	## List of allowed extensions
43	ext = ['.h5', '.H5']
44	## Flag to bypass extension check
45	allow_all = False
46	## List of files to return
47	output = None
48
49	def __init__(self):
50	"""
51	Create the reader object and define initial states for class variables
52	"""
53	self.current_dataset = None
54	self.datasets = []
55	self.raw_data = None
56	self.errors = set()
57	self.logging = []
58	self.parent_list = []
59	self.output = []
60	self.detector = Detector()
61	self.collimation = Collimation()
62	self.aperture = Aperture()
63	self.process = Process()
64	self.sample = Sample()
65	self.source = Source()
66	self.trans_spectrum = TransmissionSpectrum()
67
68	def read(self, filename):
69	"""
70	This is the general read method that all SasView data_loaders must have.
71
72	:param filename: A path for an HDF5 formatted CanSAS 2D data file.
73	:return: List of Data1D/2D objects or a list of errors.
74	"""
75
76	## Reinitialize the class when loading a new data file to reset all class variables
77	self.__init__()
78	## Check that the file exists
79	if os.path.isfile(filename):
80	basename = os.path.basename(filename)
81	_, extension = os.path.splitext(basename)
82	# If the file type is not allowed, return empty list
83	if extension in self.ext or self.allow_all:
84	## Load the data file
85	self.raw_data = h5py.File(filename, 'r')
86	## Read in all child elements of top level SASroot
87	self.read_children(self.raw_data)
88	## Add the last data set to the list of outputs
89	self.add_data_set()
90	## Return data set(s)
91	return self.output
92
93	def read_children(self, data, parent=u'SASroot'):
94	"""
95	A recursive method for stepping through the hierarchical data file.
96
97	:param data: h5py Group object of any kind
98	:param parent: h5py Group parent name
99	:return: None
100	"""
101
102	## Create regex for base sasentry and for parent
103	parent_prog = re.compile(parent)
104
105	## Loop through each element of the parent and process accordingly
106	for key in data.keys():
107	## Get all information for the current key
108	value = data.get(key)
109	attr_keys = value.attrs.keys()
110	attr_values = value.attrs.values()
111	class_name = value.attrs.get(u'NX_class')
112	if class_name is not None:
113	class_prog = re.compile(class_name)
114	else:
115	class_prog = re.compile(value.name)
116
117	if isinstance(value, h5py.Group):
118	##TODO: Rework this for multiple SASdata objects within a single SASentry to allow for both 1D and 2D
119	##TODO: data within the same SASentry - One 1D and one 2D data object for all SASdata sets?
120	## If this is a new sasentry, store the current data set and create a fresh Data1D/2D object
121	if class_prog.match(u'SASentry'):
122	self.add_data_set(key)
123	## Recursion step to access data within the group
124	self.read_children(value, class_name)
125	self.add_intermediate(class_name)
126
127	elif isinstance(value, h5py.Dataset):
128	## If this is a dataset, store the data appropriately
129	data_set = data[key][:]
130
131	for data_point in data_set:
132	## Top Level Meta Data
133	if key == u'definition':
134	self.current_dataset.meta_data['reader'] = data_point
135	elif key == u'run':
136	self.current_dataset.run.append(data_point)
137	elif key == u'title':
138	self.current_dataset.title = data_point
139	elif key == u'SASnote':
140	self.current_dataset.notes.append(data_point)
141
142	## I and Q Data
143	elif key == u'I':
144	i_unit = value.attrs.get(u'unit')
145	if type(self.current_dataset) is Data2D:
146	self.current_dataset.data = np.append(self.current_dataset.data, data_point)
147	self.current_dataset.zaxis("Intensity (%s)" % (i_unit), i_unit)
148	else:
149	self.current_dataset.y = np.append(self.current_dataset.y, data_point)
150	self.current_dataset.yaxis("Intensity (%s)" % (i_unit), i_unit)
151	elif key == u'Idev':
152	if type(self.current_dataset) is Data2D:
153	self.current_dataset.err_data = np.append(self.current_dataset.err_data, data_point)
154	else:
155	self.current_dataset.dy = np.append(self.current_dataset.dy, data_point)
156	elif key == u'Q':
157	q_unit = value.attrs.get(u'unit')
158	self.current_dataset.xaxis("Q (%s)" % (q_unit), q_unit)
159	if type(self.current_dataset) is Data2D:
160	self.current_dataset.q = np.append(self.current_dataset.q, data_point)
161	else:
162	self.current_dataset.x = np.append(self.current_dataset.x, data_point)
163	elif key == u'Qy':
164	q_unit = value.attrs.get(u'unit')
165	self.current_dataset.yaxis("Q (%s)" % (q_unit), q_unit)
166	self.current_dataset.qy_data = np.append(self.current_dataset.qy_data, data_point)
167	elif key == u'Qydev':
168	self.current_dataset.dqy_data = np.append(self.current_dataset.dqy_data, data_point)
169	elif key == u'Qx':
170	q_unit = value.attrs.get(u'unit')
171	self.current_dataset.xaxis("Q (%s)" % (q_unit), q_unit)
172	self.current_dataset.qx_data = np.append(self.current_dataset.qx_data, data_point)
173	elif key == u'Qxdev':
174	self.current_dataset.dqx_data = np.append(self.current_dataset.dqx_data, data_point)
175	elif key == u'Mask':
176	self.current_dataset.mask = np.append(self.current_dataset.mask, data_point)
177
178	## Sample Information
179	elif key == u'Title' and parent == u'SASsample':
180	self.sample.name = data_point
181	elif key == u'thickness' and parent == u'SASsample':
182	self.sample.thickness = data_point
183	elif key == u'temperature' and parent == u'SASsample':
184	self.sample.temperature = data_point
185
186	## Instrumental Information
187	elif key == u'name' and parent == u'SASinstrument':
188	self.current_dataset.instrument = data_point
189	elif key == u'name' and parent == u'SASdetector':
190	self.detector.name = data_point
191	elif key == u'SDD' and parent == u'SASdetector':
192	self.detector.distance = data_point
193	self.detector.distance_unit = value.attrs.get(u'unit')
194	elif key == u'SSD' and parent == u'SAScollimation':
195	self.collimation.length = data_point
196	self.collimation.length_unit = value.attrs.get(u'unit')
197	elif key == u'name' and parent == u'SAScollimation':
198	self.collimation.name = data_point
199
200	## Process Information
201	elif key == u'name' and parent == u'SASprocess':
202	self.process.name = data_point
203	elif key == u'Title' and parent == u'SASprocess':
204	self.process.name = data_point
205	elif key == u'description' and parent == u'SASprocess':
206	self.process.description = data_point
207	elif key == u'date' and parent == u'SASprocess':
208	self.process.date = data_point
209	elif parent == u'SASprocess':
210	self.process.notes.append(data_point)
211
212	## Transmission Spectrum
213	elif key == u'T' and parent == u'SAStransmission_spectrum':
214	self.trans_spectrum.transmission.append(data_point)
215	elif key == u'Tdev' and parent == u'SAStransmission_spectrum':
216	self.trans_spectrum.transmission_deviation.append(data_point)
217	elif key == u'lambda' and parent == u'SAStransmission_spectrum':
218	self.trans_spectrum.wavelength.append(data_point)
219
220	## Other Information
221	elif key == u'wavelength' and parent == u'SASdata':
222	self.source.wavelength = data_point
223	self.source.wavelength.unit = value.attrs.get(u'unit')
224	elif key == u'radiation' and parent == u'SASsource':
225	self.source.radiation = data_point
226	elif key == u'transmission' and parent == u'SASdata':
227	self.sample.transmission = data_point
228
229	## Everything else goes in meta_data
230	else:
231	new_key = self._create_unique_key(self.current_dataset.meta_data, key)
232	self.current_dataset.meta_data[new_key] = data_point
233
234	else:
235	## I don't know if this reachable code
236	self.errors.add("ShouldNeverHappenException")
237
238	def add_intermediate(self, parent):
239	"""
240	This method stores any intermediate objects within the final data set after fully reading the set.
241
242	:param parent: The NXclass name for the h5py Group object that just finished being processed
243	:return:
244	"""
245
246	if parent == u'SASprocess':
247	self.current_dataset.process.append(self.process)
248	self.process = Process()
249	elif parent == u'SASdetector':
250	self.current_dataset.detector.append(self.detector)
251	self.detector = Detector()
252	elif parent == u'SAStransmission_spectrum':
253	self.current_dataset.trans_spectrum.append(self.trans_spectrum)
254	self.trans_spectrum = TransmissionSpectrum()
255	elif parent == u'SASsource':
256	self.current_dataset.source = self.source
257	self.source = Source()
258	elif parent == u'SASsample':
259	self.current_dataset.sample = self.sample
260	self.sample = Sample()
261	elif parent == u'SAScollimation':
262	self.current_dataset.collimation.append(self.collimation)
263	self.collimation = Collimation()
264	elif parent == u'SASaperture':
265	self.collimation.aperture.append(self.aperture)
266	self.aperture = Aperture()
267
268	def final_data_cleanup(self):
269	"""
270	Does some final cleanup and formatting on self.current_dataset
271	"""
272
273	## Type cast data arrays to float64 and find min/max as appropriate
274	if type(self.current_dataset) is Data2D:
275	self.current_dataset.data = np.delete(self.current_dataset.data, [0])
276	self.current_dataset.data = self.current_dataset.data.astype(np.float64)
277	self.current_dataset.err_data = np.delete(self.current_dataset.err_data, [0])
278	self.current_dataset.err_data = self.current_dataset.err_data.astype(np.float64)
279	self.current_dataset.mask = np.delete(self.current_dataset.mask, [0])
280	if self.current_dataset.qx_data is not None:
281	self.current_dataset.qx_data = np.delete(self.current_dataset.qx_data, [0])
282	self.current_dataset.xmin = np.min(self.current_dataset.qx_data)
283	self.current_dataset.xmax = np.max(self.current_dataset.qx_data)
284	self.current_dataset.qx_data = self.current_dataset.qx_data.astype(np.float64)
285	if self.current_dataset.dqx_data is not None:
286	self.current_dataset.dqx_data = np.delete(self.current_dataset.dqx_data, [0])
287	self.current_dataset.dqx_data = self.current_dataset.dqx_data.astype(np.float64)
288	if self.current_dataset.qy_data is not None:
289	self.current_dataset.qy_data = np.delete(self.current_dataset.qy_data, [0])
290	self.current_dataset.ymin = np.min(self.current_dataset.qy_data)
291	self.current_dataset.ymax = np.max(self.current_dataset.qy_data)
292	self.current_dataset.qy_data = self.current_dataset.qy_data.astype(np.float64)
293	if self.current_dataset.dqy_data is not None:
294	self.current_dataset.dqy_data = np.delete(self.current_dataset.dqy_data, [0])
295	self.current_dataset.dqy_data = self.current_dataset.dqy_data.astype(np.float64)
296	if self.current_dataset.q_data is not None:
297	self.current_dataset.q_data = np.delete(self.current_dataset.q_data, [0])
298	self.current_dataset.q_data = self.current_dataset.q_data.astype(np.float64)
299	zeros = np.ones(self.current_dataset.data.size, dtype=bool)
300	try:
301	for i in range (0, self.current_dataset.mask.size - 1):
302	zeros[i] = self.current_dataset.mask[i]
303	except:
304	self.errors.add(sys.exc_value)
305	self.current_dataset.mask = zeros
306
307	## Calculate the actual Q matrix
308	try:
309	if self.current_dataset.q_data.size <= 1:
310	self.current_dataset.q_data = np.sqrt(self.current_dataset.qx_data * self.current_dataset.qx_data +
311	self.current_dataset.qy_data * self.current_dataset.qy_data)
312	except:
313	self.current_dataset.q_data = None
314
315	elif type(self.current_dataset) is Data1D:
316	if self.current_dataset.x is not None:
317	self.current_dataset.x = np.delete(self.current_dataset.x, [0])
318	self.current_dataset.x = self.current_dataset.x.astype(np.float64)
319	self.current_dataset.xmin = np.min(self.current_dataset.x)
320	self.current_dataset.xmax = np.max(self.current_dataset.x)
321	if self.current_dataset.y is not None:
322	self.current_dataset.y = np.delete(self.current_dataset.y, [0])
323	self.current_dataset.y = self.current_dataset.y.astype(np.float64)
324	self.current_dataset.ymin = np.min(self.current_dataset.y)
325	self.current_dataset.ymax = np.max(self.current_dataset.y)
326	if self.current_dataset.dx is not None:
327	self.current_dataset.dx = np.delete(self.current_dataset.dx, [0])
328	self.current_dataset.dx = self.current_dataset.dx.astype(np.float64)
329	if self.current_dataset.dxl is not None:
330	self.current_dataset.dxl = np.delete(self.current_dataset.dxl, [0])
331	self.current_dataset.dxl = self.current_dataset.dxl.astype(np.float64)
332	if self.current_dataset.dxw is not None:
333	self.current_dataset.dxw = np.delete(self.current_dataset.dxw, [0])
334	self.current_dataset.dxw = self.current_dataset.dxw.astype(np.float64)
335	if self.current_dataset.dy is not None:
336	self.current_dataset.dy = np.delete(self.current_dataset.dy, [0])
337	self.current_dataset.dy =self.current_dataset.dy.astype(np.float64)
338
339	if len(self.current_dataset.trans_spectrum) is not 0:
340	spectrum_list = []
341	for spectrum in self.current_dataset.trans_spectrum:
342	spectrum.transmission = np.delete(spectrum.transmission, [0])
343	spectrum.transmission = spectrum.transmission.astype(np.float64)
344	spectrum.transmission_deviation = np.delete(spectrum.transmission_deviation, [0])
345	spectrum.transmission_deviation = spectrum.transmission_deviation.astype(np.float64)
346	spectrum.wavelength = np.delete(spectrum.wavelength, [0])
347	spectrum.wavelength = spectrum.wavelength.astype(np.float64)
348	spectrum_list.append(spectrum)
349	self.current_dataset.trans_spectrum = spectrum_list
350
351	else:
352	self.errors.add("ShouldNeverHappenException")
353
354	## Append intermediate objects to data
355	self.current_dataset.sample = self.sample
356	self.current_dataset.source = self.source
357	self.current_dataset.collimation.append(self.collimation)
358
359	## Append errors to dataset and reset class errors
360	self.current_dataset.errors = self.errors
361	self.errors.clear()
362
363	def add_data_set(self, key=""):
364	"""
365	Adds the current_dataset to the list of outputs after preforming final processing on the data and then calls a
366	private method to generate a new data set.
367
368	:param key: NeXus group name for current tree level
369	:return: None
370	"""
371	if self.current_dataset is not None:
372	self.final_data_cleanup()
373	self.output.append(self.current_dataset)
374	self._initialize_new_data_set(key)
375
376	def _initialize_new_data_set(self, key=""):
377	"""
378	A private class method to generate a new 1D or 2D data object based on the type of data within the set.
379	Outside methods should call add_data_set() to be sure any existing data is stored properly.
380
381	:param key: NeXus group name for current tree level
382	:return: None
383	"""
384	entry = self._find_intermediate(key, "sasentry*")
385	data = entry.get("sasdata")
386	if data.get("Qx") is not None:
387	self.current_dataset = Data2D()
388	else:
389	x = np.array(0)
390	y = np.array(0)
391	self.current_dataset = Data1D(x, y)
392	self.current_dataset.filename = self.raw_data.filename
393
394	def _find_intermediate(self, key="", basename=""):
395	"""
396	A private class used to find an entry by either using a direct key or knowing the approximate basename.
397
398	:param key: Exact keyname of an entry
399	:param basename: Approximate name of an entry
400	:return:
401	"""
402	entry = []
403	if key is not "":
404	entry = self.raw_data.get(key)
405	else:
406	key_prog = re.compile(basename)
407	for key in self.raw_data.keys():
408	if (key_prog.match(key)):
409	entry = self.raw_data.get(key)
410	break
411	return entry
412
413	def _create_unique_key(self, dictionary, name, numb=0):
414	"""
415	Create a unique key value for any dictionary to prevent overwriting
416	Recurses until a unique key value is found.
417
418	:param dictionary: A dictionary with any number of entries
419	:param name: The index of the item to be added to dictionary
420	:param numb: The number to be appended to the name, starts at 0
421	"""
422	if dictionary.get(name) is not None:
423	numb += 1
424	name = name.split("_")[0]
425	name += "_{0}".format(numb)
426	name = self._create_unique_key(dictionary, name, numb)
427	return name

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 7858575

Download in other formats: