cansas_reader_HDF5.py @ 68aa210

ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, costrafo411, magnetic_scatt, release-4.1.1, release-4.1.2, release-4.2.2, release_4.0.1, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload

Last change on this file since 68aa210 was 68aa210, checked in by krzywon, 8 years ago
Added a reader for 1D and 2D CanSAS v2.0 data in HDF5 format. Added the h5py requirement to check_packages.
Property mode set to `100644`
File size: 16.8 KB

Line
1	"""
2	CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
3	"""
4
5	import h5py
6	import numpy as np
7	import re
8	import os
9	import sys
10
11	from sas.sascalc.dataloader.data_info import Data1D, Data2D, Sample, Source
12	from sas.sascalc.dataloader.data_info import Process, Aperture, Collimation, TransmissionSpectrum, Detector
13
14
class Reader(object):
    """
    Reader for 1D and 2D CanSAS v2.0 data stored in HDF5 files.

    The HDF5 hierarchy is walked recursively by :meth:`read_children`. Each
    group whose ``NX_class`` attribute equals ``SASentry`` starts a new
    Data1D/Data2D object; the datasets found below it are copied into that
    object (or into the intermediate Sample/Source/Process objects) according
    to their key. Finished data sets are collected in ``self.output``, which
    is what :meth:`read` returns.

    :Dependencies:
        The CanSAS HDF5 reader requires h5py v2.5.0 or later.
    """

    ## CanSAS version
    cansas_version = 2.0
    ## Logged warnings or messages
    logging = None
    ## List of errors for the current data set
    errors = None
    ## Raw file contents to be processed
    raw_data = None
    ## Data set being modified
    current_dataset = None
    ## For recursion and saving purposes, remember parent objects
    parent_list = None
    ## Data type name
    type_name = "CanSAS 2D"
    ## Wildcards ("description|pattern").
    ## NOTE(review): the copy of this file under review showed "(.h5)\|.h5";
    ## the backslash and the missing '*' look like page-extraction artifacts,
    ## so the conventional "*.h5" pattern is used here - confirm against the
    ## repository.
    type = ["CanSAS 2D HDF5 Files (*.h5)|*.h5"]
    ## List of allowed extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass extension check
    allow_all = False
    ## List of files to return
    output = None

    def __init__(self):
        """
        Create the reader object and define initial states for certain class
        variables.
        """
        self.current_dataset = None
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.parent_list = []
        self.output = []
        self._reset_intermediates()

    def _reset_intermediates(self):
        """
        Create fresh, empty intermediate objects (detector, sample, ...).

        Called once per data set so that two data sets never share the same
        Sample/Source/Collimation instance.
        """
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.sample = Sample()
        self.source = Source()
        self.trans_spectrum = TransmissionSpectrum()

    @staticmethod
    def _as_text(value):
        """
        Return *value* as text when it is a byte string, unchanged otherwise.

        :param value: An attribute value read from the file (may be None).
        :return: Decoded text for byte strings, else *value* itself.
        """
        if isinstance(value, bytes):
            return value.decode("utf-8", "replace")
        return value

    @staticmethod
    def _strip_placeholder(values):
        """
        Drop the leading placeholder element and cast the rest to float64.

        Data arrays are grown with ``np.append`` onto the value the attribute
        held when the data set was created (``np.array(0)`` for Data1D x/y),
        so element 0 is never real data.

        :param values: Array (or array-like) whose first element is a
            placeholder.
        :return: 1D float64 array without the placeholder.
        """
        return np.delete(values, [0]).astype(np.float64)

    def read(self, filename):
        """
        General read method called by the top-level SasView data_loader.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects; empty if the file does not exist
            or its extension is not allowed.
        """
        ## Reinitialize the class when loading new data file to reset all
        ## class variables
        self.__init__()

        ## Check that the file exists
        if not os.path.isfile(filename):
            return self.output

        ## If the file type is not allowed, return empty list
        _, extension = os.path.splitext(os.path.basename(filename))
        if not (extension in self.ext or self.allow_all):
            return self.output

        ## Load the data file; always release the handle, even on errors
        self.raw_data = h5py.File(filename, 'r')
        try:
            ## Read in all child elements of top level SASroot
            self.read_children(self.raw_data)
            ## Store the last data set; no new one is needed after the walk
            self._store_current_data_set()
        finally:
            self.raw_data.close()

        ## Return data set(s)
        return self.output

    def read_children(self, data, parent=u'SASroot'):
        """
        Recursive method for stepping through the hierarchy. Stores the data.

        :param data: h5py Group object of any kind
        :param parent: NX_class name of the group being read (may be None
            when the group carries no NX_class attribute)
        :return: None
        """
        ## Loop through each element of the parent and process accordingly
        for key in data.keys():
            value = data.get(key)
            if value is None:
                ## Nothing retrievable under this key; skip it
                continue
            class_name = self._as_text(value.attrs.get(u'NX_class'))

            if isinstance(value, h5py.Group):
                ## If this is a new sasentry, store the current data set and
                ## create a fresh Data1D/2D object
                if class_name == u'SASentry':
                    self.add_data_set(key)
                ## TODO: If Process, Aperture, etc, store and renew
                ## Recursion step to access data within the group
                self.read_children(value, class_name)

            elif isinstance(value, h5py.Dataset):
                ## If this is a dataset, store the data appropriately
                ## TODO: Add instrumental information
                ## value[()] also works for scalar datasets, which [:] rejects
                data_set = np.atleast_1d(value[()])
                unit = self._as_text(value.attrs.get(u'unit'))
                is_series = data_set.size > 1
                for data_point in data_set:
                    self._store_data_point(key, data_point, parent, unit,
                                           is_series)

            else:
                ## I don't know if this reachable code
                self.errors.add("ShouldNeverHappenException")

        return

    def _store_data_point(self, key, data_point, parent, unit, is_series):
        """
        Copy one element of a dataset into the object it belongs to.

        :param key: Name of the dataset inside its group
        :param data_point: One element (or one row) of the dataset
        :param parent: NX_class name of the enclosing group
        :param unit: Value of the dataset's ``unit`` attribute, or None
        :param is_series: True when the dataset holds more than one value
        :return: None
        """
        dataset = self.current_dataset
        is_2d = isinstance(dataset, Data2D)

        ## Top Level Meta Data
        if key == u'definition':
            dataset.meta_data['reader'] = data_point
        elif key == u'run':
            dataset.run.append(data_point)
        elif key == u'title':
            dataset.title = data_point
        elif key == u'SASnote':
            dataset.notes.append(data_point)

        ## I and Q Data
        elif key == u'I':
            if is_2d:
                dataset.data = np.append(dataset.data, data_point)
                dataset.zaxis("Intensity (%s)" % (unit), unit)
            else:
                dataset.y = np.append(dataset.y, data_point)
                dataset.yaxis("Intensity (%s)" % (unit), unit)
        elif key == u'Idev':
            if is_2d:
                dataset.err_data = np.append(dataset.err_data, data_point)
            else:
                dataset.dy = np.append(dataset.dy, data_point)
        elif key == u'Q':
            dataset.xaxis("Q (%s)" % (unit), unit)
            if is_2d:
                ## q_data is the attribute final_data_cleanup() works on
                dataset.q_data = np.append(dataset.q_data, data_point)
            else:
                dataset.x = np.append(dataset.x, data_point)
        elif key == u'Qy':
            dataset.yaxis("Q (%s)" % (unit), unit)
            dataset.qy_data = np.append(dataset.qy_data, data_point)
        elif key == u'Qydev':
            dataset.dqy_data = np.append(dataset.dqy_data, data_point)
        elif key == u'Qx':
            dataset.xaxis("Q (%s)" % (unit), unit)
            dataset.qx_data = np.append(dataset.qx_data, data_point)
        elif key == u'Qxdev':
            dataset.dqx_data = np.append(dataset.dqx_data, data_point)
        elif key == u'Mask':
            dataset.mask = np.append(dataset.mask, data_point)

        ## Other Information
        elif key == u'wavelength':
            if is_series:
                ## A spectrum: keep every value and expose the running mean
                spectrum = self.trans_spectrum.wavelength
                spectrum.append(data_point)
                self.source.wavelength = sum(spectrum) / len(spectrum)
            else:
                self.source.wavelength = data_point
        elif key == u'probe_type':
            self.source.radiation = data_point
        elif key == u'transmission':
            if is_series:
                spectrum = self.trans_spectrum.transmission
                spectrum.append(data_point)
                self.sample.transmission = sum(spectrum) / len(spectrum)
            else:
                self.sample.transmission = data_point

        ## Sample Information
        elif key == u'Title' and parent == u'SASsample':
            self.sample.name = data_point
        elif key == u'thickness' and parent == u'SASsample':
            self.sample.thickness = data_point
        elif key == u'temperature' and parent == u'SASsample':
            self.sample.temperature = data_point

        ## Process Information
        elif key == u'name' and parent == u'SASprocess':
            self.process.name = data_point
        elif key == u'Title' and parent == u'SASprocess':
            self.process.name = data_point
        elif key == u'description' and parent == u'SASprocess':
            self.process.description = data_point
        elif key == u'date' and parent == u'SASprocess':
            self.process.date = data_point

        ## Everything else goes in meta_data
        else:
            new_key = self._create_unique_key(dataset.meta_data, key)
            dataset.meta_data[new_key] = data_point

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_dataset.

        Removes the placeholder element from every data array, casts the
        arrays to float64, fills in axis limits, attaches the intermediate
        objects and hands the accumulated errors over to the data set.
        """
        ## TODO: Add all cleanup items - NOT FINISHED
        ## TODO: All strings to float64
        ## TODO: All intermediates (self.sample, etc.) put in self.current_dataset
        dataset = self.current_dataset
        strip = self._strip_placeholder

        ## Type cast data arrays to float64 and find min/max as appropriate
        if isinstance(dataset, Data2D):
            dataset.data = strip(dataset.data)
            dataset.err_data = strip(dataset.err_data)
            raw_mask = np.delete(dataset.mask, [0])
            if dataset.qx_data is not None:
                dataset.qx_data = strip(dataset.qx_data)
                if dataset.qx_data.size:
                    dataset.xmin = np.min(dataset.qx_data)
                    dataset.xmax = np.max(dataset.qx_data)
            if dataset.dqx_data is not None:
                dataset.dqx_data = strip(dataset.dqx_data)
            if dataset.qy_data is not None:
                dataset.qy_data = strip(dataset.qy_data)
                if dataset.qy_data.size:
                    dataset.ymin = np.min(dataset.qy_data)
                    dataset.ymax = np.max(dataset.qy_data)
            if dataset.dqy_data is not None:
                dataset.dqy_data = strip(dataset.dqy_data)
            if dataset.q_data is not None:
                dataset.q_data = strip(dataset.q_data)

            ## Build a boolean mask as long as the data; points the file
            ## gives no mask value for stay True
            mask = np.ones(dataset.data.size, dtype=bool)
            if raw_mask.size > mask.size:
                self.errors.add("Mask has %d points but data has only %d"
                                % (raw_mask.size, mask.size))
            count = min(raw_mask.size, mask.size)
            try:
                mask[:count] = raw_mask[:count]
            except (TypeError, ValueError) as exc:
                self.errors.add(str(exc))
            dataset.mask = mask

            ## Calculate the actual Q matrix
            try:
                if dataset.q_data.size <= 1:
                    dataset.q_data = np.sqrt(
                        dataset.qx_data * dataset.qx_data +
                        dataset.qy_data * dataset.qy_data)
            except (AttributeError, TypeError, ValueError):
                dataset.q_data = None

        elif isinstance(dataset, Data1D):
            if dataset.x is not None:
                dataset.x = strip(dataset.x)
                if dataset.x.size:
                    dataset.xmin = np.min(dataset.x)
                    dataset.xmax = np.max(dataset.x)
            if dataset.y is not None:
                dataset.y = strip(dataset.y)
                if dataset.y.size:
                    dataset.ymin = np.min(dataset.y)
                    dataset.ymax = np.max(dataset.y)
            if dataset.dx is not None:
                dataset.dx = strip(dataset.dx)
            if dataset.dxl is not None:
                dataset.dxl = strip(dataset.dxl)
            if dataset.dxw is not None:
                dataset.dxw = strip(dataset.dxw)
            if dataset.dy is not None:
                dataset.dy = strip(dataset.dy)

        else:
            self.errors.add("ShouldNeverHappenException")

        ## Append intermediate objects to data, then start fresh ones so the
        ## next data set does not modify the objects attached here
        dataset.sample = self.sample
        dataset.source = self.source
        dataset.collimation.append(self.collimation)
        self._reset_intermediates()

        ## Hand a copy of the errors to the dataset and reset class errors;
        ## clearing the shared set would empty the dataset's errors as well
        dataset.errors = set(self.errors)
        self.errors.clear()

    def _store_current_data_set(self):
        """
        Finalize the current data set, if any, and append it to the output.

        :return: None
        """
        if self.current_dataset is not None:
            self.final_data_cleanup()
            self.output.append(self.current_dataset)
            self.current_dataset = None

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final
        processing on the data and then calls a private method to generate a
        new data set.

        :param key: NeXus group name for current tree level
        :return: None
        """
        self._store_current_data_set()
        self._initialize_new_data_set(key)

    def _initialize_new_data_set(self, key=""):
        """
        A private class method to generate a new 1D or 2D data object based on
        the type of data within the set. Outside methods should call
        add_data_set() to be sure any existing data is stored properly.

        A Data2D object is created when the entry has a ``sasdata`` child
        containing ``Qx``; in every other case (including an entry that cannot
        be found) a Data1D object is created.

        :param key: NeXus group name for current tree level
        :return: None
        """
        entry = None
        if key != "":
            entry = self.raw_data.get(key)
        else:
            ## No key given: fall back to the first matching top-level entry
            key_prog = re.compile("sasentry*")
            for name in self.raw_data.keys():
                if key_prog.match(name):
                    entry = self.raw_data.get(name)
                    break

        data = entry.get("sasdata") if entry is not None else None
        if data is not None and data.get("Qx") is not None:
            self.current_dataset = Data2D()
        else:
            ## Placeholder first elements; removed in final_data_cleanup()
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = Data1D(x, y)
        self.current_dataset.filename = self.raw_data.filename

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number already appended to the name, starts at 0
        :return: *name* if it is not yet a key of *dictionary*, otherwise
            the first free ``<name>_<n>`` with n counting up from numb + 1
        """
        if name not in dictionary:
            return name

        ## When called with a name that already ends in "_<numb>", count on
        ## from the undecorated name instead of stacking suffixes
        suffix = "_{0}".format(numb)
        if numb and name.endswith(suffix):
            base = name[:-len(suffix)]
        else:
            base = name

        candidate = name
        while candidate in dictionary:
            numb += 1
            candidate = "{0}_{1}".format(base, numb)
        return candidate

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/dataloader/readers/cansas_reader_HDF5.py @ 68aa210

Download in other formats: