"""
    CanSAS 2D data reader for reading HDF5 formatted CanSAS files.
"""

import h5py
import numpy as np
import re
import os
import sys

from sas.sascalc.dataloader.data_info import Data1D, Data2D, Sample, Source
from sas.sascalc.dataloader.data_info import Process, Aperture, Collimation, TransmissionSpectrum, Detector


class Reader():
    """
    A class for reading HDF5 formatted CanSAS 2.0 data files. The file contents
    are parsed into SasView Data1D/Data2D objects, which are returned to the
    caller by the read() method.

    :Dependencies:
        The CanSAS HDF5 reader requires h5py v2.5.0 or later.
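
    :Example:
        A minimal direct-use sketch (the reader is normally invoked through the
        SasView data_loader; the file path shown here is a placeholder)::

            reader = Reader()
            data_sets = reader.read("/path/to/cansas_file.h5")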
    """

    ## CanSAS version
    cansas_version = 2.0
    ## Logged warnings or messages
    logging = None
    ## List of errors for the current data set
    errors = None
    ## Raw file contents to be processed
    raw_data = None
    ## Data set being modified
    current_dataset = None
    ## For recursion and saving purposes, remember parent objects
    parent_list = None
    ## Data type name
    type_name = "CanSAS 2D"
    ## Wildcards
    type = ["CanSAS 2D HDF5 Files (*.h5)|*.h5"]
    ## List of allowed extensions
    ext = ['.h5', '.H5']
    ## Flag to bypass extension check
    allow_all = False
    ## List of files to return
    output = None

    def __init__(self):
        """
        Create the reader object and define initial states for certain class variables
        """
        self.current_dataset = None
        self.raw_data = None
        self.errors = set()
        self.logging = []
        self.parent_list = []
        self.output = []
        self.detector = Detector()
        self.collimation = Collimation()
        self.aperture = Aperture()
        self.process = Process()
        self.sample = Sample()
        self.source = Source()
        self.trans_spectrum = TransmissionSpectrum()

    def read(self, filename):
        """
        General read method called by the top-level SasView data_loader.

        :param filename: A path for an HDF5 formatted CanSAS 2D data file.
        :return: List of Data1D/2D objects, or None if the file cannot be loaded.
        """

        ## Reinitialize the class when loading a new data file to reset all class variables
        self.__init__()
        ## Check that the file exists
        if os.path.isfile(filename):
            basename = os.path.basename(filename)
            _, extension = os.path.splitext(basename)
            # If the file type is not allowed, do not load the file
            if extension in self.ext or self.allow_all:
                ## Load the data file
                self.raw_data = h5py.File(filename, 'r')
                ## Read in all child elements of top level SASroot
                self.read_children(self.raw_data)
                self.add_data_set()
                ## Return data set(s)
                return self.output

    def read_children(self, data, parent=u'SASroot'):
        """
        Recursive method for stepping through the hierarchy. Stores the data
        in the appropriate SasView objects.

        :param data: h5py Group object of any kind
        :param parent: h5py Group parent name
        :return: None
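
        The walk assumes the canonical CanSAS 2.0 HDF5 layout, sketched below
        (group and dataset names are illustrative, not exhaustive)::

            SASroot
                sasentry (SASentry)
                    title, run, definition
                    sasdata (SASdata)
                        I, Idev, Q (or Qx, Qy), Mask, ...
                    SASsample, SASprocess, ...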
        """

        ## Create a regex for the parent group
        parent_prog = re.compile(parent)

        ## Loop through each element of the parent and process accordingly
        for key in data.keys():
            ## Get all information for the current key
            value = data.get(key)
            attr_keys = value.attrs.keys()
            attr_values = value.attrs.values()
            class_name = value.attrs.get(u'NX_class')
            if class_name is not None:
                class_prog = re.compile(class_name)
            else:
                class_prog = re.compile(value.name)

            if isinstance(value, h5py.Group):
                ## If this is a new sasentry, store the current data set and create a fresh Data1D/2D object
                if class_prog.match(u'SASentry'):
                    self.add_data_set(key)
                ## If the value is a group of data, iterate
                ## TODO: If Process, Aperture, etc, store and renew
                ## Recursion step to access the data within the group
                self.read_children(data.get(key), class_name)

            elif isinstance(value, h5py.Dataset):
                ## If this is a dataset, store the data appropriately
                ## TODO: Add instrumental information
                data_set = data[key][:]

                for data_point in data_set:
                    ## Top Level Meta Data
                    if key == u'definition':
                        self.current_dataset.meta_data['reader'] = data_point
                    elif key == u'run':
                        self.current_dataset.run.append(data_point)
                    elif key == u'title':
                        self.current_dataset.title = data_point
                    elif key == u'SASnote':
                        self.current_dataset.notes.append(data_point)

                    ## I and Q Data
                    elif key == u'I':
                        i_unit = value.attrs.get(u'unit')
                        if type(self.current_dataset) is Data2D:
                            self.current_dataset.data = np.append(self.current_dataset.data, data_point)
                            self.current_dataset.zaxis("Intensity (%s)" % (i_unit), i_unit)
                        else:
                            self.current_dataset.y = np.append(self.current_dataset.y, data_point)
                            self.current_dataset.yaxis("Intensity (%s)" % (i_unit), i_unit)
                    elif key == u'Idev':
                        if type(self.current_dataset) is Data2D:
                            self.current_dataset.err_data = np.append(self.current_dataset.err_data, data_point)
                        else:
                            self.current_dataset.dy = np.append(self.current_dataset.dy, data_point)
                    elif key == u'Q':
                        q_unit = value.attrs.get(u'unit')
                        self.current_dataset.xaxis("Q (%s)" % (q_unit), q_unit)
                        if type(self.current_dataset) is Data2D:
                            self.current_dataset.q = np.append(self.current_dataset.q, data_point)
                        else:
                            self.current_dataset.x = np.append(self.current_dataset.x, data_point)
                    elif key == u'Qy':
                        q_unit = value.attrs.get(u'unit')
                        self.current_dataset.yaxis("Q (%s)" % (q_unit), q_unit)
                        self.current_dataset.qy_data = np.append(self.current_dataset.qy_data, data_point)
                    elif key == u'Qydev':
                        self.current_dataset.dqy_data = np.append(self.current_dataset.dqy_data, data_point)
                    elif key == u'Qx':
                        q_unit = value.attrs.get(u'unit')
                        self.current_dataset.xaxis("Q (%s)" % (q_unit), q_unit)
                        self.current_dataset.qx_data = np.append(self.current_dataset.qx_data, data_point)
                    elif key == u'Qxdev':
                        self.current_dataset.dqx_data = np.append(self.current_dataset.dqx_data, data_point)
                    elif key == u'Mask':
                        self.current_dataset.mask = np.append(self.current_dataset.mask, data_point)

                    ## Other Information
                    elif key == u'wavelength':
                        if data_set.size > 1:
                            self.trans_spectrum.wavelength.append(data_point)
                            self.source.wavelength = sum(self.trans_spectrum.wavelength) \
                                    / len(self.trans_spectrum.wavelength)
                        else:
                            self.source.wavelength = data_point
                    elif key == u'probe_type':
                        self.source.radiation = data_point
                    elif key == u'transmission':
                        if data_set.size > 1:
                            self.trans_spectrum.transmission.append(data_point)
                            self.sample.transmission = sum(self.trans_spectrum.transmission) \
                                    / len(self.trans_spectrum.transmission)
                        else:
                            self.sample.transmission = data_point

                    ## Sample Information
                    elif key == u'Title' and parent == u'SASsample':
                        self.sample.name = data_point
                    elif key == u'thickness' and parent == u'SASsample':
                        self.sample.thickness = data_point
                    elif key == u'temperature' and parent == u'SASsample':
                        self.sample.temperature = data_point

                    ## Process Information
                    elif key == u'name' and parent == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'Title' and parent == u'SASprocess':
                        self.process.name = data_point
                    elif key == u'description' and parent == u'SASprocess':
                        self.process.description = data_point
                    elif key == u'date' and parent == u'SASprocess':
                        self.process.date = data_point

                    ## Everything else goes in meta_data
                    else:
                        new_key = self._create_unique_key(self.current_dataset.meta_data, key)
                        self.current_dataset.meta_data[new_key] = data_point

            else:
                ## It is unclear whether this code is reachable
                self.errors.add("ShouldNeverHappenException")

        return

    def final_data_cleanup(self):
        """
        Does some final cleanup and formatting on self.current_dataset
        """
        ## TODO: Add all cleanup items - NOT FINISHED
        ## TODO: All strings to float64
        ## TODO: All intermediates (self.sample, etc.) put in self.current_dataset

        ## Type cast data arrays to float64 and find min/max as appropriate
        if type(self.current_dataset) is Data2D:
            self.current_dataset.data = np.delete(self.current_dataset.data, [0])
            self.current_dataset.data = self.current_dataset.data.astype(np.float64)
            self.current_dataset.err_data = np.delete(self.current_dataset.err_data, [0])
            self.current_dataset.err_data = self.current_dataset.err_data.astype(np.float64)
            self.current_dataset.mask = np.delete(self.current_dataset.mask, [0])
            if self.current_dataset.qx_data is not None:
                self.current_dataset.qx_data = np.delete(self.current_dataset.qx_data, [0])
                self.current_dataset.xmin = np.min(self.current_dataset.qx_data)
                self.current_dataset.xmax = np.max(self.current_dataset.qx_data)
                self.current_dataset.qx_data = self.current_dataset.qx_data.astype(np.float64)
            if self.current_dataset.dqx_data is not None:
                self.current_dataset.dqx_data = np.delete(self.current_dataset.dqx_data, [0])
                self.current_dataset.dqx_data = self.current_dataset.dqx_data.astype(np.float64)
            if self.current_dataset.qy_data is not None:
                self.current_dataset.qy_data = np.delete(self.current_dataset.qy_data, [0])
                self.current_dataset.ymin = np.min(self.current_dataset.qy_data)
                self.current_dataset.ymax = np.max(self.current_dataset.qy_data)
                self.current_dataset.qy_data = self.current_dataset.qy_data.astype(np.float64)
            if self.current_dataset.dqy_data is not None:
                self.current_dataset.dqy_data = np.delete(self.current_dataset.dqy_data, [0])
                self.current_dataset.dqy_data = self.current_dataset.dqy_data.astype(np.float64)
            if self.current_dataset.q_data is not None:
                self.current_dataset.q_data = np.delete(self.current_dataset.q_data, [0])
                self.current_dataset.q_data = self.current_dataset.q_data.astype(np.float64)
            ## Copy the stored mask onto an array matching the data size (defaults to True)
            zeros = np.ones(self.current_dataset.data.size, dtype=bool)
            try:
                for i in range(0, self.current_dataset.mask.size):
                    zeros[i] = self.current_dataset.mask[i]
            except Exception:
                self.errors.add(sys.exc_info()[1])
            self.current_dataset.mask = zeros

            ## Calculate the actual Q matrix
            try:
                if self.current_dataset.q_data.size <= 1:
                    self.current_dataset.q_data = np.sqrt(self.current_dataset.qx_data * self.current_dataset.qx_data +
                            self.current_dataset.qy_data * self.current_dataset.qy_data)
            except Exception:
                self.current_dataset.q_data = None

        elif type(self.current_dataset) is Data1D:
            if self.current_dataset.x is not None:
                self.current_dataset.x = np.delete(self.current_dataset.x, [0])
                self.current_dataset.x = self.current_dataset.x.astype(np.float64)
                self.current_dataset.xmin = np.min(self.current_dataset.x)
                self.current_dataset.xmax = np.max(self.current_dataset.x)
            if self.current_dataset.y is not None:
                self.current_dataset.y = np.delete(self.current_dataset.y, [0])
                self.current_dataset.y = self.current_dataset.y.astype(np.float64)
                self.current_dataset.ymin = np.min(self.current_dataset.y)
                self.current_dataset.ymax = np.max(self.current_dataset.y)
            if self.current_dataset.dx is not None:
                self.current_dataset.dx = np.delete(self.current_dataset.dx, [0])
                self.current_dataset.dx = self.current_dataset.dx.astype(np.float64)
            if self.current_dataset.dxl is not None:
                self.current_dataset.dxl = np.delete(self.current_dataset.dxl, [0])
                self.current_dataset.dxl = self.current_dataset.dxl.astype(np.float64)
            if self.current_dataset.dxw is not None:
                self.current_dataset.dxw = np.delete(self.current_dataset.dxw, [0])
                self.current_dataset.dxw = self.current_dataset.dxw.astype(np.float64)
            if self.current_dataset.dy is not None:
                self.current_dataset.dy = np.delete(self.current_dataset.dy, [0])
                self.current_dataset.dy = self.current_dataset.dy.astype(np.float64)

        else:
            self.errors.add("ShouldNeverHappenException")

        ## Append intermediate objects to data
        self.current_dataset.sample = self.sample
        self.current_dataset.source = self.source
        self.current_dataset.collimation.append(self.collimation)

        ## Append errors to dataset and reset class errors
        ## (copy the set so clearing the reader's errors does not empty the dataset's copy)
        self.current_dataset.errors = self.errors.copy()
        self.errors.clear()

    def add_data_set(self, key=""):
        """
        Adds the current_dataset to the list of outputs after performing final processing on the data, and then calls
        a private method to generate a new data set.

        :param key: NeXus group name for current tree level
        :return: None
        """
        if self.current_dataset is not None:
            self.final_data_cleanup()
            self.output.append(self.current_dataset)
        self._initialize_new_data_set(key)

    def _initialize_new_data_set(self, key=""):
        """
        A private class method to generate a new 1D or 2D data object based on the type of data within the set.
        Outside methods should call add_data_set() to be sure any existing data is stored properly.

        :param key: NeXus group name for current tree level
        :return: None
        """
        entry = []
        if key != "":
            entry = self.raw_data.get(key)
        else:
            key_prog = re.compile("sasentry*")
            for key in self.raw_data.keys():
                if key_prog.match(key):
                    entry = self.raw_data.get(key)
                    break
        data = entry.get("sasdata")
        if data.get("Qx") is not None:
            self.current_dataset = Data2D()
        else:
            x = np.array(0)
            y = np.array(0)
            self.current_dataset = Data1D(x, y)
        self.current_dataset.filename = self.raw_data.filename

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        Recurses until a unique key value is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The key of the item to be added to the dictionary
        :param numb: The number to be appended to the name, starts at 0
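
        Example (illustrative): if ``wavelength`` already exists in the
        dictionary, the generated key is ``wavelength_1``; if that is also
        taken, ``wavelength_2``, and so on.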
        """
        if dictionary.get(name) is not None:
            numb += 1
            name = name.split("_")[0]
            name += "_{0}".format(numb)
            name = self._create_unique_key(dictionary, name, numb)
        return name