file_reader_base_class.py @ 2924532

Branches/tags: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload

Last change on this file since 2924532 was 2924532, checked in by krzywon, 6 years ago
Cleanup of the SasView GUI data loader error handling and a more specific error message for deprecated file extensions.
Property mode set to `100644`
File size: 16.2 KB

Rev	Line
[beba407]	1	"""
[b09095a]	2	This is the base file reader class most file readers should inherit from.
[beba407]	3	All generic functionality required for a file loader/reader is built into this
	4	class
	5	"""
	6
	7	import os
[7b50f14]	8	import sys
[8475d16]	9	import math
[beba407]	10	import logging
	11	from abc import abstractmethod
[574adc7]	12
	13	import numpy as np
	14	from .loader_exceptions import NoKnownLoaderException, FileContentsException,\
[da8bb53]	15	DataReaderException, DefaultReaderException
[574adc7]	16	from .data_info import Data1D, Data2D, DataInfo, plottable_1D, plottable_2D,\
[beba407]	17	combine_data_info_with_plottable
	18
	19	logger = logging.getLogger(__name__)
	20
[7b50f14]	21	if sys.version_info[0] < 3:
	22	def decode(s):
	23	return s
	24	else:
	25	def decode(s):
	26	return s.decode() if isinstance(s, bytes) else s
[beba407]	27
[6fd7b20]	28	# Data 1D fields for iterative purposes
	29	FIELDS_1D = ('x', 'y', 'dx', 'dy', 'dxl', 'dxw')
	30	# Data 2D fields for iterative purposes
	31	FIELDS_2D = ('data', 'qx_data', 'qy_data', 'q_data', 'err_data',
[340291a]	32	'dqx_data', 'dqy_data', 'mask')
[c36c09f]	33	DEPRECATION_MESSAGE = ("\rThe extension of this file suggests the data set migh"
	34	"t not be fully reduced. Support for the reader associat"
	35	"ed with this file type has been removed. An attempt to "
[2924532]	36	"load the file was made, however, even if a data set was"
	37	" generated, SasView cannot guarantee the accuracy of th"
	38	"e data.")
[6fd7b20]	39
class FileReader(object):
    """
    Base class for file readers.

    Subclasses override the class attributes below as needed and implement
    ``get_file_contents``.
    """
    # String to describe the type of data this reader can load
    type_name = "ASCII"
    # Wildcards to display
    # NOTE(review): the backslash before '|' looks like an extraction
    # artifact (the upstream string is presumably "*.txt|*.TXT") - confirm
    # before relying on this value.
    type = ["Text files (.txt\\|.TXT)"]
    # List of allowed extensions
    ext = ['.txt']
    # Deprecated extensions
    deprecated_extensions = ['.asc', '.nxs']
    # Bypass extension check and try to load anyway
    allow_all = False
    # Able to import the unit converter
    has_converter = True
    # Default value of zero
    _ZERO = 1e-16
[beba407]	55
[cb11a25]	56	def __init__(self):
	57	# List of Data1D and Data2D objects to be sent back to data_loader
	58	self.output = []
	59	# Current plottable_(1D/2D) object being loaded in
	60	self.current_dataset = None
	61	# Current DataInfo object being loaded in
	62	self.current_datainfo = None
[3053a4a]	63	# File path sent to reader
	64	self.filepath = None
[cb11a25]	65	# Open file handle
	66	self.f_open = None
	67
[beba407]	68	def read(self, filepath):
	69	"""
[bc570f4]	70	Basic file reader
	71
[beba407]	72	:param filepath: The full or relative path to a file to be loaded
	73	"""
[3053a4a]	74	self.filepath = filepath
[beba407]	75	if os.path.isfile(filepath):
	76	basename, extension = os.path.splitext(os.path.basename(filepath))
[da8bb53]	77	self.extension = extension.lower()
[beba407]	78	# If the file type is not allowed, return nothing
[da8bb53]	79	if self.extension in self.ext or self.allow_all:
[beba407]	80	# Try to load the file, but raise an error if unable to.
	81	try:
[b09095a]	82	self.f_open = open(filepath, 'rb')
	83	self.get_file_contents()
[0b79323]	84
[bc570f4]	85	except DataReaderException as e:
[da8bb53]	86	self.handle_error_message(e.message)
[beba407]	87	except OSError as e:
[b09095a]	88	# If the file cannot be opened
[beba407]	89	msg = "Unable to open file: {}\n".format(filepath)
	90	msg += e.message
	91	self.handle_error_message(msg)
[b09095a]	92	finally:
[da8bb53]	93	# Close the file handle if it is open
[b09095a]	94	if not self.f_open.closed:
	95	self.f_open.close()
[425feff]	96	if any(filepath.lower().endswith(ext) for ext in
	97	self.deprecated_extensions):
	98	self.handle_error_message(DEPRECATION_MESSAGE)
[248ff73]	99	if len(self.output) > 0:
	100	# Sort the data that's been loaded
	101	self.sort_one_d_data()
	102	self.sort_two_d_data()
[beba407]	103	else:
	104	msg = "Unable to find file at: {}\n".format(filepath)
	105	msg += "Please check your file path and try again."
	106	self.handle_error_message(msg)
[a78433dd]	107
[b09095a]	108	# Return a list of parsed entries that data_loader can manage
[1576693]	109	final_data = self.output
	110	self.reset_state()
	111	return final_data
[beba407]	112
[61f329f0]	113	def reset_state(self):
	114	"""
	115	Resets the class state to a base case when loading a new data file so previous
	116	data files do not appear a second time
	117	"""
	118	self.current_datainfo = None
	119	self.current_dataset = None
[3053a4a]	120	self.filepath = None
[8475d16]	121	self.ind = None
[61f329f0]	122	self.output = []
	123
[26183bf]	124	def nextline(self):
	125	"""
	126	Returns the next line in the file as a string.
	127	"""
	128	#return self.f_open.readline()
[7b50f14]	129	return decode(self.f_open.readline())
[26183bf]	130
	131	def nextlines(self):
	132	"""
	133	Returns the next line in the file as a string.
	134	"""
	135	for line in self.f_open:
	136	#yield line
[7b50f14]	137	yield decode(line)
[26183bf]	138
	139	def readall(self):
	140	"""
	141	Returns the entire file as a string.
	142	"""
	143	#return self.f_open.read()
[7b50f14]	144	return decode(self.f_open.read())
[26183bf]	145
[beba407]	146	def handle_error_message(self, msg):
	147	"""
	148	Generic error handler to add an error to the current datainfo to
[20fa5fe]	149	propagate the error up the error chain.
[beba407]	150	:param msg: Error message
	151	"""
[dcb91cf]	152	if len(self.output) > 0:
	153	self.output[-1].errors.append(msg)
	154	elif isinstance(self.current_datainfo, DataInfo):
[beba407]	155	self.current_datainfo.errors.append(msg)
	156	else:
	157	logger.warning(msg)
[425feff]	158	raise NoKnownLoaderException(msg)
[beba407]	159
	160	def send_to_output(self):
	161	"""
	162	Helper that automatically combines the info and set and then appends it
	163	to output
	164	"""
	165	data_obj = combine_data_info_with_plottable(self.current_dataset,
	166	self.current_datainfo)
	167	self.output.append(data_obj)
	168
[b09095a]	169	def sort_one_d_data(self):
	170	"""
	171	Sort 1D data along the X axis for consistency
	172	"""
	173	for data in self.output:
	174	if isinstance(data, Data1D):
[a78a02f]	175	# Normalize the units for
	176	data.x_unit = self.format_unit(data.x_unit)
	177	data.y_unit = self.format_unit(data.y_unit)
[7477fb9]	178	# Sort data by increasing x and remove 1st point
[e3133dc]	179	ind = np.lexsort((data.y, data.x))
	180	data.x = self._reorder_1d_array(data.x, ind)
	181	data.y = self._reorder_1d_array(data.y, ind)
[b09095a]	182	if data.dx is not None:
[4660990]	183	if len(data.dx) == 0:
	184	data.dx = None
	185	continue
[e3133dc]	186	data.dx = self._reorder_1d_array(data.dx, ind)
[b09095a]	187	if data.dxl is not None:
[e3133dc]	188	data.dxl = self._reorder_1d_array(data.dxl, ind)
[b09095a]	189	if data.dxw is not None:
[e3133dc]	190	data.dxw = self._reorder_1d_array(data.dxw, ind)
[b09095a]	191	if data.dy is not None:
[4660990]	192	if len(data.dy) == 0:
	193	data.dy = None
	194	continue
[e3133dc]	195	data.dy = self._reorder_1d_array(data.dy, ind)
[b09095a]	196	if data.lam is not None:
[e3133dc]	197	data.lam = self._reorder_1d_array(data.lam, ind)
[b09095a]	198	if data.dlam is not None:
[e3133dc]	199	data.dlam = self._reorder_1d_array(data.dlam, ind)
[f02a0c6]	200	data = self._remove_nans_in_data(data)
[dcb91cf]	201	if len(data.x) > 0:
[248ff73]	202	data.xmin = np.min(data.x)
	203	data.xmax = np.max(data.x)
	204	data.ymin = np.min(data.y)
	205	data.ymax = np.max(data.y)
[b09095a]	206
[e3133dc]	207	@staticmethod
	208	def _reorder_1d_array(array, ind):
	209	"""
	210	Reorders a 1D array based on the indices passed as ind
	211	:param array: Array to be reordered
	212	:param ind: Indices used to reorder array
	213	:return: reordered array
	214	"""
	215	array = np.asarray(array, dtype=np.float64)
	216	return array[ind]
	217
	218	@staticmethod
[f02a0c6]	219	def _remove_nans_in_data(data):
[e3133dc]	220	"""
	221	Remove data points where nan is loaded
[a58b5a0]	222	:param data: 1D or 2D data object
	223	:return: data with nan points removed
[e3133dc]	224	"""
[f02a0c6]	225	if isinstance(data, Data1D):
[6fd7b20]	226	fields = FIELDS_1D
[f02a0c6]	227	elif isinstance(data, Data2D):
[6fd7b20]	228	fields = FIELDS_2D
[f02a0c6]	229	else:
[6fd7b20]	230	return data
[a58b5a0]	231	# Make array of good points - all others will be removed
[6fd7b20]	232	good = np.isfinite(getattr(data, fields[0]))
	233	for name in fields[1:]:
	234	array = getattr(data, name)
[e3133dc]	235	if array is not None:
[a58b5a0]	236	# Update good points only if not already changed
[6fd7b20]	237	good &= np.isfinite(array)
	238	if not np.all(good):
	239	for name in fields:
	240	array = getattr(data, name)
	241	if array is not None:
	242	setattr(data, name, array[good])
[e3133dc]	243	return data
[8475d16]	244
[0b79323]	245	def sort_two_d_data(self):
	246	for dataset in self.output:
[9d786e5]	247	if isinstance(dataset, Data2D):
[a78a02f]	248	# Normalize the units for
	249	dataset.x_unit = self.format_unit(dataset.Q_unit)
	250	dataset.y_unit = self.format_unit(dataset.I_unit)
[9d786e5]	251	dataset.data = dataset.data.astype(np.float64)
	252	dataset.qx_data = dataset.qx_data.astype(np.float64)
	253	dataset.xmin = np.min(dataset.qx_data)
	254	dataset.xmax = np.max(dataset.qx_data)
	255	dataset.qy_data = dataset.qy_data.astype(np.float64)
	256	dataset.ymin = np.min(dataset.qy_data)
	257	dataset.ymax = np.max(dataset.qy_data)
	258	dataset.q_data = np.sqrt(dataset.qx_data * dataset.qx_data
	259	+ dataset.qy_data * dataset.qy_data)
	260	if dataset.err_data is not None:
	261	dataset.err_data = dataset.err_data.astype(np.float64)
	262	if dataset.dqx_data is not None:
	263	dataset.dqx_data = dataset.dqx_data.astype(np.float64)
	264	if dataset.dqy_data is not None:
	265	dataset.dqy_data = dataset.dqy_data.astype(np.float64)
	266	if dataset.mask is not None:
	267	dataset.mask = dataset.mask.astype(dtype=bool)
	268
	269	if len(dataset.data.shape) == 2:
	270	n_rows, n_cols = dataset.data.shape
	271	dataset.y_bins = dataset.qy_data[0::int(n_cols)]
	272	dataset.x_bins = dataset.qx_data[:int(n_cols)]
[2f85af7]	273	dataset.data = dataset.data.flatten()
[f02a0c6]	274	dataset = self._remove_nans_in_data(dataset)
[deaa0c6]	275	if len(dataset.data) > 0:
	276	dataset.xmin = np.min(dataset.qx_data)
	277	dataset.xmax = np.max(dataset.qx_data)
	278	dataset.ymin = np.min(dataset.qy_data)
	279	dataset.ymax = np.max(dataset.qx_data)
[0b79323]	280
[a78a02f]	281	def format_unit(self, unit=None):
	282	"""
	283	Format units a common way
	284	:param unit:
	285	:return:
	286	"""
	287	if unit:
	288	split = unit.split("/")
	289	if len(split) == 1:
	290	return unit
	291	elif split[0] == '1':
	292	return "{0}^".format(split[1]) + "{-1}"
	293	else:
	294	return "{0}*{1}^".format(split[0], split[1]) + "{-1}"
	295
[da8bb53]	296	def set_all_to_none(self):
	297	"""
	298	Set all mutable values to None for error handling purposes
	299	"""
	300	self.current_dataset = None
	301	self.current_datainfo = None
	302	self.output = []
	303
[7b07fbe]	304	def data_cleanup(self):
	305	"""
	306	Clean up the data sets and refresh everything
	307	:return: None
	308	"""
	309	self.remove_empty_q_values()
	310	self.send_to_output() # Combine datasets with DataInfo
	311	self.current_datainfo = DataInfo() # Reset DataInfo
	312
	313	def remove_empty_q_values(self):
[ad92c5a]	314	"""
	315	Remove any point where Q == 0
	316	"""
[7b07fbe]	317	if isinstance(self.current_dataset, plottable_1D):
	318	# Booleans for resolutions
	319	has_error_dx = self.current_dataset.dx is not None
	320	has_error_dxl = self.current_dataset.dxl is not None
	321	has_error_dxw = self.current_dataset.dxw is not None
	322	has_error_dy = self.current_dataset.dy is not None
	323	# Create arrays of zeros for non-existent resolutions
	324	if has_error_dxw and not has_error_dxl:
	325	array_size = self.current_dataset.dxw.size - 1
	326	self.current_dataset.dxl = np.append(self.current_dataset.dxl,
	327	np.zeros([array_size]))
	328	has_error_dxl = True
	329	elif has_error_dxl and not has_error_dxw:
	330	array_size = self.current_dataset.dxl.size - 1
	331	self.current_dataset.dxw = np.append(self.current_dataset.dxw,
	332	np.zeros([array_size]))
	333	has_error_dxw = True
	334	elif not has_error_dxl and not has_error_dxw and not has_error_dx:
	335	array_size = self.current_dataset.x.size - 1
	336	self.current_dataset.dx = np.append(self.current_dataset.dx,
	337	np.zeros([array_size]))
	338	has_error_dx = True
	339	if not has_error_dy:
	340	array_size = self.current_dataset.y.size - 1
	341	self.current_dataset.dy = np.append(self.current_dataset.dy,
	342	np.zeros([array_size]))
	343	has_error_dy = True
	344
	345	# Remove points where q = 0
	346	x = self.current_dataset.x
	347	self.current_dataset.x = self.current_dataset.x[x != 0]
	348	self.current_dataset.y = self.current_dataset.y[x != 0]
	349	if has_error_dy:
	350	self.current_dataset.dy = self.current_dataset.dy[x != 0]
	351	if has_error_dx:
	352	self.current_dataset.dx = self.current_dataset.dx[x != 0]
	353	if has_error_dxl:
	354	self.current_dataset.dxl = self.current_dataset.dxl[x != 0]
	355	if has_error_dxw:
	356	self.current_dataset.dxw = self.current_dataset.dxw[x != 0]
	357	elif isinstance(self.current_dataset, plottable_2D):
	358	has_error_dqx = self.current_dataset.dqx_data is not None
	359	has_error_dqy = self.current_dataset.dqy_data is not None
	360	has_error_dy = self.current_dataset.err_data is not None
	361	has_mask = self.current_dataset.mask is not None
	362	x = self.current_dataset.qx_data
	363	self.current_dataset.data = self.current_dataset.data[x != 0]
	364	self.current_dataset.qx_data = self.current_dataset.qx_data[x != 0]
	365	self.current_dataset.qy_data = self.current_dataset.qy_data[x != 0]
[deaa0c6]	366	self.current_dataset.q_data = np.sqrt(
	367	np.square(self.current_dataset.qx_data) + np.square(
	368	self.current_dataset.qy_data))
[7b07fbe]	369	if has_error_dy:
	370	self.current_dataset.err_data = self.current_dataset.err_data[x != 0]
	371	if has_error_dqx:
	372	self.current_dataset.dqx_data = self.current_dataset.dqx_data[x != 0]
	373	if has_error_dqy:
	374	self.current_dataset.dqy_data = self.current_dataset.dqy_data[x != 0]
	375	if has_mask:
	376	self.current_dataset.mask = self.current_dataset.mask[x != 0]
[ad92c5a]	377
	378	def reset_data_list(self, no_lines=0):
	379	"""
	380	Reset the plottable_1D object
	381	"""
	382	# Initialize data sets with arrays the maximum possible size
	383	x = np.zeros(no_lines)
	384	y = np.zeros(no_lines)
[4660990]	385	dx = np.zeros(no_lines)
	386	dy = np.zeros(no_lines)
	387	self.current_dataset = plottable_1D(x, y, dx, dy)
[ad92c5a]	388
[b09095a]	389	@staticmethod
	390	def splitline(line):
	391	"""
[20fa5fe]	392	Splits a line into pieces based on common delimiters
[b09095a]	393	:param line: A single line of text
	394	:return: list of values
	395	"""
	396	# Initial try for CSV (split on ,)
	397	toks = line.split(',')
	398	# Now try SCSV (split on ;)
	399	if len(toks) < 2:
	400	toks = line.split(';')
	401	# Now go for whitespace
	402	if len(toks) < 2:
	403	toks = line.split()
	404	return toks
	405
[beba407]	406	@abstractmethod
[b09095a]	407	def get_file_contents(self):
[beba407]	408	"""
[ad92c5a]	409	Reader specific class to access the contents of the file
[b09095a]	410	All reader classes that inherit from FileReader must implement
[beba407]	411	"""
	412	pass

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/dataloader/file_reader_base_class.py @ 2924532

Download in other formats: