Changeset b09095a in sasview for src/sas/sascalc/dataloader
- Timestamp: Apr 17, 2017 2:39:50 PM (8 years ago)
- Branches: master, ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, costrafo411, magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
- Children: 8ffafd1
- Parents: beba407
- git-author: Jeff Krzywon <krzywon@…> (04/17/17 14:39:50)
- git-committer: krzywon <krzywon@…> (04/17/17 14:39:50)
- Location: src/sas/sascalc/dataloader
- Files: 2 edited
Legend: in the diffs below, lines prefixed with "-" were removed, lines prefixed with "+" were added, and unprefixed lines are unchanged context.
src/sas/sascalc/dataloader/file_reader_base_class.py
--- src/sas/sascalc/dataloader/file_reader_base_class.py (rbeba407)
+++ src/sas/sascalc/dataloader/file_reader_base_class.py (rb09095a)
@@ -1,4 +1,4 @@
 """
-This is the base file reader class all file readers should inherit from.
+This is the base file reader class most file readers should inherit from.
 All generic functionality required for a file loader/reader is built into this
 class
@@ -7,4 +7,5 @@
 import os
 import logging
+import numpy as np
 from abc import abstractmethod
 from loader_exceptions import NoKnownLoaderException, FileContentsException,\
@@ -19,14 +20,22 @@
     # List of Data1D and Data2D objects to be sent back to data_loader
     output = []
-    # Current plottable 1D/2D object being loaded in
+    # Current plottable_(1D/2D) object being loaded in
     current_dataset = None
-    # Current DataInfo objecct being loaded in
+    # Current DataInfo object being loaded in
     current_datainfo = None
-    # Wildcards
-    type = ["Text files (*.txt)"]
+    # String to describe the type of data this reader can load
+    type_name = "ASCII"
+    # Wildcards to display
+    type = ["Text files (*.txt|*.TXT)"]
     # List of allowed extensions
     ext = ['.txt']
     # Bypass extension check and try to load anyway
     allow_all = False
+    # Able to import the unit converter
+    has_converter = True
+    # Open file handle
+    f_open = None
+    # Default value of zero
+    _ZERO = 1e-16

     def read(self, filepath):
@@ -42,18 +51,28 @@
             # Try to load the file, but raise an error if unable to.
             try:
-                input_f = open(filepath, 'rb')
-                self.get_file_contents(input_f)
+                self.unit_converter()
+                self.f_open = open(filepath, 'rb')
+                self.get_file_contents()
+                self.sort_one_d_data()
             except RuntimeError:
+                # Reader specific errors
+                # TODO: Give a specific error.
                 pass
             except OSError as e:
+                # If the file cannot be opened
                 msg = "Unable to open file: {}\n".format(filepath)
                 msg += e.message
                 self.handle_error_message(msg)
             except Exception as e:
-                self.handle_error_message(e.message)
+                # Handle any other generic error
+                # TODO: raise or log?
+                raise
+            finally:
+                if not self.f_open.closed:
+                    self.f_open.close()
         else:
             msg = "Unable to find file at: {}\n".format(filepath)
             msg += "Please check your file path and try again."
             self.handle_error_message(msg)
-        # Return a list of parsed entries that data loader can manage
+        # Return a list of parsed entries that data_loader can manage
         return self.output
@@ -79,9 +98,61 @@
         self.output.append(data_obj)

+    def unit_converter(self):
+        """
+        Generic unit conversion import
+        """
+        # Check whether we have a converter available
+        self.has_converter = True
+        try:
+            from sas.sascalc.data_util.nxsunit import Converter
+        except:
+            self.has_converter = False
+
+    def sort_one_d_data(self):
+        """
+        Sort 1D data along the X axis for consistency
+        """
+        final_list = []
+        for data in self.output:
+            if isinstance(data, Data1D):
+                ind = np.lexsort((data.y, data.x))
+                data.x = np.asarray([data.x[i] for i in ind])
+                data.y = np.asarray([data.y[i] for i in ind])
+                if data.dx is not None:
+                    data.dx = np.asarray([data.dx[i] for i in ind])
+                if data.dxl is not None:
+                    data.dxl = np.asarray([data.dxl[i] for i in ind])
+                if data.dxw is not None:
+                    data.dxw = np.asarray([data.dxw[i] for i in ind])
+                if data.dy is not None:
+                    data.dy = np.asarray([data.dy[i] for i in ind])
+                if data.lam is not None:
+                    data.lam = np.asarray([data.lam[i] for i in ind])
+                if data.dlam is not None:
+                    data.dlam = np.asarray([data.dlam[i] for i in ind])
+            final_list.append(data)
+        self.output = final_list
+
+    @staticmethod
+    def splitline(line):
+        """
+        Splits a line into pieces based on common delimeters
+        :param line: A single line of text
+        :return: list of values
+        """
+        # Initial try for CSV (split on ,)
+        toks = line.split(',')
+        # Now try SCSV (split on ;)
+        if len(toks) < 2:
+            toks = line.split(';')
+        # Now go for whitespace
+        if len(toks) < 2:
+            toks = line.split()
+        return toks
+
     @abstractmethod
-    def get_file_contents(self, contents):
+    def get_file_contents(self):
         """
-        All reader classes that inherit from here should implement
-        :param contents:
+        All reader classes that inherit from FileReader must implement
         """
         pass
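Note: the new sort_one_d_data hook relies on numpy's lexsort, which treats the *last* key in the tuple as the primary sort key, so np.lexsort((data.y, data.x)) orders points by ascending x and uses y only to break ties in x. A minimal standalone sketch of that ordering behavior (the sample values are illustrative, not from SasView):

    import numpy as np

    # np.lexsort sorts by the LAST key first: (y, x) means
    # "sort by x, break ties in x using y".
    x = np.array([0.3, 0.1, 0.2, 0.1])
    y = np.array([9.0, 5.0, 7.0, 4.0])
    ind = np.lexsort((y, x))
    print(x[ind])  # -> [0.1 0.1 0.2 0.3]
    print(y[ind])  # -> [4. 5. 7. 9.]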
src/sas/sascalc/dataloader/readers/ascii_reader.py
--- src/sas/sascalc/dataloader/readers/ascii_reader.py (r235f514)
+++ src/sas/sascalc/dataloader/readers/ascii_reader.py (rb09095a)
@@ -1,245 +1,175 @@
 """
-    ASCII reader
+    Generic multi-column ASCII data reader
 """
 ############################################################################
-#This software was developed by the University of Tennessee as part of the
-#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
-#project funded by the US National Science Foundation.
-#If you use DANSE applications to do scientific research that leads to
-#publication, we ask that you acknowledge the use of the software with the
-#following sentence:
-#This work benefited from DANSE software developed under NSF award DMR-0520547.
-#copyright 2008, University of Tennessee
+# This software was developed by the University of Tennessee as part of the
+# Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
+# project funded by the US National Science Foundation.
+# If you use DANSE applications to do scientific research that leads to
+# publication, we ask that you acknowledge the use of the software with the
+# following sentence:
+# This work benefited from DANSE software developed under NSF award DMR-0520547.
+# copyright 2008, University of Tennessee
 #############################################################################

+import logging
+import numpy as np
+from sas.sascalc.dataloader.file_reader_base_class import FileReader
+from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D

-import numpy as np
-import os
-from sas.sascalc.dataloader.data_info import Data1D
-
-# Check whether we have a converter available
-has_converter = True
-try:
-    from sas.sascalc.data_util.nxsunit import Converter
-except:
-    has_converter = False
-_ZERO = 1e-16
+logger = logging.getLogger(__name__)


-class Reader:
+class Reader(FileReader):
     """
     Class to load ascii files (2, 3 or 4 columns).
     """
-    ## File type
+    # File type
     type_name = "ASCII"
-
-    ## Wildcards
+    # Wildcards
     type = ["ASCII files (*.txt)|*.txt",
             "ASCII files (*.dat)|*.dat",
             "ASCII files (*.abs)|*.abs",
             "CSV files (*.csv)|*.csv"]
-    ## List of allowed extensions
-    ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', 'csv', 'CSV']
+    # List of allowed extensions
+    ext = ['.txt', '.dat', '.abs', '.csv']
+    # Flag to bypass extension check
+    allow_all = True
+    # data unless that is the only data
+    min_data_pts = 5

-    ## Flag to bypass extension check
-    allow_all = True
+    def get_file_contents(self):
+        """
+        Get the contents of the file
+        """

-    def read(self, path):
-        """
-        Load data file
+        buff = self.f_open.read()
+        filepath = self.f_open.name
+        lines = buff.splitlines()
+        self.output = []
+        self.current_datainfo = DataInfo()
+        self.current_datainfo.filename = filepath
+        self.reset_data_list(len(lines))

-        :param path: file path
-        :return: Data1D object, or None
+        # The first good line of data will define whether
+        # we have 2-column or 3-column ascii
+        has_error_dx = None
+        has_error_dy = None

-        :raise RuntimeError: when the file can't be opened
-        :raise ValueError: when the length of the data vectors are inconsistent
-        """
-        if os.path.isfile(path):
-            basename = os.path.basename(path)
-            _, extension = os.path.splitext(basename)
-            if self.allow_all or extension.lower() in self.ext:
-                try:
-                    # Read in binary mode since GRASP frequently has no-ascii
-                    # characters that breaks the open operation
-                    input_f = open(path,'rb')
-                except:
-                    raise RuntimeError, "ascii_reader: cannot open %s" % path
-                buff = input_f.read()
-                lines = buff.splitlines()
+        # Initialize counters for data lines and header lines.
+        is_data = False
+        # More than "5" lines of data is considered as actual
+        # To count # of current data candidate lines
+        candidate_lines = 0
+        # To count total # of previous data candidate lines
+        candidate_lines_previous = 0
+        # Current line number
+        line_no = 0
+        # minimum required number of columns of data
+        lentoks = 2
+        for line in lines:
+            toks = self.splitline(line.strip())
+            # To remember the number of columns in the current line of data
+            new_lentoks = len(toks)
+            try:
+                if new_lentoks == 0:
+                    # If the line is blank, skip and continue on
+                    # In case of breaks within data sets.
+                    continue
+                elif new_lentoks != lentoks and is_data:
+                    # If a footer is found, break the loop and save the data
+                    break
+                elif new_lentoks != lentoks and not is_data:
+                    # If header lines are numerical
+                    candidate_lines = 0
+                    self.reset_data_list(len(lines) - line_no)

-                # Arrays for data storage
-                tx = np.zeros(0)
-                ty = np.zeros(0)
-                tdy = np.zeros(0)
-                tdx = np.zeros(0)
+                candidate_lines += 1
+                # If 5 or more lines, this is considering the set data
+                if candidate_lines >= self.min_data_pts:
+                    is_data = True

-                # The first good line of data will define whether
-                # we have 2-column or 3-column ascii
+                self.current_dataset.x[candidate_lines - 1] = float(toks[0])
+                self.current_dataset.y[candidate_lines - 1] = float(toks[1])
+
+                # If a 3rd row is present, consider it dy
+                if new_lentoks > 2:
+                    self.current_dataset.dy[candidate_lines - 1] = \
+                        float(toks[2])
+                    has_error_dy = True
+
+                # If a 4th row is present, consider it dx
+                if new_lentoks > 3:
+                    self.current_dataset.dx[candidate_lines - 1] = \
+                        float(toks[3])
+                    has_error_dx = True
+
+                # To remember the # of columns on the current line
+                # for the next line of data
+                lentoks = new_lentoks
+                line_no += 1
+            except ValueError:
+                # It is data and meet non - number, then stop reading
+                if is_data:
+                    break
+                # Delete the previously stored lines of data candidates if
+                # the list is not data
+                self.reset_data_list(len(lines) - line_no)
+                lentoks = 2
                 has_error_dx = None
                 has_error_dy = None
+                # Reset # of lines of data candidates
+                candidate_lines = 0
+            except Exception:
+                # Handle any unexpected exceptions
+                raise

-                #Initialize counters for data lines and header lines.
-                is_data = False
-                # More than "5" lines of data is considered as actual
-                # data unless that is the only data
-                min_data_pts = 5
-                # To count # of current data candidate lines
-                candidate_lines = 0
-                # To count total # of previous data candidate lines
-                candidate_lines_previous = 0
-                #minimum required number of columns of data
-                lentoks = 2
-                for line in lines:
-                    toks = self.splitline(line)
-                    # To remember the # of columns in the current line of data
-                    new_lentoks = len(toks)
-                    try:
-                        if new_lentoks == 1 and not is_data:
-                            ## If only one item in list, no longer data
-                            raise ValueError
-                        elif new_lentoks == 0:
-                            ## If the line is blank, skip and continue on
-                            ## In case of breaks within data sets.
-                            continue
-                        elif new_lentoks != lentoks and is_data:
-                            ## If a footer is found, break the loop and save the data
-                            break
-                        elif new_lentoks != lentoks and not is_data:
-                            ## If header lines are numerical
-                            candidate_lines = 0
-                            candidate_lines_previous = 0
+        if not is_data:
+            # TODO: Check file extension - primary reader, throw error.
+            # TODO: Secondary check, pass and try next reader
+            msg = "ascii_reader: x has no data"
+            raise RuntimeError(msg)
+        # Sanity check
+        if has_error_dy and not len(self.current_dataset.y) == \
+                len(self.current_dataset.dy):
+            msg = "ascii_reader: y and dy have different length"
+            raise RuntimeError(msg)
+        if has_error_dx and not len(self.current_dataset.x) == \
+                len(self.current_dataset.dx):
+            msg = "ascii_reader: y and dy have different length"
+            raise RuntimeError(msg)
+        # If the data length is zero, consider this as
+        # though we were not able to read the file.
+        if len(self.current_dataset.x) < 1:
+            raise RuntimeError("ascii_reader: could not load file")
+            return None

-                        #Make sure that all columns are numbers.
-                        for colnum in range(len(toks)):
-                            # Any non-floating point values throw ValueError
-                            float(toks[colnum])
+        # Data
+        self.current_dataset.x = \
+            self.current_dataset.x[self.current_dataset.x != 0]
+        self.current_dataset.y = \
+            self.current_dataset.y[self.current_dataset.x != 0]
+        self.current_dataset.dy = \
+            self.current_dataset.dy[self.current_dataset.x != 0] if \
+            has_error_dy else np.zeros(len(self.current_dataset.y))
+        self.current_dataset.dx = \
+            self.current_dataset.dx[self.current_dataset.x != 0] if \
+            has_error_dx else np.zeros(len(self.current_dataset.x))

-                        candidate_lines += 1
-                        _x = float(toks[0])
-                        _y = float(toks[1])
-                        _dx = None
-                        _dy = None
+        self.current_dataset.xaxis("\\rm{Q}", 'A^{-1}')
+        self.current_dataset.yaxis("\\rm{Intensity}", "cm^{-1}")

-                        #If 5 or more lines, this is considering the set data
-                        if candidate_lines >= min_data_pts:
-                            is_data = True
+        # Store loading process information
+        self.current_datainfo.meta_data['loader'] = self.type_name
+        self.send_to_output()

-                        # If a 3rd row is present, consider it dy
-                        if new_lentoks > 2:
-                            _dy = float(toks[2])
-                            has_error_dy = False if _dy is None else True
-
-                        # If a 4th row is present, consider it dx
-                        if new_lentoks > 3:
-                            _dx = float(toks[3])
-                            has_error_dx = False if _dx is None else True
-
-                        # Delete the previously stored lines of data candidates if
-                        # the list is not data
-                        if candidate_lines == 1 and -1 < candidate_lines_previous < min_data_pts and \
-                            is_data == False:
-                            try:
-                                tx = np.zeros(0)
-                                ty = np.zeros(0)
-                                tdy = np.zeros(0)
-                                tdx = np.zeros(0)
-                            except:
-                                pass
-
-                        if has_error_dy == True:
-                            tdy = np.append(tdy, _dy)
-                        if has_error_dx == True:
-                            tdx = np.append(tdx, _dx)
-                        tx = np.append(tx, _x)
-                        ty = np.append(ty, _y)
-
-                        #To remember the # of columns on the current line
-                        # for the next line of data
-                        lentoks = new_lentoks
-                        candidate_lines_previous = candidate_lines
-                    except ValueError:
-                        # It is data and meet non - number, then stop reading
-                        if is_data == True:
-                            break
-                        lentoks = 2
-                        has_error_dx = None
-                        has_error_dy = None
-                        #Reset # of lines of data candidates
-                        candidate_lines = 0
-                    except:
-                        pass
-
-                input_f.close()
-                if not is_data:
-                    msg = "ascii_reader: x has no data"
-                    raise RuntimeError, msg
-                # Sanity check
-                if has_error_dy == True and not len(ty) == len(tdy):
-                    msg = "ascii_reader: y and dy have different length"
-                    raise RuntimeError, msg
-                if has_error_dx == True and not len(tx) == len(tdx):
-                    msg = "ascii_reader: y and dy have different length"
-                    raise RuntimeError, msg
-                # If the data length is zero, consider this as
-                # though we were not able to read the file.
-                if len(tx) == 0:
-                    raise RuntimeError, "ascii_reader: could not load file"
-
-                #Let's re-order the data to make cal.
-                # curve look better some cases
-                ind = np.lexsort((ty, tx))
-                x = np.zeros(len(tx))
-                y = np.zeros(len(ty))
-                dy = np.zeros(len(tdy))
-                dx = np.zeros(len(tdx))
-                output = Data1D(x, y, dy=dy, dx=dx)
-                self.filename = output.filename = basename
-
-                for i in ind:
-                    x[i] = tx[ind[i]]
-                    y[i] = ty[ind[i]]
-                    if has_error_dy == True:
-                        dy[i] = tdy[ind[i]]
-                    if has_error_dx == True:
-                        dx[i] = tdx[ind[i]]
-                # Zeros in dx, dy
-                if has_error_dx:
-                    dx[dx == 0] = _ZERO
-                if has_error_dy:
-                    dy[dy == 0] = _ZERO
-                #Data
-                output.x = x[x != 0]
-                output.y = y[x != 0]
-                output.dy = dy[x != 0] if has_error_dy == True\
-                    else np.zeros(len(output.y))
-                output.dx = dx[x != 0] if has_error_dx == True\
-                    else np.zeros(len(output.x))
-
-                output.xaxis("\\rm{Q}", 'A^{-1}')
-                output.yaxis("\\rm{Intensity}", "cm^{-1}")
-
-                # Store loading process information
-                output.meta_data['loader'] = self.type_name
-                if len(output.x) < 1:
-                    raise RuntimeError, "%s is empty" % path
-                return output
-
-        else:
-            raise RuntimeError, "%s is not a file" % path
-        return None
-
-    def splitline(self, line):
+    def reset_data_list(self, no_lines):
         """
-        Splits a line into pieces based on common delimeters
-        :param line: A single line of text
-        :return: list of values
+        Reset the plottable_1D object
         """
-        # Initial try for CSV (split on ,)
-        toks = line.split(',')
-        # Now try SCSV (split on ;)
-        if len(toks) < 2:
-            toks = line.split(';')
-        # Now go for whitespace
-        if len(toks) < 2:
-            toks = line.split()
-        return toks
+        # Initialize data sets with arrays the maximum possible size
+        x = np.zeros(no_lines)
+        y = np.zeros(no_lines)
+        dy = np.zeros(no_lines)
+        dx = np.zeros(no_lines)
+        self.current_dataset = plottable_1D(x, y, dx, dy)
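Note: the rewrite replaces the old per-line np.append growth with a preallocate-then-trim pattern: reset_data_list allocates arrays sized to the total line count, values are written by index, and still-zero slots are trimmed afterwards. A minimal sketch of the same pattern outside SasView (variable names and the comma-to-whitespace parsing are illustrative, not the SasView API); note that the mask from x is applied to y before x itself is reassigned, so both arrays stay the same length:

    import numpy as np

    lines = ["# Q  I(Q)", "0.01, 1.20", "0.02, 1.10", "0.03, 0.95", "END"]
    # Allocate for the worst case: every line could be a data point.
    x = np.zeros(len(lines))
    y = np.zeros(len(lines))
    n = 0
    for line in lines:
        toks = line.replace(',', ' ').split()
        try:
            x[n], y[n] = float(toks[0]), float(toks[1])
            n += 1
        except (ValueError, IndexError):
            continue  # header/footer lines fail to parse and are skipped
    # Trim untouched (still-zero) slots, as the reader does with x != 0.
    y = y[x != 0]
    x = x[x != 0]

One caveat of this trimming, visible in the diff above as well: filtering on x != 0 silently drops any genuine data point whose x value is exactly zero.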