Changes in src/sas/sascalc/dataloader/readers/ascii_reader.py [235f514:080d88e] in sasview
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
src/sas/sascalc/dataloader/readers/ascii_reader.py
r235f514 r080d88e 1 1 """ 2 ASCIIreader2 Generic multi-column ASCII data reader 3 3 """ 4 4 ############################################################################ 5 # This software was developed by the University of Tennessee as part of the6 # Distributed Data Analysis of Neutron Scattering Experiments (DANSE)7 # project funded by the US National Science Foundation.8 # If you use DANSE applications to do scientific research that leads to9 # publication, we ask that you acknowledge the use of the software with the10 # following sentence:11 # This work benefited from DANSE software developed under NSF award DMR-0520547.12 # copyright 2008, University of Tennessee5 # This software was developed by the University of Tennessee as part of the 6 # Distributed Data Analysis of Neutron Scattering Experiments (DANSE) 7 # project funded by the US National Science Foundation. 8 # If you use DANSE applications to do scientific research that leads to 9 # publication, we ask that you acknowledge the use of the software with the 10 # following sentence: 11 # This work benefited from DANSE software developed under NSF award DMR-0520547. 12 # copyright 2008, University of Tennessee 13 13 ############################################################################# 14 14 15 import logging 16 from sas.sascalc.dataloader.file_reader_base_class import FileReader 17 from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D 18 from sas.sascalc.dataloader.loader_exceptions import FileContentsException,\ 19 DefaultReaderException 15 20 16 import numpy as np 17 import os 18 from sas.sascalc.dataloader.data_info import Data1D 19 20 # Check whether we have a converter available 21 has_converter = True 22 try: 23 from sas.sascalc.data_util.nxsunit import Converter 24 except: 25 has_converter = False 26 _ZERO = 1e-16 21 logger = logging.getLogger(__name__) 27 22 28 23 29 class Reader :24 class Reader(FileReader): 30 25 """ 31 26 Class to load ascii files (2, 3 or 4 columns). 
32 27 """ 33 # #File type28 # File type 34 29 type_name = "ASCII" 35 36 ## Wildcards 30 # Wildcards 37 31 type = ["ASCII files (*.txt)|*.txt", 38 32 "ASCII files (*.dat)|*.dat", 39 33 "ASCII files (*.abs)|*.abs", 40 34 "CSV files (*.csv)|*.csv"] 41 ## List of allowed extensions 42 ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', 'csv', 'CSV'] 35 # List of allowed extensions 36 ext = ['.txt', '.dat', '.abs', '.csv'] 37 # Flag to bypass extension check 38 allow_all = True 39 # data unless that is the only data 40 min_data_pts = 5 43 41 44 ## Flag to bypass extension check 45 allow_all = True 42 def get_file_contents(self): 43 """ 44 Get the contents of the file 45 """ 46 46 47 def read(self, path): 48 """ 49 Load data file 47 buff = self.f_open.read() 48 filepath = self.f_open.name 49 lines = buff.splitlines() 50 self.output = [] 51 self.current_datainfo = DataInfo() 52 self.current_datainfo.filename = filepath 53 self.reset_data_list(len(lines)) 50 54 51 :param path: file path 52 :return: Data1D object, or None 55 # The first good line of data will define whether 56 # we have 2-column or 3-column ascii 57 has_error_dx = None 58 has_error_dy = None 53 59 54 :raise RuntimeError: when the file can't be opened 55 :raise ValueError: when the length of the data vectors are inconsistent 56 """ 57 if os.path.isfile(path): 58 basename = os.path.basename(path) 59 _, extension = os.path.splitext(basename) 60 if self.allow_all or extension.lower() in self.ext: 61 try: 62 # Read in binary mode since GRASP frequently has no-ascii 63 # characters that breaks the open operation 64 input_f = open(path,'rb') 65 except: 66 raise RuntimeError, "ascii_reader: cannot open %s" % path 67 buff = input_f.read() 68 lines = buff.splitlines() 60 # Initialize counters for data lines and header lines. 
61 is_data = False 62 # More than "5" lines of data is considered as actual 63 # To count # of current data candidate lines 64 candidate_lines = 0 65 # To count total # of previous data candidate lines 66 candidate_lines_previous = 0 67 # Current line number 68 line_no = 0 69 # minimum required number of columns of data 70 lentoks = 2 71 for line in lines: 72 toks = self.splitline(line.strip()) 73 # To remember the number of columns in the current line of data 74 new_lentoks = len(toks) 75 try: 76 if new_lentoks == 0: 77 # If the line is blank, skip and continue on 78 # In case of breaks within data sets. 79 continue 80 elif new_lentoks != lentoks and is_data: 81 # If a footer is found, break the loop and save the data 82 break 83 elif new_lentoks != lentoks and not is_data: 84 # If header lines are numerical 85 candidate_lines = 0 86 self.reset_data_list(len(lines) - line_no) 69 87 70 # Arrays for data storage 71 tx = np.zeros(0) 72 ty = np.zeros(0) 73 tdy = np.zeros(0) 74 tdx = np.zeros(0) 88 self.current_dataset.x[candidate_lines] = float(toks[0]) 75 89 76 # The first good line of data will define whether 77 # we have 2-column or 3-column ascii 90 if new_lentoks > 1: 91 self.current_dataset.y[candidate_lines] = float(toks[1]) 92 93 # If a 3rd row is present, consider it dy 94 if new_lentoks > 2: 95 self.current_dataset.dy[candidate_lines] = \ 96 float(toks[2]) 97 has_error_dy = True 98 99 # If a 4th row is present, consider it dx 100 if new_lentoks > 3: 101 self.current_dataset.dx[candidate_lines] = \ 102 float(toks[3]) 103 has_error_dx = True 104 105 candidate_lines += 1 106 # If 5 or more lines, this is considering the set data 107 if candidate_lines >= self.min_data_pts: 108 is_data = True 109 110 # To remember the # of columns on the current line 111 # for the next line of data 112 lentoks = new_lentoks 113 line_no += 1 114 except ValueError: 115 # ValueError is raised when non numeric strings conv. 
to float 116 # It is data and meet non - number, then stop reading 117 if is_data: 118 break 119 # Delete the previously stored lines of data candidates if 120 # the list is not data 121 self.reset_data_list(len(lines) - line_no) 122 lentoks = 2 78 123 has_error_dx = None 79 124 has_error_dy = None 125 # Reset # of lines of data candidates 126 candidate_lines = 0 80 127 81 #Initialize counters for data lines and header lines. 82 is_data = False 83 # More than "5" lines of data is considered as actual 84 # data unless that is the only data 85 min_data_pts = 5 86 # To count # of current data candidate lines 87 candidate_lines = 0 88 # To count total # of previous data candidate lines 89 candidate_lines_previous = 0 90 #minimum required number of columns of data 91 lentoks = 2 92 for line in lines: 93 toks = self.splitline(line) 94 # To remember the # of columns in the current line of data 95 new_lentoks = len(toks) 96 try: 97 if new_lentoks == 1 and not is_data: 98 ## If only one item in list, no longer data 99 raise ValueError 100 elif new_lentoks == 0: 101 ## If the line is blank, skip and continue on 102 ## In case of breaks within data sets. 
103 continue 104 elif new_lentoks != lentoks and is_data: 105 ## If a footer is found, break the loop and save the data 106 break 107 elif new_lentoks != lentoks and not is_data: 108 ## If header lines are numerical 109 candidate_lines = 0 110 candidate_lines_previous = 0 128 if not is_data: 129 self.set_all_to_none() 130 if self.extension in self.ext: 131 msg = "ASCII Reader error: Fewer than five Q data points found " 132 msg += "in {}.".format(filepath) 133 raise FileContentsException(msg) 134 else: 135 msg = "ASCII Reader could not load the file {}".format(filepath) 136 raise DefaultReaderException(msg) 137 # Sanity check 138 if has_error_dy and not len(self.current_dataset.y) == \ 139 len(self.current_dataset.dy): 140 msg = "ASCII Reader error: Number of I and dI data points are" 141 msg += " different in {}.".format(filepath) 142 # TODO: Add error to self.current_datainfo.errors instead? 143 self.set_all_to_none() 144 raise FileContentsException(msg) 145 if has_error_dx and not len(self.current_dataset.x) == \ 146 len(self.current_dataset.dx): 147 msg = "ASCII Reader error: Number of Q and dQ data points are" 148 msg += " different in {}.".format(filepath) 149 # TODO: Add error to self.current_datainfo.errors instead? 150 self.set_all_to_none() 151 raise FileContentsException(msg) 111 152 112 #Make sure that all columns are numbers. 
113 for colnum in range(len(toks)): 114 # Any non-floating point values throw ValueError 115 float(toks[colnum]) 153 self.remove_empty_q_values(has_error_dx, has_error_dy) 154 self.current_dataset.xaxis("\\rm{Q}", 'A^{-1}') 155 self.current_dataset.yaxis("\\rm{Intensity}", "cm^{-1}") 116 156 117 candidate_lines += 1 118 _x = float(toks[0]) 119 _y = float(toks[1]) 120 _dx = None 121 _dy = None 122 123 #If 5 or more lines, this is considering the set data 124 if candidate_lines >= min_data_pts: 125 is_data = True 126 127 # If a 3rd row is present, consider it dy 128 if new_lentoks > 2: 129 _dy = float(toks[2]) 130 has_error_dy = False if _dy is None else True 131 132 # If a 4th row is present, consider it dx 133 if new_lentoks > 3: 134 _dx = float(toks[3]) 135 has_error_dx = False if _dx is None else True 136 137 # Delete the previously stored lines of data candidates if 138 # the list is not data 139 if candidate_lines == 1 and -1 < candidate_lines_previous < min_data_pts and \ 140 is_data == False: 141 try: 142 tx = np.zeros(0) 143 ty = np.zeros(0) 144 tdy = np.zeros(0) 145 tdx = np.zeros(0) 146 except: 147 pass 148 149 if has_error_dy == True: 150 tdy = np.append(tdy, _dy) 151 if has_error_dx == True: 152 tdx = np.append(tdx, _dx) 153 tx = np.append(tx, _x) 154 ty = np.append(ty, _y) 155 156 #To remember the # of columns on the current line 157 # for the next line of data 158 lentoks = new_lentoks 159 candidate_lines_previous = candidate_lines 160 except ValueError: 161 # It is data and meet non - number, then stop reading 162 if is_data == True: 163 break 164 lentoks = 2 165 has_error_dx = None 166 has_error_dy = None 167 #Reset # of lines of data candidates 168 candidate_lines = 0 169 except: 170 pass 171 172 input_f.close() 173 if not is_data: 174 msg = "ascii_reader: x has no data" 175 raise RuntimeError, msg 176 # Sanity check 177 if has_error_dy == True and not len(ty) == len(tdy): 178 msg = "ascii_reader: y and dy have different length" 179 raise 
RuntimeError, msg 180 if has_error_dx == True and not len(tx) == len(tdx): 181 msg = "ascii_reader: y and dy have different length" 182 raise RuntimeError, msg 183 # If the data length is zero, consider this as 184 # though we were not able to read the file. 185 if len(tx) == 0: 186 raise RuntimeError, "ascii_reader: could not load file" 187 188 #Let's re-order the data to make cal. 189 # curve look better some cases 190 ind = np.lexsort((ty, tx)) 191 x = np.zeros(len(tx)) 192 y = np.zeros(len(ty)) 193 dy = np.zeros(len(tdy)) 194 dx = np.zeros(len(tdx)) 195 output = Data1D(x, y, dy=dy, dx=dx) 196 self.filename = output.filename = basename 197 198 for i in ind: 199 x[i] = tx[ind[i]] 200 y[i] = ty[ind[i]] 201 if has_error_dy == True: 202 dy[i] = tdy[ind[i]] 203 if has_error_dx == True: 204 dx[i] = tdx[ind[i]] 205 # Zeros in dx, dy 206 if has_error_dx: 207 dx[dx == 0] = _ZERO 208 if has_error_dy: 209 dy[dy == 0] = _ZERO 210 #Data 211 output.x = x[x != 0] 212 output.y = y[x != 0] 213 output.dy = dy[x != 0] if has_error_dy == True\ 214 else np.zeros(len(output.y)) 215 output.dx = dx[x != 0] if has_error_dx == True\ 216 else np.zeros(len(output.x)) 217 218 output.xaxis("\\rm{Q}", 'A^{-1}') 219 output.yaxis("\\rm{Intensity}", "cm^{-1}") 220 221 # Store loading process information 222 output.meta_data['loader'] = self.type_name 223 if len(output.x) < 1: 224 raise RuntimeError, "%s is empty" % path 225 return output 226 227 else: 228 raise RuntimeError, "%s is not a file" % path 229 return None 230 231 def splitline(self, line): 232 """ 233 Splits a line into pieces based on common delimeters 234 :param line: A single line of text 235 :return: list of values 236 """ 237 # Initial try for CSV (split on ,) 238 toks = line.split(',') 239 # Now try SCSV (split on ;) 240 if len(toks) < 2: 241 toks = line.split(';') 242 # Now go for whitespace 243 if len(toks) < 2: 244 toks = line.split() 245 return toks 157 # Store loading process information 158 
self.current_datainfo.meta_data['loader'] = self.type_name 159 self.send_to_output()
Note: See TracChangeset for help on using the changeset viewer.