source: sasview/src/sas/sascalc/dataloader/readers/ascii_reader.py @ 5dc01e5

Branches: ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc
Last change on this file since 5dc01e5 was 5dc01e5, checked in by Piotr Rozyczko <rozyczko@…>, 8 years ago

Fixes #658: Modified ASCII reader to only accept data from lines with the same number of delimited values.

  • Property mode set to 100644
File size: 9.8 KB
RevLine 
[7d6351e]1"""
2    ASCII reader
3"""
[0997158f]4############################################################################
5#This software was developed by the University of Tennessee as part of the
6#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
7#project funded by the US National Science Foundation.
[7d6351e]8#If you use DANSE applications to do scientific research that leads to
9#publication, we ask that you acknowledge the use of the software with the
[0997158f]10#following sentence:
[7d6351e]11#This work benefited from DANSE software developed under NSF award DMR-0520547.
[0997158f]12#copyright 2008, University of Tennessee
13#############################################################################
14
[8bd8ea4]15
16import numpy
17import os
[b699768]18from sas.sascalc.dataloader.data_info import Data1D
[8bd8ea4]19
[daa56d0]20# Check whether we have a converter available
[99d1af6]21has_converter = True
22try:
[b699768]23    from sas.sascalc.data_util.nxsunit import Converter
[99d1af6]24except:
25    has_converter = False
[da96629]26_ZERO = 1e-16
[99d1af6]27
[7d6351e]28
class Reader:
    """
    Class to load ascii files (2, 3 or 4 columns).

    Columns are interpreted, in order, as x, y, dy and dx.
    """
    ## File type
    type_name = "ASCII"

    ## Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    ## List of allowed extensions (each entry includes the leading dot,
    ## which is what os.path.splitext returns)
    ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', '.csv', '.CSV']

    ## Flag to bypass extension check
    allow_all = True

    def read(self, path):
        """
        Load data file

        :param path: file path
        :return: Data1D object, or None if the extension is not accepted

        :raise RuntimeError: when the path is not a file, the file can't
            be opened, no data could be read from it, or the lengths of
            the data vectors are inconsistent
        """
        if not os.path.isfile(path):
            raise RuntimeError("%s is not a file" % path)

        basename = os.path.basename(path)
        _, extension = os.path.splitext(basename)
        if not (self.allow_all or extension.lower() in self.ext):
            return None

        try:
            # Read in binary mode since GRASP frequently has non-ascii
            # characters that break the open operation
            input_f = open(path, 'rb')
        except (IOError, OSError):
            raise RuntimeError("ascii_reader: cannot open %s" % path)
        # The context manager closes the file even if reading fails
        with input_f:
            buff = input_f.read()
        lines = buff.splitlines()

        tx, ty, tdy, tdx, has_error_dx, has_error_dy = \
            self._parse_lines(lines)

        # Sanity check
        if has_error_dy and len(ty) != len(tdy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and len(tx) != len(tdx):
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)
        # If the data length is zero, consider this as
        # though we were not able to read the file.
        if len(tx) == 0:
            raise RuntimeError("ascii_reader: could not load file")

        # Re-order the data by x (ties broken by y) so that the
        # calculated curve looks better in some cases
        ind = numpy.lexsort((ty, tx))
        x = tx[ind]
        y = ty[ind]
        dy = tdy[ind] if has_error_dy else numpy.zeros(len(tdy))
        dx = tdx[ind] if has_error_dx else numpy.zeros(len(tdx))
        output = Data1D(x, y, dy=dy, dx=dx)
        self.filename = output.filename = basename

        # Replace zero uncertainties by a tiny non-zero value
        if has_error_dx:
            dx[dx == 0] = _ZERO
        if has_error_dy:
            dy[dy == 0] = _ZERO

        # Data: points with x == 0 are dropped
        nonzero_x = x != 0
        output.x = x[nonzero_x]
        output.y = y[nonzero_x]
        if has_error_dy:
            output.dy = dy[nonzero_x]
        else:
            output.dy = numpy.zeros(len(output.y))
        if has_error_dx:
            output.dx = dx[nonzero_x]
        else:
            output.dx = numpy.zeros(len(output.x))

        output.xaxis("\\rm{Q}", 'A^{-1}')
        output.yaxis("\\rm{Intensity}", "cm^{-1}")

        # Store loading process information
        output.meta_data['loader'] = self.type_name
        if len(output.x) < 1:
            raise RuntimeError("%s is empty" % path)
        return output

    def _parse_lines(self, lines):
        """
        Extract the numeric data columns from the lines of a file.

        A run of consecutive lines that all hold the same number of
        numeric values is a data candidate; once the run reaches 5 lines
        it is accepted as the data set, and reading stops at the first
        following line that is non-numeric or has a different number of
        columns. Shorter runs are discarded when a new run starts, so a
        short run is only kept if nothing numeric follows it. Blank
        lines are skipped.

        :param lines: list of lines (str or bytes)
        :return: tuple (tx, ty, tdy, tdx, has_error_dx, has_error_dy);
            the first four are float arrays, the flags are True/False
            according to the last numeric line read, or None if the last
            candidate run was ended by a non-numeric line
        """
        # Lists are used for accumulation (numpy.append copies the
        # whole array on every call)
        tx, ty, tdy, tdx = [], [], [], []

        # The first good line of data will define whether
        # we have 2-column, 3-column or 4-column ascii
        has_error_dx = None
        has_error_dy = None

        is_data = False
        # A run of at least this many lines is considered as actual
        # data, unless a shorter run is the only data
        min_data_pts = 5
        # Number of lines in the current candidate run
        candidate_lines = 0
        # Number of columns expected on the next line
        lentoks = 2

        for line in lines:
            toks = self.splitline(line)
            new_lentoks = len(toks)
            if new_lentoks == 0:
                # Blank line: skip, in case of breaks within data sets
                continue

            values = None
            if new_lentoks > 1 or is_data:
                if new_lentoks != lentoks:
                    if is_data:
                        # A footer is found: stop and keep the data
                        break
                    # Numerical header lines: start a new candidate run
                    candidate_lines = 0
                try:
                    # All columns must be numbers
                    values = [float(tok) for tok in toks]
                except ValueError:
                    values = None

            if values is None:
                # Non-numeric line (a lone item is never data)
                if is_data:
                    break
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                candidate_lines = 0
                continue

            candidate_lines += 1
            if candidate_lines >= min_data_pts:
                is_data = True

            # A 3rd column is dy, a 4th column is dx
            has_error_dy = new_lentoks > 2
            has_error_dx = new_lentoks > 3

            # A new candidate run starts: drop whatever shorter run
            # was stored before it
            if candidate_lines == 1 and not is_data:
                tx, ty, tdy, tdx = [], [], [], []

            tx.append(values[0])
            ty.append(values[1])
            if has_error_dy:
                tdy.append(values[2])
            if has_error_dx:
                tdx.append(values[3])

            # Remember the number of columns for the next line
            lentoks = new_lentoks

        return (numpy.array(tx, dtype=float),
                numpy.array(ty, dtype=float),
                numpy.array(tdy, dtype=float),
                numpy.array(tdx, dtype=float),
                has_error_dx, has_error_dy)

    def splitline(self, line):
        """
        Splits a line into pieces based on common delimiters

        Commas are tried first, then semicolons, then whitespace.

        :param line: A single line of text (str or bytes)
        :return: list of values, of the same type as line
        """
        # Files are read in binary mode, so under Python 3 the lines are
        # bytes and need bytes delimiters
        if isinstance(line, bytes):
            comma, semicolon = b',', b';'
        else:
            comma, semicolon = ',', ';'
        # Initial try for CSV (split on ,)
        toks = line.split(comma)
        # Now try SCSV (split on ;)
        if len(toks) < 2:
            toks = line.split(semicolon)
        # Now go for whitespace
        if len(toks) < 2:
            toks = line.split()
        return toks
Note: See TracBrowser for help on using the repository browser.