source: sasview/src/sas/sascalc/dataloader/readers/ascii_reader.py @ cf1910f

Branches: ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, costrafo411, magnetic_scatt, release-4.1.1, release-4.1.2, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
Last change on this file since cf1910f was 7d94915, checked in by krzywon, 8 years ago

Fixes #658: ASCII reader files less than 5 lines long get thrown out.

  • Property mode set to 100644
File size: 9.9 KB
RevLine 
[7d6351e]1"""
2    ASCII reader
3"""
[0997158f]4############################################################################
5#This software was developed by the University of Tennessee as part of the
6#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
7#project funded by the US National Science Foundation.
[7d6351e]8#If you use DANSE applications to do scientific research that leads to
9#publication, we ask that you acknowledge the use of the software with the
[0997158f]10#following sentence:
[7d6351e]11#This work benefited from DANSE software developed under NSF award DMR-0520547.
[0997158f]12#copyright 2008, University of Tennessee
13#############################################################################
14
[8bd8ea4]15
16import numpy
17import os
[b699768]18from sas.sascalc.dataloader.data_info import Data1D
[8bd8ea4]19
# Check whether we have a converter available.
# has_converter records whether the optional Converter import succeeded.
has_converter = True
try:
    from sas.sascalc.data_util.nxsunit import Converter
except ImportError:
    # Only a failed import means "no converter"; anything else (e.g. a
    # KeyboardInterrupt during import) should not be silently swallowed.
    has_converter = False
# Value substituted for zero-valued dx/dy entries when a file is loaded
_ZERO = 1e-16
[99d1af6]27
[7d6351e]28
class Reader:
    """
    Class to load ascii files (2, 3 or 4 columns).

    Columns are read, in order, as x, y, dy and dx.  Values on a line may
    be separated by commas, semicolons or whitespace (see ``splitline``).
    """
    ## File type
    type_name = "ASCII"

    ## Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    ## List of allowed extensions (each entry includes the leading dot so
    ## that it can match the output of os.path.splitext)
    ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', '.csv', '.CSV']

    ## Flag to bypass extension check
    allow_all = True

    def read(self, path):
        """
        Load data file

        :param path: file path
        :return: Data1D object, or None when the extension is not accepted
            or when the file holds no run of at least 5 consecutive
            numeric lines

        :raise RuntimeError: when the path is not a file, when the file
            can't be opened, when the data vectors have inconsistent
            lengths, or when no usable data point is left
        """
        if not os.path.isfile(path):
            raise RuntimeError("%s is not a file" % path)

        basename = os.path.basename(path)
        _, extension = os.path.splitext(basename)
        if not (self.allow_all or extension.lower() in self.ext):
            return None

        try:
            # Read in binary mode since GRASP frequently has no-ascii
            # characters that breaks the open operation
            input_f = open(path, 'rb')
        except (IOError, OSError):
            raise RuntimeError("ascii_reader: cannot open %s" % path)
        # The context manager closes the file even if reading fails
        with input_f:
            buff = input_f.read()
        lines = buff.splitlines()

        # Arrays for data storage
        tx = numpy.zeros(0)
        ty = numpy.zeros(0)
        tdy = numpy.zeros(0)
        tdx = numpy.zeros(0)

        # The last good line of data read defines whether
        # we have dy (3rd column) and dx (4th column)
        has_error_dx = None
        has_error_dy = None

        # Becomes True once enough consecutive numeric lines were seen
        is_data = False
        # "5" or more consecutive numeric lines are considered actual data
        min_data_pts = 5
        # To count # of current data candidate lines
        candidate_lines = 0
        # To count total # of previous data candidate lines
        candidate_lines_previous = 0
        # Minimum required number of columns of data
        lentoks = 2

        for line in lines:
            # A binary read yields bytes lines on Python 3; decode them so
            # the str-based splitting below works.  Undecodable bytes become
            # replacement characters, which makes the line non-numeric.
            if not isinstance(line, str):
                line = line.decode('utf-8', 'replace')
            toks = self.splitline(line)
            # To remember the # of columns in the current line of data
            new_lentoks = len(toks)
            try:
                if new_lentoks == 1 and not is_data:
                    # If only one item in list, no longer data
                    raise ValueError
                elif new_lentoks == 0:
                    # If the line is blank, skip and continue on
                    # In case of breaks within data sets.
                    continue
                elif new_lentoks != lentoks:
                    if is_data:
                        # If a footer is found, break the loop and
                        # save the data
                        break
                    # If header lines are numerical, start counting again
                    candidate_lines = 0
                    candidate_lines_previous = 0

                # Make sure that all columns are numbers.
                # Any non-floating point value throws ValueError
                values = [float(tok) for tok in toks]

                candidate_lines += 1
                _x = values[0]
                _y = values[1]

                # If 5 or more lines, this is considered the data set
                if candidate_lines >= min_data_pts:
                    is_data = True

                # If a 3rd column is present, consider it dy
                _dy = values[2] if new_lentoks > 2 else None
                has_error_dy = _dy is not None

                # If a 4th column is present, consider it dx
                _dx = values[3] if new_lentoks > 3 else None
                has_error_dx = _dx is not None

                # Delete the previously stored lines of data candidates if
                # the list is not data
                if candidate_lines == 1 and not is_data and \
                        -1 < candidate_lines_previous < min_data_pts:
                    tx = numpy.zeros(0)
                    ty = numpy.zeros(0)
                    tdy = numpy.zeros(0)
                    tdx = numpy.zeros(0)

                if has_error_dy:
                    tdy = numpy.append(tdy, _dy)
                if has_error_dx:
                    tdx = numpy.append(tdx, _dx)
                tx = numpy.append(tx, _x)
                ty = numpy.append(ty, _y)

                # To remember the # of columns on the current line
                # for the next line of data
                lentoks = new_lentoks
                candidate_lines_previous = candidate_lines
            except ValueError:
                # It is data and meet non - number, then stop reading
                if is_data:
                    break
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                # Reset # of lines of data candidates
                candidate_lines = 0

        if not is_data:
            return None

        # Sanity check
        if has_error_dy and len(ty) != len(tdy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and len(tx) != len(tdx):
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)
        # If the data length is zero, consider this as
        # though we were not able to read the file.
        if len(tx) == 0:
            raise RuntimeError("ascii_reader: could not load file")

        # Let's re-order the data (by x, ties broken by y) to make cal.
        # curve look better some cases
        ind = numpy.lexsort((ty, tx))
        x = numpy.zeros(len(tx))
        y = numpy.zeros(len(ty))
        dy = numpy.zeros(len(tdy))
        dx = numpy.zeros(len(tdx))
        output = Data1D(x, y, dy=dy, dx=dx)
        self.filename = output.filename = basename

        # Fill the arrays in place, in sorted order
        x[:] = tx[ind]
        y[:] = ty[ind]
        if has_error_dy:
            dy[:] = tdy[ind]
        if has_error_dx:
            dx[:] = tdx[ind]

        # Zeros in dx, dy are replaced by a tiny non-zero value
        if has_error_dx:
            dx[dx == 0] = _ZERO
        if has_error_dy:
            dy[dy == 0] = _ZERO

        # Data: points with x == 0 are dropped
        nonzero_x = x != 0
        output.x = x[nonzero_x]
        output.y = y[nonzero_x]
        if has_error_dy:
            output.dy = dy[nonzero_x]
        else:
            output.dy = numpy.zeros(len(output.y))
        if has_error_dx:
            output.dx = dx[nonzero_x]
        else:
            output.dx = numpy.zeros(len(output.x))

        output.xaxis("\\rm{Q}", 'A^{-1}')
        output.yaxis("\\rm{Intensity}", "cm^{-1}")

        # Store loading process information
        output.meta_data['loader'] = self.type_name
        if len(output.x) < 1:
            raise RuntimeError("%s is empty" % path)
        return output

    def splitline(self, line):
        """
        Splits a line into pieces based on common delimiters.

        Commas are tried first, then semicolons, then whitespace; the first
        delimiter that yields at least two pieces wins.

        :param line: A single line of text
        :return: list of values (empty for a blank line)
        """
        # Initial try for CSV (split on ,)
        toks = line.split(',')
        # Now try SCSV (split on ;)
        if len(toks) < 2:
            toks = line.split(';')
        # Now go for whitespace
        if len(toks) < 2:
            toks = line.split()
        return toks
Note: See TracBrowser for help on using the repository browser.