← Previous Change
Next Change →

Changeset 5dc01e5 in sasview for src/sas

Timestamp:

Sep 15, 2016 8:38:24 AM (8 years ago)

Author:

Piotr Rozyczko <rozyczko@…>

Branches:

ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc

Children:

d0ccd80f

Parents:

d13386c

git-author:

Jeff Krzywon <krzywon@…> (09/13/16 16:25:23)

git-committer:

Piotr Rozyczko <rozyczko@…> (09/15/16 08:38:24)

Message:

Fixes #658: Modified ASCII reader to only accept data from lines with the same number of delimited values.

File:

: 1 edited

src/sas/sascalc/dataloader/readers/ascii_reader.py (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

src/sas/sascalc/dataloader/readers/ascii_reader.py

-                      rb699768
+                      r5dc01e5
     ## File type
     type_name = "ASCII"
     ## Wildcards
     type = ["ASCII files (*.txt)|*.txt",
 …
     ## List of allowed extensions
     ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', 'csv', 'CSV']
     ## Flag to bypass extension check
     allow_all = True
     def read(self, path):
         """
         Load data file
         :param path: file path
         :return: Data1D object, or None
         :raise RuntimeError: when the file can't be opened
         :raise ValueError: when the length of the data vectors are inconsistent
 …
                 try:
                     # Read in binary mode since GRASP frequently has no-ascii
                     # characters that brakes the open operation
+                    # characters that break the open operation
                     input_f = open(path,'rb')
                 except:
 …
                 buff = input_f.read()
                 lines = buff.splitlines()
+                x  = numpy.zeros(0)
+                y  = numpy.zeros(0)
+                dy = numpy.zeros(0)
+                dx = numpy.zeros(0)
+                #temp. space to sort data
+                tx  = numpy.zeros(0)
+                ty  = numpy.zeros(0)
+                # Arrays for data storage
+                tx = numpy.zeros(0)
+                ty = numpy.zeros(0)
                 tdy = numpy.zeros(0)
                 tdx = numpy.zeros(0)
+                output = Data1D(x, y, dy=dy, dx=dx)
+                self.filename = output.filename = basename
+                data_conv_q = None
+                data_conv_i = None
+                if has_converter == True and output.x_unit != '1/A':
+                    data_conv_q = Converter('1/A')
+                    # Test it
+                    data_conv_q(1.0, output.x_unit)
+                if has_converter == True and output.y_unit != '1/cm':
+                    data_conv_i = Converter('1/cm')
+                    # Test it
+                    data_conv_i(1.0, output.y_unit)
                 # The first good line of data will define whether
                 # we have 2-column or 3-column ascii
                 has_error_dx = None
                 has_error_dy = None
                 #Initialize counters for data lines and header lines.
                 is_data = False  # Has more than 5 lines
+                is_data = False
                 # More than "5" lines of data is considered as actual
                 # data unless that is the only data
                 mum_data_lines = 5
+                min_data_pts = 5
                 # To count # of current data candidate lines
                 i = -1
+                candidate_lines = 0
                 # To count total # of previous data candidate lines
+                i1 = -1
+                # To count # of header lines
+                j = -1
+                # Helps to count # of header lines
+                j1 = -1
+                #minimum required number of columns of data; ( <= 4).
+                candidate_lines_previous = 0
+                #minimum required number of columns of data
                 lentoks = 2
                 for line in lines:
+                    # Initial try for CSV (split on ,)
+                    toks = line.split(',')
+                    # Now try SCSV (split on ;)
+                    if len(toks) < 2:
+                        toks = line.split(';')
+                    # Now go for whitespace
+                    if len(toks) < 2:
+                        toks = line.split()
+                    toks = self.splitline(line)
+                    # To remember the # of columns in the current line of data
+                    new_lentoks = len(toks)
                     try:
+                        if new_lentoks == 1 and not is_data:
+                            ## If only one item in list, no longer data
+                            raise ValueError
+                        elif new_lentoks == 0:
+                            ## If the line is blank, skip and continue on
+                            ## In case of breaks within data sets.
+                            continue
+                        elif new_lentoks != lentoks and is_data:
+                            ## If a footer is found, break the loop and save the data
+                            break
+                        elif new_lentoks != lentoks and not is_data:
+                            ## If header lines are numerical
+                            candidate_lines = 0
+                            candidate_lines_previous = 0
                         #Make sure that all columns are numbers.
                         for colnum in range(len(toks)):
+                            # Any non-floating point values throw ValueError
                             float(toks[colnum])
+                        candidate_lines += 1
                         _x = float(toks[0])
                         _y = float(toks[1])
+                        #Reset the header line counters
+                        if j == j1:
+                            j = 0
+                            j1 = 0
+                        if i > 1:
+                        _dx = None
+                        _dy = None
+                        #If 5 or more candidate lines, consider this set to be data
+                        if candidate_lines >= min_data_pts:
                             is_data = True
+                        if data_conv_q is not None:
+                            _x = data_conv_q(_x, units=output.x_unit)
+                        if data_conv_i is not None:
+                            _y = data_conv_i(_y, units=output.y_unit)
+                        # If we have an extra token, check
+                        # whether it can be interpreted as a
+                        # third column.
+                        _dy = None
+                        if len(toks) > 2:
+                            try:
+                                _dy = float(toks[2])
+                                if data_conv_i is not None:
+                                    _dy = data_conv_i(_dy, units=output.y_unit)
+                            except:
+                                # The third column is not a float, skip it.
+                                pass
+                        # If we haven't set the 3rd column
+                        # flag, set it now.
+                        if has_error_dy == None:
+                            has_error_dy = False if _dy == None else True
+                        #Check for dx
+                        _dx = None
+                        if len(toks) > 3:
+                            try:
+                                _dx = float(toks[3])
+                                if data_conv_i is not None:
+                                    _dx = data_conv_i(_dx, units=output.x_unit)
+                            except:
+                                # The 4th column is not a float, skip it.
+                                pass
+                        # If we haven't set the 3rd column
+                        # flag, set it now.
+                        if has_error_dx == None:
+                            has_error_dx = False if _dx == None else True
+                        #After talked with PB, we decided to take care of only
+                        # 4 columns of data for now.
+                        #number of columns in the current line
+                        #To remember the # of columns in the current
+                        #line of data
+                        new_lentoks = len(toks)
+                        #If the previous columns not equal to the current,
+                        #mark the previous as non-data and reset the dependents.
+                        if lentoks != new_lentoks:
+                            if is_data == True:
+                                break
+                            else:
+                                i = -1
+                                i1 = 0
+                                j = -1
+                                j1 = -1
+                        #Delete the previously stored lines of data candidates
+                        # if is not data.
+                        if i < 0 and -1 < i1 < mum_data_lines and \
+                            is_data == False:
+                            try:
+                                x = numpy.zeros(0)
+                                y = numpy.zeros(0)
+                            except:
+                                pass
+                        x = numpy.append(x, _x)
+                        y = numpy.append(y, _y)
+                        if has_error_dy == True:
+                            #Delete the previously stored lines of
+                            # data candidates if is not data.
+                            if i < 0 and -1 < i1 < mum_data_lines and \
+                                is_data == False:
+                                try:
+                                    dy = numpy.zeros(0)
+                                except:
+                                    pass
+                            dy = numpy.append(dy, _dy)
+                        if has_error_dx == True:
+                            #Delete the previously stored lines of
+                            # data candidates if is not data.
+                            if i < 0 and -1 < i1 < mum_data_lines and \
+                                is_data == False:
+                                try:
+                                    dx = numpy.zeros(0)
+                                except:
+                                    pass
+                            dx = numpy.append(dx, _dx)
+                        #Same for temp.
+                        #Delete the previously stored lines of data candidates
+                        # if is not data.
+                        if i < 0 and -1 < i1 < mum_data_lines and\
+                        # If a 3rd column is present, consider it dy
+                        if new_lentoks > 2:
+                            _dy = float(toks[2])
+                        has_error_dy = False if _dy == None else True
+                        # If a 4th column is present, consider it dx
+                        if new_lentoks > 3:
+                            _dx = float(toks[3])
+                        has_error_dx = False if _dx == None else True
+                        # Delete the previously stored lines of data candidates if
+                        # the list is not data
+                        if candidate_lines == 1 and -1 < candidate_lines_previous < min_data_pts and \
                             is_data == False:
                             try:
                                 tx = numpy.zeros(0)
                                 ty = numpy.zeros(0)
+                                tdy = numpy.zeros(0)
+                                tdx = numpy.zeros(0)
                             except:
                                 pass
+                        if has_error_dy == True:
+                            tdy = numpy.append(tdy, _dy)
+                        if has_error_dx == True:
+                            tdx = numpy.append(tdx, _dx)
                         tx = numpy.append(tx, _x)
                         ty = numpy.append(ty, _y)
+                        if has_error_dy == True:
+                            #Delete the previously stored lines of
+                            # data candidates if is not data.
+                            if i < 0 and -1 < i1 < mum_data_lines and \
+                                is_data == False:
+                                try:
+                                    tdy = numpy.zeros(0)
+                                except:
+                                    pass
+                            tdy = numpy.append(tdy, _dy)
+                        if has_error_dx == True:
+                            #Delete the previously stored lines of
+                            # data candidates if is not data.
+                            if i < 0 and -1 < i1 < mum_data_lines and \
+                                is_data == False:
+                                try:
+                                    tdx = numpy.zeros(0)
+                                except:
+                                    pass
+                            tdx = numpy.append(tdx, _dx)
+                        #reset i1 and flag lentoks for the next
+                        if lentoks < new_lentoks:
+                            if is_data == False:
+                                i1 = -1
                         #To remember the # of columns on the current line
                         # for the next line of data
+                        lentoks = len(toks)
+                        #Reset # of header lines and counts #
+                        # of data candidate lines
+                        if j == 0 and j1 == 0:
+                            i1 = i + 1
+                        i += 1
+                    except:
+                        lentoks = new_lentoks
+                        candidate_lines_previous = candidate_lines
+                    except ValueError:
                         # It is data and meet non - number, then stop reading
                         if is_data == True:
                             break
                         lentoks = 2
+                        #Counting # of header lines
+                        j += 1
+                        if j == j1 + 1:
+                            j1 = j
+                        else:
+                            j = -1
+                        has_error_dx = None
+                        has_error_dy = None
                         #Reset # of lines of data candidates
+                        i = -1
+                        # Couldn't parse this line, skip it
+                        candidate_lines = 0
+                    except:
                         pass
                 input_f.close()
                 # Sanity check
                 if has_error_dy == True and not len(y) == len(dy):
+                if has_error_dy == True and not len(ty) == len(tdy):
                     msg = "ascii_reader: y and dy have different length"
                     raise RuntimeError, msg
                 if has_error_dx == True and not len(x) == len(dx):
+                if has_error_dx == True and not len(tx) == len(tdx):
                     msg = "ascii_reader: y and dy have different length"
                     raise RuntimeError, msg
                 # If the data length is zero, consider this as
                 # though we were not able to read the file.
                 if len(x) == 0:
+                if len(tx) == 0:
                     raise RuntimeError, "ascii_reader: could not load file"
                 #Let's re-order the data to make cal.
                 # curve look better some cases
                 ind = numpy.lexsort((ty, tx))
+                x = numpy.zeros(len(tx))
+                y = numpy.zeros(len(ty))
+                dy = numpy.zeros(len(tdy))
+                dx = numpy.zeros(len(tdx))
+                output = Data1D(x, y, dy=dy, dx=dx)
+                self.filename = output.filename = basename
                 for i in ind:
                     x[i] = tx[ind[i]]
 …
                 output.dx = dx[x != 0] if has_error_dx == True\
                     else numpy.zeros(len(output.x))
+                if data_conv_q is not None:
+                    output.xaxis("\\rm{Q}", output.x_unit)
+                else:
+                    output.xaxis("\\rm{Q}", 'A^{-1}')
+                if data_conv_i is not None:
+                    output.yaxis("\\rm{Intensity}", output.y_unit)
+                else:
+                    output.yaxis("\\rm{Intensity}", "cm^{-1}")
+                output.xaxis("\\rm{Q}", 'A^{-1}')
+                output.yaxis("\\rm{Intensity}", "cm^{-1}")
                 # Store loading process information
                 output.meta_data['loader'] = self.type_name
 …
                     raise RuntimeError, "%s is empty" % path
                 return output
         else:
             raise RuntimeError, "%s is not a file" % path
         return None
+    def splitline(self, line):
+        """
+        Splits a line into pieces based on common delimiters
+        :param line: A single line of text
+        :return: list of values
+        """
+        # Initial try for CSV (split on ,)
+        toks = line.split(',')
+        # Now try SCSV (split on ;)
+        if len(toks) < 2:
+            toks = line.split(';')
+        # Now go for whitespace
+        if len(toks) < 2:
+            toks = line.split()
+        return toks

Note: See TracChangeset for help on using the changeset viewer.

SasView

Changeset 5dc01e5 in sasview for src/sas

Legend:

src/sas/sascalc/dataloader/readers/ascii_reader.py

Download in other formats: