-                      rbeba407
+                      rb09095a
 """
 This is the base file reader class all file readers should inherit from.
+This is the base file reader class most file readers should inherit from.
 All generic functionality required for a file loader/reader is built into this
 class
 …
 import os
 import logging
+import numpy as np
 from abc import abstractmethod
 from loader_exceptions import NoKnownLoaderException, FileContentsException,\
 …
     # List of Data1D and Data2D objects to be sent back to data_loader
     output = []
     # Current plottable1D/2D object being loaded in
+    # Current plottable_(1D/2D) object being loaded in
     current_dataset = None
     # Current DataInfo objecct being loaded in
+    # Current DataInfo object being loaded in
     current_datainfo = None
+    # Wildcards
+    type = ["Text files (*.txt)"]
+    # String to describe the type of data this reader can load
+    type_name = "ASCII"
+    # Wildcards to display
+    type = ["Text files (*.txt|*.TXT)"]
     # List of allowed extensions
     ext = ['.txt']
     # Bypass extension check and try to load anyway
     allow_all = False
+    # Able to import the unit converter
+    has_converter = True
+    # Open file handle
+    f_open = None
+    # Default value of zero
+    _ZERO = 1e-16
     def read(self, filepath):
 …
                 # Try to load the file, but raise an error if unable to.
                 try:
+                    input_f = open(filepath, 'rb')
+                    self.get_file_contents(input_f)
+                    self.unit_converter()
+                    self.f_open = open(filepath, 'rb')
+                    self.get_file_contents()
+                    self.sort_one_d_data()
                 except RuntimeError:
+                    # Reader specific errors
+                    # TODO: Give a specific error.
                     pass
                 except OSError as e:
+                    # If the file cannot be opened
                     msg = "Unable to open file: {}\n".format(filepath)
                     msg += e.message
                     self.handle_error_message(msg)
                 except Exception as e:
+                    self.handle_error_message(e.message)
+                    # Handle any other generic error
+                    # TODO: raise or log?
+                    raise
+                finally:
+                    if not self.f_open.closed:
+                        self.f_open.close()
         else:
             msg = "Unable to find file at: {}\n".format(filepath)
             msg += "Please check your file path and try again."
             self.handle_error_message(msg)
         # Return a list of parsed entries that dataloader can manage
+        # Return a list of parsed entries that data_loader can manage
         return self.output
 …
         self.output.append(data_obj)
+    def unit_converter(self):
+        """
+        Generic unit conversion import
+        """
+        # Check whether we have a converter available
+        self.has_converter = True
+        try:
+            from sas.sascalc.data_util.nxsunit import Converter
+        except:
+            self.has_converter = False
+    def sort_one_d_data(self):
+        """
+        Sort 1D data along the X axis for consistency
+        """
+        final_list = []
+        for data in self.output:
+            if isinstance(data, Data1D):
+                ind = np.lexsort((data.y, data.x))
+                data.x = np.asarray([data.x[i] for i in ind])
+                data.y = np.asarray([data.y[i] for i in ind])
+                if data.dx is not None:
+                    data.dx = np.asarray([data.dx[i] for i in ind])
+                if data.dxl is not None:
+                    data.dxl = np.asarray([data.dxl[i] for i in ind])
+                if data.dxw is not None:
+                    data.dxw = np.asarray([data.dxw[i] for i in ind])
+                if data.dy is not None:
+                    data.dy = np.asarray([data.dy[i] for i in ind])
+                if data.lam is not None:
+                    data.lam = np.asarray([data.lam[i] for i in ind])
+                if data.dlam is not None:
+                    data.dlam = np.asarray([data.dlam[i] for i in ind])
+            final_list.append(data)
+        self.output = final_list
+    @staticmethod
+    def splitline(line):
+        """
+        Splits a line into pieces based on common delimeters
+        :param line: A single line of text
+        :return: list of values
+        """
+        # Initial try for CSV (split on ,)
+        toks = line.split(',')
+        # Now try SCSV (split on ;)
+        if len(toks) < 2:
+            toks = line.split(';')
+        # Now go for whitespace
+        if len(toks) < 2:
+            toks = line.split()
+        return toks
     @abstractmethod
     def get_file_contents(self, contents):
+    def get_file_contents(self):
         """
+        All reader classes that inherit from here should implement
+        :param contents:
+        All reader classes that inherit from FileReader must implement
         """
         pass

src/sas/sascalc/dataloader/readers/ascii_reader.py

-                      r235f514
+                      rb09095a
 """
     ASCII reader
+    Generic multi-column ASCII data reader
 """
 ############################################################################
 #This software was developed by the University of Tennessee as part of the
 #Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
 #project funded by the US National Science Foundation.
 #If you use DANSE applications to do scientific research that leads to
 #publication, we ask that you acknowledge the use of the software with the
 #following sentence:
 #This work benefited from DANSE software developed under NSF award DMR-0520547.
 #copyright 2008, University of Tennessee
+# This software was developed by the University of Tennessee as part of the
+# Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
+# project funded by the US National Science Foundation.
+# If you use DANSE applications to do scientific research that leads to
+# publication, we ask that you acknowledge the use of the software with the
+# following sentence:
+# This work benefited from DANSE software developed under NSF award DMR-0520547.
+# copyright 2008, University of Tennessee
 #############################################################################
+import logging
+import numpy as np
+from sas.sascalc.dataloader.file_reader_base_class import FileReader
+from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D
+import numpy as np
+import os
+from sas.sascalc.dataloader.data_info import Data1D
+# Check whether we have a converter available
+has_converter = True
+try:
+    from sas.sascalc.data_util.nxsunit import Converter
+except:
+    has_converter = False
+_ZERO = 1e-16
+logger = logging.getLogger(__name__)
 class Reader:
+class Reader(FileReader):
     """
     Class to load ascii files (2, 3 or 4 columns).
     """
     ## File type
+    # File type
     type_name = "ASCII"
+    ## Wildcards
+    # Wildcards
     type = ["ASCII files (*.txt)|*.txt",
             "ASCII files (*.dat)|*.dat",
             "ASCII files (*.abs)|*.abs",
             "CSV files (*.csv)|*.csv"]
+    ## List of allowed extensions
+    ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', 'csv', 'CSV']
+    # List of allowed extensions
+    ext = ['.txt', '.dat', '.abs', '.csv']
+    # Flag to bypass extension check
+    allow_all = True
+    # data unless that is the only data
+    min_data_pts = 5
+    ## Flag to bypass extension check
+    allow_all = True
+    def get_file_contents(self):
+        """
+        Get the contents of the file
+        """
+    def read(self, path):
+        """
+        Load data file
+        buff = self.f_open.read()
+        filepath = self.f_open.name
+        lines = buff.splitlines()
+        self.output = []
+        self.current_datainfo = DataInfo()
+        self.current_datainfo.filename = filepath
+        self.reset_data_list(len(lines))
+        :param path: file path
+        :return: Data1D object, or None
+        # The first good line of data will define whether
+        # we have 2-column or 3-column ascii
+        has_error_dx = None
+        has_error_dy = None
+        :raise RuntimeError: when the file can't be opened
+        :raise ValueError: when the length of the data vectors are inconsistent
+        """
+        if os.path.isfile(path):
+            basename = os.path.basename(path)
+            _, extension = os.path.splitext(basename)
+            if self.allow_all or extension.lower() in self.ext:
+                try:
+                    # Read in binary mode since GRASP frequently has no-ascii
+                    # characters that breaks the open operation
+                    input_f = open(path,'rb')
+                except:
+                    raise  RuntimeError, "ascii_reader: cannot open %s" % path
+                buff = input_f.read()
+                lines = buff.splitlines()
+        # Initialize counters for data lines and header lines.
+        is_data = False
+        # More than "5" lines of data is considered as actual
+        # To count # of current data candidate lines
+        candidate_lines = 0
+        # To count total # of previous data candidate lines
+        candidate_lines_previous = 0
+        # Current line number
+        line_no = 0
+        # minimum required number of columns of data
+        lentoks = 2
+        for line in lines:
+            toks = self.splitline(line.strip())
+            # To remember the number of columns in the current line of data
+            new_lentoks = len(toks)
+            try:
+                if new_lentoks == 0:
+                    # If the line is blank, skip and continue on
+                    # In case of breaks within data sets.
+                    continue
+                elif new_lentoks != lentoks and is_data:
+                    # If a footer is found, break the loop and save the data
+                    break
+                elif new_lentoks != lentoks and not is_data:
+                    # If header lines are numerical
+                    candidate_lines = 0
+                    self.reset_data_list(len(lines) - line_no)
+                # Arrays for data storage
+                tx = np.zeros(0)
+                ty = np.zeros(0)
+                tdy = np.zeros(0)
+                tdx = np.zeros(0)
+                candidate_lines += 1
+                # With min_data_pts or more candidate lines, treat this as the data set
+                if candidate_lines >= self.min_data_pts:
+                    is_data = True
+                # The first good line of data will define whether
+                # we have 2-column or 3-column ascii
+                self.current_dataset.x[candidate_lines - 1] = float(toks[0])
+                self.current_dataset.y[candidate_lines - 1] = float(toks[1])
+                # If a 3rd column is present, consider it dy
+                if new_lentoks > 2:
+                    self.current_dataset.dy[candidate_lines - 1] = \
+                        float(toks[2])
+                    has_error_dy = True
+                # If a 4th column is present, consider it dx
+                if new_lentoks > 3:
+                    self.current_dataset.dx[candidate_lines - 1] = \
+                        float(toks[3])
+                    has_error_dx = True
+                # To remember the # of columns on the current line
+                # for the next line of data
+                lentoks = new_lentoks
+                line_no += 1
+            except ValueError:
+                # Already reading data and hit a non-number: stop reading
+                if is_data:
+                    break
+                # Delete the previously stored lines of data candidates if
+                # the list is not data
+                self.reset_data_list(len(lines) - line_no)
+                lentoks = 2
                 has_error_dx = None
                 has_error_dy = None
+                # Reset # of lines of data candidates
+                candidate_lines = 0
+            except Exception:
+                # Handle any unexpected exceptions
+                raise
+                #Initialize counters for data lines and header lines.
+                is_data = False
+                # More than "5" lines of data is considered as actual
+                # data unless that is the only data
+                min_data_pts = 5
+                # To count # of current data candidate lines
+                candidate_lines = 0
+                # To count total # of previous data candidate lines
+                candidate_lines_previous = 0
+                #minimum required number of columns of data
+                lentoks = 2
+                for line in lines:
+                    toks = self.splitline(line)
+                    # To remember the # of columns in the current line of data
+                    new_lentoks = len(toks)
+                    try:
+                        if new_lentoks == 1 and not is_data:
+                            ## If only one item in list, no longer data
+                            raise ValueError
+                        elif new_lentoks == 0:
+                            ## If the line is blank, skip and continue on
+                            ## In case of breaks within data sets.
+                            continue
+                        elif new_lentoks != lentoks and is_data:
+                            ## If a footer is found, break the loop and save the data
+                            break
+                        elif new_lentoks != lentoks and not is_data:
+                            ## If header lines are numerical
+                            candidate_lines = 0
+                            candidate_lines_previous = 0
+        if not is_data:
+            # TODO: Check file extension - primary reader, throw error.
+            # TODO: Secondary check, pass and try next reader
+            msg = "ascii_reader: x has no data"
+            raise RuntimeError(msg)
+        # Sanity check
+        if has_error_dy and not len(self.current_dataset.y) == \
+                len(self.current_dataset.dy):
+            msg = "ascii_reader: y and dy have different length"
+            raise RuntimeError(msg)
+        if has_error_dx and not len(self.current_dataset.x) == \
+                len(self.current_dataset.dx):
+            msg = "ascii_reader: y and dy have different length"
+            raise RuntimeError(msg)
+        # If the data length is zero, consider this as
+        # though we were not able to read the file.
+        if len(self.current_dataset.x) < 1:
+            raise RuntimeError("ascii_reader: could not load file")
+            return None
+                        #Make sure that all columns are numbers.
+                        for colnum in range(len(toks)):
+                            # Any non-floating point values throw ValueError
+                            float(toks[colnum])
+        # Data
+        self.current_dataset.x = \
+            self.current_dataset.x[self.current_dataset.x != 0]
+        self.current_dataset.y = \
+            self.current_dataset.y[self.current_dataset.x != 0]
+        self.current_dataset.dy = \
+            self.current_dataset.dy[self.current_dataset.x != 0] if \
+                has_error_dy else np.zeros(len(self.current_dataset.y))
+        self.current_dataset.dx = \
+            self.current_dataset.dx[self.current_dataset.x != 0] if \
+                has_error_dx else np.zeros(len(self.current_dataset.x))
+                        candidate_lines += 1
+                        _x = float(toks[0])
+                        _y = float(toks[1])
+                        _dx = None
+                        _dy = None
+        self.current_dataset.xaxis("\\rm{Q}", 'A^{-1}')
+        self.current_dataset.yaxis("\\rm{Intensity}", "cm^{-1}")
                         #If 5 or more lines, this is considering the set data
                         if candidate_lines >= min_data_pts:
                             is_data = True
+        # Store loading process information
+        self.current_datainfo.meta_data['loader'] = self.type_name
+        self.send_to_output()
+                        # If a 3rd row is present, consider it dy
+                        if new_lentoks > 2:
+                            _dy = float(toks[2])
+                        has_error_dy = False if _dy is None else True
+                        # If a 4th row is present, consider it dx
+                        if new_lentoks > 3:
+                            _dx = float(toks[3])
+                        has_error_dx = False if _dx is None else True
+                        # Delete the previously stored lines of data candidates if
+                        # the list is not data
+                        if candidate_lines == 1 and -1 < candidate_lines_previous < min_data_pts and \
+                            is_data == False:
+                            try:
+                                tx = np.zeros(0)
+                                ty = np.zeros(0)
+                                tdy = np.zeros(0)
+                                tdx = np.zeros(0)
+                            except:
+                                pass
+                        if has_error_dy == True:
+                            tdy = np.append(tdy, _dy)
+                        if has_error_dx == True:
+                            tdx = np.append(tdx, _dx)
+                        tx = np.append(tx, _x)
+                        ty = np.append(ty, _y)
+                        #To remember the # of columns on the current line
+                        # for the next line of data
+                        lentoks = new_lentoks
+                        candidate_lines_previous = candidate_lines
+                    except ValueError:
+                        # It is data and meet non - number, then stop reading
+                        if is_data == True:
+                            break
+                        lentoks = 2
+                        has_error_dx = None
+                        has_error_dy = None
+                        #Reset # of lines of data candidates
+                        candidate_lines = 0
+                    except:
+                        pass
+                input_f.close()
+                if not is_data:
+                    msg = "ascii_reader: x has no data"
+                    raise RuntimeError, msg
+                # Sanity check
+                if has_error_dy == True and not len(ty) == len(tdy):
+                    msg = "ascii_reader: y and dy have different length"
+                    raise RuntimeError, msg
+                if has_error_dx == True and not len(tx) == len(tdx):
+                    msg = "ascii_reader: y and dy have different length"
+                    raise RuntimeError, msg
+                # If the data length is zero, consider this as
+                # though we were not able to read the file.
+                if len(tx) == 0:
+                    raise RuntimeError, "ascii_reader: could not load file"
+                #Let's re-order the data to make cal.
+                # curve look better some cases
+                ind = np.lexsort((ty, tx))
+                x = np.zeros(len(tx))
+                y = np.zeros(len(ty))
+                dy = np.zeros(len(tdy))
+                dx = np.zeros(len(tdx))
+                output = Data1D(x, y, dy=dy, dx=dx)
+                self.filename = output.filename = basename
+                for i in ind:
+                    x[i] = tx[ind[i]]
+                    y[i] = ty[ind[i]]
+                    if has_error_dy == True:
+                        dy[i] = tdy[ind[i]]
+                    if has_error_dx == True:
+                        dx[i] = tdx[ind[i]]
+                # Zeros in dx, dy
+                if has_error_dx:
+                    dx[dx == 0] = _ZERO
+                if has_error_dy:
+                    dy[dy == 0] = _ZERO
+                #Data
+                output.x = x[x != 0]
+                output.y = y[x != 0]
+                output.dy = dy[x != 0] if has_error_dy == True\
+                    else np.zeros(len(output.y))
+                output.dx = dx[x != 0] if has_error_dx == True\
+                    else np.zeros(len(output.x))
+                output.xaxis("\\rm{Q}", 'A^{-1}')
+                output.yaxis("\\rm{Intensity}", "cm^{-1}")
+                # Store loading process information
+                output.meta_data['loader'] = self.type_name
+                if len(output.x) < 1:
+                    raise RuntimeError, "%s is empty" % path
+                return output
+        else:
+            raise RuntimeError, "%s is not a file" % path
+        return None
+    def splitline(self, line):
+    def reset_data_list(self, no_lines):
         """
+        Splits a line into pieces based on common delimiters
+        :param line: A single line of text
+        :return: list of values
+        Reset the plottable_1D object
         """
+        # Initial try for CSV (split on ,)
+        toks = line.split(',')
+        # Now try SCSV (split on ;)
+        if len(toks) < 2:
+            toks = line.split(';')
+        # Now go for whitespace
+        if len(toks) < 2:
+            toks = line.split()
+        return toks
+        # Initialize data sets with arrays the maximum possible size
+        x = np.zeros(no_lines)
+        y = np.zeros(no_lines)
+        dy = np.zeros(no_lines)
+        dx = np.zeros(no_lines)
+        self.current_dataset = plottable_1D(x, y, dx, dy)

Note: See TracChangeset for help on using the changeset viewer.

SasView

Changeset b09095a in sasview for src/sas/sascalc

Legend:

src/sas/sascalc/dataloader/file_reader_base_class.py

src/sas/sascalc/dataloader/readers/ascii_reader.py

Download in other formats: