source: sasview/src/sas/sascalc/dataloader/readers/ascii_reader.py @ b09095a

Branches/tags: ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, costrafo411, magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
Last change on this file since b09095a was b09095a, checked in by krzywon, 8 years ago

Refactor ASCII reader to use the FileReader class.

  • Property mode set to 100644
File size: 6.8 KB
Line 
1"""
2    Generic multi-column ASCII data reader
3"""
4############################################################################
5# This software was developed by the University of Tennessee as part of the
6# Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
7# project funded by the US National Science Foundation.
8# If you use DANSE applications to do scientific research that leads to
9# publication, we ask that you acknowledge the use of the software with the
10# following sentence:
11# This work benefited from DANSE software developed under NSF award DMR-0520547.
12# copyright 2008, University of Tennessee
13#############################################################################
14
15import logging
16import numpy as np
17from sas.sascalc.dataloader.file_reader_base_class import FileReader
18from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D
19
20logger = logging.getLogger(__name__)
21
22
class Reader(FileReader):
    """
    Class to load ascii files (2, 3 or 4 columns).

    Columns are interpreted, in order, as x, y, dy and dx. Any further
    columns on a line are ignored. A run of lines is accepted as data once
    at least ``min_data_pts`` consecutive non-blank lines with the same
    number of columns have been converted to numbers; anything before such
    a run is treated as header and anything after it as footer.
    """
    # File type
    type_name = "ASCII"
    # Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    # List of allowed extensions
    ext = ['.txt', '.dat', '.abs', '.csv']
    # Flag to bypass extension check
    allow_all = True
    # Minimum number of consecutive numeric lines (with a consistent column
    # count) required before a run of lines is accepted as actual data
    min_data_pts = 5

    def get_file_contents(self):
        """
        Parse the already opened file ``self.f_open`` into a 1D data set.

        The result is stored in ``self.current_dataset`` /
        ``self.current_datainfo`` and handed over with
        ``self.send_to_output()``. Points whose x value is exactly zero are
        dropped; this also removes the unused tail of the pre-allocated
        arrays.

        :raises RuntimeError: if no run of at least ``min_data_pts`` data
            lines is found, or if no point with a non-zero x value remains.
        """
        buff = self.f_open.read()
        filepath = self.f_open.name
        lines = buff.splitlines()
        self.output = []
        self.current_datainfo = DataInfo()
        self.current_datainfo.filename = filepath
        self.reset_data_list(len(lines))

        # Set to True once a dy (3rd) / dx (4th) column has been stored
        has_error_dx = None
        has_error_dy = None
        # True once enough consecutive candidate lines have been accepted
        is_data = False
        # Number of candidate data lines stored in the current run
        candidate_lines = 0
        # Number of columns expected on the next line (minimum is 2)
        lentoks = 2

        for line_no, line in enumerate(lines):
            toks = self.splitline(line.strip())
            # Number of columns on the current line
            new_lentoks = len(toks)
            if new_lentoks == 0:
                # Blank lines are skipped so that breaks within a data set
                # do not end it.
                continue
            if new_lentoks != lentoks:
                if is_data:
                    # Column count changed after real data: this is a footer
                    break
                # Column count changed before enough lines were collected:
                # the earlier candidates were numeric header lines.
                candidate_lines = 0
                has_error_dx = None
                has_error_dy = None
                self.reset_data_list(len(lines) - line_no)

            # Convert the whole line BEFORE storing or counting anything so
            # a partially numeric line can neither leave stale values behind
            # nor be counted towards min_data_pts.
            values = self._parse_values(toks)
            if values is None:
                if is_data:
                    # Non-numeric line after real data: stop reading
                    break
                # Not data after all: discard the candidates gathered so far
                self.reset_data_list(len(lines) - line_no)
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                candidate_lines = 0
                continue

            dataset = self.current_dataset
            dataset.x[candidate_lines] = values[0]
            dataset.y[candidate_lines] = values[1]
            # If a 3rd column is present, consider it dy
            if new_lentoks > 2:
                dataset.dy[candidate_lines] = values[2]
                has_error_dy = True
            # If a 4th column is present, consider it dx
            if new_lentoks > 3:
                dataset.dx[candidate_lines] = values[3]
                has_error_dx = True

            candidate_lines += 1
            if candidate_lines >= self.min_data_pts:
                is_data = True
            # Remember the column count for the next line
            lentoks = new_lentoks

        if not is_data:
            # TODO: Check file extension - primary reader, throw error.
            # TODO: Secondary check, pass and try next reader
            msg = "ascii_reader: x has no data"
            raise RuntimeError(msg)

        dataset = self.current_dataset
        # Sanity checks
        if has_error_dy and len(dataset.y) != len(dataset.dy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and len(dataset.x) != len(dataset.dx):
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)

        # Build the mask once, from the unfiltered x, so that every array is
        # indexed with a mask of matching length.
        keep = dataset.x != 0
        n_points = int(np.count_nonzero(keep))
        # If no point survives, consider this as though we were not able to
        # read the file.
        if n_points < 1:
            raise RuntimeError("ascii_reader: could not load file")

        # Data
        dataset.dy = dataset.dy[keep] if has_error_dy else np.zeros(n_points)
        dataset.dx = dataset.dx[keep] if has_error_dx else np.zeros(n_points)
        dataset.y = dataset.y[keep]
        dataset.x = dataset.x[keep]

        dataset.xaxis("\\rm{Q}", 'A^{-1}')
        dataset.yaxis("\\rm{Intensity}", "cm^{-1}")

        # Store loading process information
        self.current_datainfo.meta_data['loader'] = self.type_name
        self.send_to_output()

    @staticmethod
    def _parse_values(toks):
        """
        Convert the leading (at most four) tokens of a line to floats.

        :param toks: tokens of one line (assumed to be a sequence of
            strings as produced by ``splitline`` - TODO confirm)
        :return: list of floats, or None when the line has fewer than two
            tokens or one of the leading tokens is not a number
        """
        if len(toks) < 2:
            return None
        try:
            return [float(tok) for tok in toks[:4]]
        except ValueError:
            return None

    def reset_data_list(self, no_lines):
        """
        Reset the plottable_1D object

        :param no_lines: length of the zero-filled x, y, dy and dx arrays
            given to the new ``self.current_dataset``
        """
        # Initialize data sets with arrays the maximum possible size
        x = np.zeros(no_lines)
        y = np.zeros(no_lines)
        dy = np.zeros(no_lines)
        dx = np.zeros(no_lines)
        self.current_dataset = plottable_1D(x, y, dx, dy)
Note: See TracBrowser for help on using the repository browser.