1 | """ |
---|
2 | Generic multi-column ASCII data reader |
---|
3 | """ |
---|
4 | ############################################################################ |
---|
5 | # This software was developed by the University of Tennessee as part of the |
---|
6 | # Distributed Data Analysis of Neutron Scattering Experiments (DANSE) |
---|
7 | # project funded by the US National Science Foundation. |
---|
8 | # If you use DANSE applications to do scientific research that leads to |
---|
9 | # publication, we ask that you acknowledge the use of the software with the |
---|
10 | # following sentence: |
---|
11 | # This work benefited from DANSE software developed under NSF award DMR-0520547. |
---|
12 | # copyright 2008, University of Tennessee |
---|
13 | ############################################################################# |
---|
14 | |
---|
15 | import logging |
---|
16 | import numpy as np |
---|
17 | from sas.sascalc.dataloader.file_reader_base_class import FileReader |
---|
18 | from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D |
---|
19 | |
---|
20 | logger = logging.getLogger(__name__) |
---|
21 | |
---|
22 | |
---|
class Reader(FileReader):
    """
    Class to load ascii files (2, 3 or 4 columns).

    Columns are interpreted, in order, as Q, I(Q), dI(Q), dQ.
    """
    # File type
    type_name = "ASCII"
    # Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    # List of allowed extensions
    ext = ['.txt', '.dat', '.abs', '.csv']
    # Flag to bypass extension check
    allow_all = True
    # Minimum number of consecutive numeric lines required before a
    # candidate run is accepted as real data (shorter runs are treated
    # as numeric header noise and discarded).
    min_data_pts = 5

    def get_file_contents(self):
        """
        Parse the already-open file handle (``self.f_open``) into a
        1D data set and send it to the reader output.

        The file is scanned line by line; the first run of at least
        ``min_data_pts`` consecutive lines with a consistent number of
        numeric columns is taken as the data block.  Header and footer
        lines (non-numeric, or with a different column count) are skipped.

        :raises RuntimeError: if no usable data block could be found, or
            if the uncertainty columns are inconsistent with x/y.
        """
        buff = self.f_open.read()
        filepath = self.f_open.name
        lines = buff.splitlines()
        self.output = []
        self.current_datainfo = DataInfo()
        self.current_datainfo.filename = filepath
        self.reset_data_list(len(lines))

        # The first good line of data will define whether we have
        # 2-column, 3-column or 4-column ascii.  None means "not yet seen".
        has_error_dx = None
        has_error_dy = None

        # True once the current candidate run has been accepted as data.
        is_data = False
        # Number of consecutive numeric lines in the current candidate run.
        candidate_lines = 0
        # Current line number (advanced only on successfully parsed lines).
        line_no = 0
        # Number of columns expected on the next line of data
        # (minimum required number of columns is 2).
        lentoks = 2
        for line in lines:
            toks = self.splitline(line.strip())
            # Number of columns in the current line of data.
            new_lentoks = len(toks)
            try:
                if new_lentoks == 0:
                    # Blank line: skip and continue, to tolerate breaks
                    # within data sets.
                    continue
                elif new_lentoks != lentoks and is_data:
                    # Column count changed after data started: footer
                    # found, stop reading and save the data.
                    break
                elif new_lentoks != lentoks and not is_data:
                    # Numeric header lines with a different column count:
                    # restart the candidate run.
                    candidate_lines = 0
                    self.reset_data_list(len(lines) - line_no)

                candidate_lines += 1
                # Enough consecutive numeric lines: accept the run as data.
                if candidate_lines >= self.min_data_pts:
                    is_data = True

                self.current_dataset.x[candidate_lines - 1] = float(toks[0])
                self.current_dataset.y[candidate_lines - 1] = float(toks[1])

                # If a 3rd column is present, consider it dy.
                if new_lentoks > 2:
                    self.current_dataset.dy[candidate_lines - 1] = \
                        float(toks[2])
                    has_error_dy = True

                # If a 4th column is present, consider it dx.
                if new_lentoks > 3:
                    self.current_dataset.dx[candidate_lines - 1] = \
                        float(toks[3])
                    has_error_dx = True

                # Remember the column count for the next line of data.
                lentoks = new_lentoks
                line_no += 1
            except ValueError:
                # Hit a non-numeric line.  If data had already started,
                # this is a footer: stop reading.
                if is_data:
                    break
                # Otherwise discard the previously stored candidate lines
                # and reset all parsing state.
                self.reset_data_list(len(lines) - line_no)
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                candidate_lines = 0

        if not is_data:
            # TODO: Check file extension - primary reader, throw error.
            # TODO: Secondary check, pass and try next reader
            msg = "ascii_reader: x has no data"
            raise RuntimeError(msg)
        # Sanity checks on the uncertainty column lengths.
        if has_error_dy and not len(self.current_dataset.y) == \
                len(self.current_dataset.dy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and not len(self.current_dataset.x) == \
                len(self.current_dataset.dx):
            # Fixed: message previously (incorrectly) referred to y and dy.
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)
        # If the data length is zero, consider this as
        # though we were not able to read the file.
        if len(self.current_dataset.x) < 1:
            raise RuntimeError("ascii_reader: could not load file")

        # Trim the unused tail of the pre-allocated arrays (entries where
        # x == 0 were never filled in).  The mask is computed ONCE, before
        # x is reassigned, so that y/dy/dx are filtered against the
        # original x; recomputing it after reassignment produced a mask of
        # the wrong length for the remaining arrays.
        mask = self.current_dataset.x != 0
        self.current_dataset.x = self.current_dataset.x[mask]
        self.current_dataset.y = self.current_dataset.y[mask]
        self.current_dataset.dy = self.current_dataset.dy[mask] if \
            has_error_dy else np.zeros(len(self.current_dataset.y))
        self.current_dataset.dx = self.current_dataset.dx[mask] if \
            has_error_dx else np.zeros(len(self.current_dataset.x))

        self.current_dataset.xaxis("\\rm{Q}", 'A^{-1}')
        self.current_dataset.yaxis("\\rm{Intensity}", "cm^{-1}")

        # Store loading process information
        self.current_datainfo.meta_data['loader'] = self.type_name
        self.send_to_output()

    def reset_data_list(self, no_lines):
        """
        Reset ``self.current_dataset`` to a fresh plottable_1D whose
        arrays can hold up to *no_lines* points.

        :param no_lines: maximum possible number of data points.
        """
        # Initialize data sets with arrays the maximum possible size
        x = np.zeros(no_lines)
        y = np.zeros(no_lines)
        dy = np.zeros(no_lines)
        dx = np.zeros(no_lines)
        self.current_dataset = plottable_1D(x, y, dx, dy)
---|