1 | """ |
---|
2 | Generic multi-column ASCII data reader |
---|
3 | """ |
---|
4 | ############################################################################ |
---|
5 | # This software was developed by the University of Tennessee as part of the |
---|
6 | # Distributed Data Analysis of Neutron Scattering Experiments (DANSE) |
---|
7 | # project funded by the US National Science Foundation. |
---|
8 | # If you use DANSE applications to do scientific research that leads to |
---|
9 | # publication, we ask that you acknowledge the use of the software with the |
---|
10 | # following sentence: |
---|
11 | # This work benefited from DANSE software developed under NSF award DMR-0520547. |
---|
12 | # copyright 2008, University of Tennessee |
---|
13 | ############################################################################# |
---|

import logging
from sas.sascalc.dataloader.file_reader_base_class import FileReader
from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D
from sas.sascalc.dataloader.loader_exceptions import FileContentsException, \
    DefaultReaderException

logger = logging.getLogger(__name__)


class Reader(FileReader):
    """
    Class to load ASCII files (2, 3, or 4 columns).
    """
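    # A minimal usage sketch (an assumption about the typical entry point;
    # the exact API may differ between SasView versions): data is usually
    # loaded through the generic loader rather than through this class
    # directly, e.g.
    #
    #     from sas.sascalc.dataloader.loader import Loader
    #     data_list = Loader().load("example_1d.txt")
    #
    # where "example_1d.txt" is a hypothetical file name.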
    # File type
    type_name = "ASCII"
    # Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    # List of allowed extensions
    ext = ['.txt', '.dat', '.abs', '.csv']
    # Flag to bypass extension check
    allow_all = True
    # Minimum number of numeric lines required before a block is treated
    # as data
    min_data_pts = 5

    def get_file_contents(self):
        """
        Get the contents of the file
        """

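        # f_open and readall() come from the FileReader base class: the file
        # has already been opened there, and readall() returns its full
        # contents as a single string.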
        buff = self.readall()
        filepath = self.f_open.name
        lines = buff.splitlines()
        self.output = []
        self.current_datainfo = DataInfo()
        self.current_datainfo.filename = filepath
        self.reset_data_list(len(lines))

        # The first good line of data defines whether the file has
        # 2, 3, or 4 columns of data
        has_error_dx = None
        has_error_dy = None

        # Initialize counters for data lines and header lines.
        is_data = False
        # A block only counts as real data once at least min_data_pts
        # consecutive numeric lines have been seen.
        # Number of data-candidate lines in the current block
        candidate_lines = 0
        # Total number of data-candidate lines in previous blocks
        candidate_lines_previous = 0
        # Current line number
        line_no = 0
        # Minimum required number of columns of data
        lentoks = 2
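        # Walk the file line by line.  Numeric lines accumulate as candidate
        # data points; a change in column count or a non-numeric token seen
        # before min_data_pts candidates exist discards the block and
        # restarts the search.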
        for line in lines:
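            # splitline() is a FileReader helper that tokenizes the line
            # (presumably on whitespace and commas, given the CSV support).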
            toks = self.splitline(line.strip())
            # Number of columns in the current line of data
            new_lentoks = len(toks)
            try:
                if new_lentoks == 0:
                    # Skip blank lines, which may appear as breaks
                    # within a data set
                    continue
                elif new_lentoks != lentoks and is_data:
                    # A change in column count after data has been accepted
                    # means a footer was reached: stop and save the data
                    break
                elif new_lentoks != lentoks and not is_data:
                    # The column count changed before any data was accepted,
                    # so the stored lines were numeric header lines: restart
                    # the candidate block
                    candidate_lines = 0
                    self.reset_data_list(len(lines) - line_no)

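                # The first token is always Q; any further tokens are
                # interpreted below as I, dI and dQ, in that order.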
                self.current_dataset.x[candidate_lines] = float(toks[0])

                if new_lentoks > 1:
                    self.current_dataset.y[candidate_lines] = float(toks[1])

                # If a 3rd column is present, consider it dy
                if new_lentoks > 2:
                    self.current_dataset.dy[candidate_lines] = float(toks[2])
                    has_error_dy = True

                # If a 4th column is present, consider it dx
                if new_lentoks > 3:
                    self.current_dataset.dx[candidate_lines] = float(toks[3])
                    has_error_dx = True

                candidate_lines += 1
                # Once enough consecutive numeric lines have accumulated,
                # accept the block as real data
                if candidate_lines >= self.min_data_pts:
                    is_data = True

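                # Eight or more columns is treated as a sign of 2D data
                # (e.g. Qx, Qy, I plus error/resolution columns), which this
                # 1D reader does not handle.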
                if is_data and new_lentoks >= 8:
                    msg = "This data looks like 2D ASCII data. Use the file "
                    msg += "converter tool to convert it to NXcanSAS."
                    raise FileContentsException(msg)

                # Remember the column count of this line so the next
                # line of data can be compared against it
                lentoks = new_lentoks
                line_no += 1
            except ValueError:
                # ValueError is raised when a token cannot be converted to
                # float.  If we are already inside a data block, a
                # non-numeric line marks its end: stop reading
                if is_data:
                    break
                # Otherwise the stored candidate lines were header, not
                # data: discard them and start over
                self.reset_data_list(len(lines) - line_no)
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                # Reset the count of data-candidate lines
                candidate_lines = 0

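        # No block of at least min_data_pts numeric lines was found: clear
        # any partial state and report failure.  The exception type depends
        # on whether the file extension is one this reader explicitly
        # supports.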
        if not is_data:
            self.set_all_to_none()
            if self.extension in self.ext:
                msg = "ASCII Reader error: Fewer than five Q data points "
                msg += "found in {}.".format(filepath)
                raise FileContentsException(msg)
            else:
                msg = "ASCII Reader could not load the file {}".format(filepath)
                raise DefaultReaderException(msg)
        # Sanity check
        if has_error_dy and \
                len(self.current_dataset.y) != len(self.current_dataset.dy):
            msg = "ASCII Reader error: Numbers of I and dI data points"
            msg += " differ in {}.".format(filepath)
            # TODO: Add error to self.current_datainfo.errors instead?
            self.set_all_to_none()
            raise FileContentsException(msg)
        if has_error_dx and \
                len(self.current_dataset.x) != len(self.current_dataset.dx):
            msg = "ASCII Reader error: Numbers of Q and dQ data points"
            msg += " differ in {}.".format(filepath)
            # TODO: Add error to self.current_datainfo.errors instead?
            self.set_all_to_none()
            raise FileContentsException(msg)

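        # Trim the unused pre-allocated points, attach default 1D units and
        # record which loader produced the data before sending it to output.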
        self.remove_empty_q_values()
        self.current_dataset = self.set_default_1d_units(self.current_dataset)

        # Store loading process information
        self.current_datainfo.meta_data['loader'] = self.type_name
        self.send_to_output()
---|