[959eb01] | 1 | """ |
---|
[b09095a] | 2 | Generic multi-column ASCII data reader |
---|
[959eb01] | 3 | """ |
---|
| 4 | ############################################################################ |
---|
[b09095a] | 5 | # This software was developed by the University of Tennessee as part of the |
---|
| 6 | # Distributed Data Analysis of Neutron Scattering Experiments (DANSE) |
---|
| 7 | # project funded by the US National Science Foundation. |
---|
| 8 | # If you use DANSE applications to do scientific research that leads to |
---|
| 9 | # publication, we ask that you acknowledge the use of the software with the |
---|
| 10 | # following sentence: |
---|
| 11 | # This work benefited from DANSE software developed under NSF award DMR-0520547. |
---|
| 12 | # copyright 2008, University of Tennessee |
---|
[959eb01] | 13 | ############################################################################# |
---|
| 14 | |
---|
[b09095a] | 15 | import logging |
---|
[959eb01] | 16 | import numpy as np |
---|
[b09095a] | 17 | from sas.sascalc.dataloader.file_reader_base_class import FileReader |
---|
| 18 | from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D |
---|
[959eb01] | 19 | |
---|
[b09095a] | 20 | logger = logging.getLogger(__name__) |
---|
[959eb01] | 21 | |
---|
| 22 | |
---|
class Reader(FileReader):
    """
    Class to load ascii files (2, 3 or 4 columns).
    """
    # File type
    type_name = "ASCII"
    # Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    # List of allowed extensions
    ext = ['.txt', '.dat', '.abs', '.csv']
    # Flag to bypass extension check
    allow_all = True
    # Minimum number of consecutive numeric lines (with a constant column
    # count) required before candidate lines are accepted as actual data
    min_data_pts = 5

    def get_file_contents(self):
        """
        Read a 2-, 3- or 4-column ASCII file and populate ``self.output``.

        Columns are interpreted as x, y[, dy[, dx]].  Header lines are
        skipped until at least ``min_data_pts`` consecutive numeric lines
        with a constant column count are found; after that point a change
        in column count or a non-numeric line is treated as a footer and
        reading stops.

        :raises RuntimeError: if no usable data could be read, or if the
            error columns do not match the data columns in length.
        """
        buff = self.f_open.read()
        filepath = self.f_open.name
        lines = buff.splitlines()
        self.output = []
        self.current_datainfo = DataInfo()
        self.current_datainfo.filename = filepath
        self.reset_data_list(len(lines))

        # The first good line of data will define whether
        # we have 2-column or 3-column ascii
        has_error_dx = None
        has_error_dy = None

        # Whether we have accepted the candidate lines as real data
        is_data = False
        # Number of current consecutive data-candidate lines
        candidate_lines = 0
        # Count of successfully parsed lines (used to size the re-allocated
        # arrays when candidates are discarded)
        line_no = 0
        # Column count of the previous data line; 2 is the minimum required
        lentoks = 2
        for line in lines:
            toks = self.splitline(line.strip())
            # Number of columns on the current line
            new_lentoks = len(toks)
            try:
                if new_lentoks == 0:
                    # Blank line: skip and continue, in case of breaks
                    # within data sets
                    continue
                elif new_lentoks != lentoks and is_data:
                    # A footer was found: stop the loop and save the data
                    break
                elif new_lentoks != lentoks and not is_data:
                    # Column count changed while still in numerical header
                    # lines: discard previous candidates and start over
                    candidate_lines = 0
                    self.reset_data_list(len(lines) - line_no)

                self.current_dataset.x[candidate_lines] = float(toks[0])
                self.current_dataset.y[candidate_lines] = float(toks[1])

                # If a 3rd column is present, consider it dy
                if new_lentoks > 2:
                    self.current_dataset.dy[candidate_lines] = \
                        float(toks[2])
                    has_error_dy = True

                # If a 4th column is present, consider it dx
                if new_lentoks > 3:
                    self.current_dataset.dx[candidate_lines] = \
                        float(toks[3])
                    has_error_dx = True

                candidate_lines += 1
                # Enough consecutive numeric lines: accept as the data set
                if candidate_lines >= self.min_data_pts:
                    is_data = True

                # Remember the column count for the next line of data
                lentoks = new_lentoks
                line_no += 1
            except ValueError:
                # Non-numeric token: if we already have real data, stop
                # reading here (footer); otherwise discard the previously
                # stored candidate lines and keep scanning
                if is_data:
                    break
                self.reset_data_list(len(lines) - line_no)
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                # Reset # of lines of data candidates
                candidate_lines = 0

        if not is_data:
            # TODO: Check file extension - primary reader, throw error.
            # TODO: Secondary check, pass and try next reader
            msg = "ascii_reader: x has no data"
            raise RuntimeError(msg)
        # Sanity checks: error columns must match the data columns in size
        if has_error_dy and not len(self.current_dataset.y) == \
                len(self.current_dataset.dy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and not len(self.current_dataset.x) == \
                len(self.current_dataset.dx):
            # BUG FIX: message previously reported "y and dy" here
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)
        # If the data length is zero, consider this as
        # though we were not able to read the file.
        if len(self.current_dataset.x) < 1:
            raise RuntimeError("ascii_reader: could not load file")

        # Filter out zero-x entries: this drops both the unused tail of
        # the pre-allocated arrays and any points read with x == 0
        x = self.current_dataset.x
        self.current_dataset.x = self.current_dataset.x[x != 0]
        self.current_dataset.y = self.current_dataset.y[x != 0]
        self.current_dataset.dy = self.current_dataset.dy[x != 0] if \
            has_error_dy else np.zeros(len(self.current_dataset.y))
        self.current_dataset.dx = self.current_dataset.dx[x != 0] if \
            has_error_dx else np.zeros(len(self.current_dataset.x))

        self.current_dataset.xaxis("\\rm{Q}", 'A^{-1}')
        self.current_dataset.yaxis("\\rm{Intensity}", "cm^{-1}")

        # Store loading process information
        self.current_datainfo.meta_data['loader'] = self.type_name
        self.send_to_output()

    def reset_data_list(self, no_lines):
        """
        Reset the plottable_1D object with freshly allocated arrays.

        :param no_lines: maximum possible number of data points; arrays
            are zero-filled and later trimmed by the zero-x filter
        """
        x = np.zeros(no_lines)
        y = np.zeros(no_lines)
        dy = np.zeros(no_lines)
        dx = np.zeros(no_lines)
        self.current_dataset = plottable_1D(x, y, dx, dy)
---|