[959eb01] | 1 | """ |
---|
[b09095a] | 2 | Generic multi-column ASCII data reader |
---|
[959eb01] | 3 | """ |
---|
| 4 | ############################################################################ |
---|
[b09095a] | 5 | # This software was developed by the University of Tennessee as part of the |
---|
| 6 | # Distributed Data Analysis of Neutron Scattering Experiments (DANSE) |
---|
| 7 | # project funded by the US National Science Foundation. |
---|
| 8 | # If you use DANSE applications to do scientific research that leads to |
---|
| 9 | # publication, we ask that you acknowledge the use of the software with the |
---|
| 10 | # following sentence: |
---|
| 11 | # This work benefited from DANSE software developed under NSF award DMR-0520547. |
---|
| 12 | # copyright 2008, University of Tennessee |
---|
[959eb01] | 13 | ############################################################################# |
---|
| 14 | |
---|
[b09095a] | 15 | import logging |
---|
| 16 | from sas.sascalc.dataloader.file_reader_base_class import FileReader |
---|
| 17 | from sas.sascalc.dataloader.data_info import DataInfo, plottable_1D |
---|
[da8bb53] | 18 | from sas.sascalc.dataloader.loader_exceptions import FileContentsException,\ |
---|
| 19 | DefaultReaderException |
---|
[959eb01] | 20 | |
---|
[b09095a] | 21 | logger = logging.getLogger(__name__) |
---|
[959eb01] | 22 | |
---|
| 23 | |
---|
class Reader(FileReader):
    """
    Reader for generic multi-column ASCII data files.

    Each data row is read as ``x [y [dy [dx]]]``: the first column is stored
    in ``x``, the second in ``y``, the third (if present) in ``dy`` and the
    fourth (if present) in ``dx``.
    """
    # File type
    type_name = "ASCII"
    # Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    # List of allowed extensions
    ext = ['.txt', '.dat', '.abs', '.csv']
    # Flag to bypass extension check
    allow_all = True
    # Number of consecutive numeric rows (with a constant column count) that
    # must be seen before the rows are accepted as real data
    min_data_pts = 5

    def get_file_contents(self):
        """
        Parse the open file as column data and send the result to the output.

        The file is scanned line by line. Numeric rows are collected as
        "candidates"; once ``min_data_pts`` consecutive candidates sharing
        the same number of columns have been read, they are treated as data.
        Before that point, a non-numeric token or a change in column count
        discards the candidates collected so far (header handling). After
        that point, either event ends the scan (footer handling). Blank
        lines are skipped.

        :raises FileContentsException: if established data has 8 or more
            columns (looks like 2D data), if no data block was found and
            ``self.extension`` is one of ``self.ext``, or if the I/dI or
            Q/dQ array lengths disagree.
        :raises DefaultReaderException: if no data block was found and
            ``self.extension`` is not one of ``self.ext``.
        """
        buff = self.readall()
        filepath = self.f_open.name
        lines = buff.splitlines()
        self.output = []
        self.current_datainfo = DataInfo()
        self.current_datainfo.filename = filepath
        self.reset_data_list(len(lines))

        # Set to True when the candidate rows carry a 4th (dx) / 3rd (dy)
        # column; only consulted by the sanity checks after the scan.
        has_error_dx = None
        has_error_dy = None

        # Becomes True once min_data_pts candidate rows have been collected
        is_data = False
        # Number of candidate rows currently stored (also the next index)
        candidate_lines = 0
        # Number of rows parsed successfully so far. Blank and non-numeric
        # lines do not advance it, so ``len(lines) - line_no`` is an upper
        # bound on the number of rows that can still be stored.
        line_no = 0
        # Column count of the previous row; starts at the minimum of two
        lentoks = 2
        for line in lines:
            toks = self.splitline(line.strip())
            # Number of columns on the current line
            new_lentoks = len(toks)
            if new_lentoks == 0:
                # Blank line: skip it, allowing breaks within data sets
                continue
            try:
                if new_lentoks != lentoks:
                    if is_data:
                        # Column count changed after the data was
                        # established: treat as a footer and stop reading
                        break
                    # Numeric header line(s): drop the candidates collected
                    # so far, including their error-column flags, and start
                    # over from the current line
                    candidate_lines = 0
                    has_error_dx = None
                    has_error_dy = None
                    self.reset_data_list(len(lines) - line_no)

                self.current_dataset.x[candidate_lines] = float(toks[0])

                if new_lentoks > 1:
                    self.current_dataset.y[candidate_lines] = float(toks[1])

                # If a 3rd column is present, consider it dy
                if new_lentoks > 2:
                    self.current_dataset.dy[candidate_lines] = \
                        float(toks[2])
                    has_error_dy = True

                # If a 4th column is present, consider it dx
                if new_lentoks > 3:
                    self.current_dataset.dx[candidate_lines] = \
                        float(toks[3])
                    has_error_dx = True

                candidate_lines += 1
                # Enough consecutive rows: consider this set to be data
                if candidate_lines >= self.min_data_pts:
                    is_data = True

                if is_data and new_lentoks >= 8:
                    msg = "This data looks like 2D ASCII data. Use the file "
                    msg += "converter tool to convert it to NXcanSAS."
                    raise FileContentsException(msg)

                # Remember the column count for comparison with the next row
                lentoks = new_lentoks
                line_no += 1
            except ValueError:
                # ValueError is raised when a non-numeric string is passed
                # to float(). Within established data this ends the read.
                if is_data:
                    break
                # Otherwise the candidates were not data: discard them and
                # restore the initial scanning state
                self.reset_data_list(len(lines) - line_no)
                lentoks = 2
                has_error_dx = None
                has_error_dy = None
                candidate_lines = 0

        if not is_data:
            self.set_all_to_none()
            if self.extension in self.ext:
                msg = "ASCII Reader error: Fewer than five Q data points found "
                msg += "in {}.".format(filepath)
                raise FileContentsException(msg)
            else:
                msg = "ASCII Reader could not load the file {}".format(filepath)
                raise DefaultReaderException(msg)

        # Sanity checks: error arrays must match the arrays they describe
        if has_error_dy and \
                len(self.current_dataset.y) != len(self.current_dataset.dy):
            msg = "ASCII Reader error: Number of I and dI data points are"
            msg += " different in {}.".format(filepath)
            # TODO: Add error to self.current_datainfo.errors instead?
            self.set_all_to_none()
            raise FileContentsException(msg)
        if has_error_dx and \
                len(self.current_dataset.x) != len(self.current_dataset.dx):
            msg = "ASCII Reader error: Number of Q and dQ data points are"
            msg += " different in {}.".format(filepath)
            # TODO: Add error to self.current_datainfo.errors instead?
            self.set_all_to_none()
            raise FileContentsException(msg)

        self.remove_empty_q_values()
        self.current_dataset = self.set_default_1d_units(self.current_dataset)

        # Store loading process information
        self.current_datainfo.meta_data['loader'] = self.type_name
        self.send_to_output()
---|