[4b862c4] | 1 | """ |
---|
| 2 | Here we handle loading of "OTOKO" data (for more info about this format see |
---|
| 3 | the comment in load_bsl_data). Given the paths of header and data files, we |
---|
| 4 | aim to load the data into numpy arrays for use later. |
---|
| 5 | """ |
---|
| 6 | |
---|
| 7 | import itertools |
---|
| 8 | import os |
---|
| 9 | import struct |
---|
| 10 | import numpy as np |
---|
| 11 | |
---|
class CStyleStruct:
    """Lightweight record type giving "C-style struct" attribute access.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **fields):
        # Store the keyword arguments directly in the instance namespace.
        vars(self).update(fields)
---|
| 16 | |
---|
class BSLParsingError(Exception):
    """Raised when a BSL/OTOKO header or data file cannot be loaded."""
---|
| 19 | |
---|
class BSLData:
    """Pairs a loaded Q axis with its corresponding data axis."""

    def __init__(self, q_axis, data_axis):
        # Both values are stored as given; no copying or validation is done.
        self.q_axis, self.data_axis = q_axis, data_axis
---|
| 24 | |
---|
class BSLLoader(object):
    """Loads a Q axis and a data axis stored in the "OTOKO"/BSL format."""

    # Every value in a binary file is read as a 4-byte float.
    _FLOAT_SIZE = 4

    def __init__(self, qaxis_path, data_path):
        """Remember the header file paths of the Q axis and the data axis."""
        self.qaxis_path = qaxis_path
        self.data_path = data_path

    def load_bsl_data(self):
        """
        Loads "OTOKO" data, which is a format that stores each axis separately.
        An axis is represented by a "header" file, which in turn will give details
        of one or more binary files where the actual data is stored.

        Given the paths of two header files, this function will load each axis in
        turn. If loading is successful then an instance of the BSLData class
        will be returned, else an exception will be raised.

        For more information on the OTOKO file format, please see:
        http://www.diamond.ac.uk/Home/Beamlines/small-angle/SAXS-Software/CCP13/
        XOTOKO.html

        The BSL format, which is based on OTOKO, is also supported. Find out more
        about the BSL format at http://www.diamond.ac.uk/Home/Beamlines/small-angle
        /SAXS-Software/CCP13/BSL.html.
        """
        q_axis = self._load_bsl_axis(self.qaxis_path)
        data_axis = self._load_bsl_axis(self.data_path)

        return BSLData(q_axis, data_axis)

    def _load_bsl_axis(self, header_path):
        """
        Loads an "OTOKO" axis, given the header file path. Essentially, the
        header file contains information about the data in the form of integer
        "indicators", as well as the names of each of the binary files which are
        assumed to be in the same directory as the header.

        Returns a CStyleStruct with the attributes:
            header_path           -- the path that was passed in.
            data                  -- numpy array of shape
                                     (total frames, channels per frame).
            binary_file_info_list -- one CStyleStruct per binary file listed
                                     in the header.
            header_info           -- the first two lines of the header,
                                     concatenated.

        Raises BSLParsingError if the header or any data file is missing, if
        the header is malformed, if the binary files disagree on the number of
        channels, or if a binary file holds less data than the header claims.
        """
        if not os.path.exists(header_path):
            raise BSLParsingError("The header file %s does not exist." % header_path)

        with open(header_path, "r") as header_file:
            lines = header_file.readlines()
        if len(lines) < 4:
            raise BSLParsingError("Expected more lines in %s." % header_path)

        # Kept under its own name so that the loops below cannot rebind it
        # before it is returned.
        header_info = lines[0] + lines[1]

        binary_file_info_list = self._parse_binary_file_info(
            header_path, lines[2:])
        total_frames = sum(
            file_info.n_frames for file_info in binary_file_info_list)

        # Check that all binary files are listed in the header as having the same
        # number of channels, since I don't think CorFunc can handle ragged data.
        n_channels = binary_file_info_list[0].n_channels
        if any(file_info.n_channels != n_channels
               for file_info in binary_file_info_list):
            raise BSLParsingError(
                "Expected all binary files listed in %s to have the same number of channels." % header_path)

        data = np.zeros(shape=(total_frames, n_channels))
        frames_so_far = 0

        for file_info in binary_file_info_list:
            frames = self._read_frames(file_info)
            data[frames_so_far:frames_so_far + file_info.n_frames] = frames
            frames_so_far += file_info.n_frames

        return CStyleStruct(
            header_path=header_path,
            data=data,
            binary_file_info_list=binary_file_info_list,
            header_info=header_info
        )

    def _parse_binary_file_info(self, header_path, lines):
        """
        Turns the (indicator line, filename line) pairs that follow the two
        leading header lines into a list of CStyleStruct records, one per
        binary file. A trailing unpaired line is ignored.
        """
        header_dir = os.path.dirname(os.path.abspath(header_path))
        binary_file_info_list = []

        # Zipping an iterator with itself yields consecutive, non-overlapping
        # pairs: s -> (s0, s1), (s2, s3), ...
        line_iter = iter(lines)
        for pair_index, (indicator_line, filename) in enumerate(
                zip(line_iter, line_iter)):
            # The pairs start on the third line of the header file.
            line_number = 3 + 2 * pair_index
            indicators = indicator_line.split()

            if len(indicators) != 10:
                raise BSLParsingError(
                    "Expected 10 integer indicators on line %d of %s."
                    % (line_number, header_path))
            if not all(i.isdigit() for i in indicators):
                raise BSLParsingError(
                    "Expected all indicators on line %d of %s to be integers."
                    % (line_number, header_path))

            # The indicators at indices 4 to 8 are always zero since they
            # have been reserved for future use by the format. Also, the
            # "last_file" indicator seems to be there for legacy reasons,
            # as it doesn't appear to be something we have to bother
            # enforcing correct use of; we just define the last file as
            # being the last file in the list.
            binary_file_info_list.append(CStyleStruct(
                file_path=os.path.join(header_dir, filename.strip()),
                n_channels=int(indicators[0]),
                n_frames=int(indicators[1]),
                dimensions=int(indicators[2]),
                swap_bytes=int(indicators[3]) == 0,
                last_file=int(indicators[9]) == 0  # We don't use this.
            ))

        return binary_file_info_list

    def _read_frames(self, file_info):
        """
        Reads one binary file and returns its values as a numpy array of shape
        (n_frames, n_channels), byte-swapping each float if the header asked
        for it.
        """
        if not os.path.exists(file_info.file_path):
            raise BSLParsingError(
                "The data file %s does not exist." % file_info.file_path)

        n_values = file_info.n_frames * file_info.n_channels
        expected_size = n_values * self._FLOAT_SIZE

        with open(file_info.file_path, "rb") as binary_file:
            raw = binary_file.read(expected_size)

        if len(raw) < expected_size:
            raise BSLParsingError(
                "The data file %s is too short: expected %d bytes but found %d."
                % (file_info.file_path, expected_size, len(raw)))

        if n_values == 0:
            return np.zeros(shape=(file_info.n_frames, file_info.n_channels))

        # Interpret the bytes as native-order 4-byte floats. If the swap
        # indicator flag has been raised then the bytes of each float occur
        # in reverse order, which byteswap() undoes for the whole array.
        values = np.frombuffer(raw, dtype=np.float32)
        if file_info.swap_bytes:
            values = values.byteswap()

        # Values are stored frame by frame, channel by channel.
        return values.reshape(file_info.n_frames, file_info.n_channels)
---|