[4b862c4] | 1 | """ |
---|
| 2 | Here we handle loading of "OTOKO" data (for more info about this format see |
---|
[9c500ab] | 3 | the comment in load_otoko_data). Given the paths of header and data files, we |
---|
[4b862c4] | 4 | aim to load the data into numpy arrays for use later. |
---|
| 5 | """ |
---|
| 6 | |
---|
| 7 | import itertools |
---|
| 8 | import os |
---|
| 9 | import struct |
---|
| 10 | import numpy as np |
---|
| 11 | |
---|
class CStyleStruct:
    """
    A lightweight record type, in the spirit of a C "struct".

    Every keyword argument given to the constructor becomes an attribute of
    the instance with the same name, e.g. ``CStyleStruct(a=1).a == 1``.
    """

    def __init__(self, **kwds):
        # vars(self) is the instance's attribute dictionary, so updating it
        # installs each keyword as an attribute in a single step.
        vars(self).update(kwds)
---|
| 16 | |
---|
class OTOKOParsingError(Exception):
    """Raised when an OTOKO header or data file cannot be loaded or parsed."""
---|
| 19 | |
---|
class OTOKOData:
    """
    Simple container pairing the two axes of a loaded OTOKO data set.

    Attributes:
        q_axis: the loaded Q axis, stored exactly as it was passed in.
        data_axis: the loaded data axis, stored exactly as it was passed in.
    """

    def __init__(self, q_axis, data_axis):
        self.q_axis, self.data_axis = q_axis, data_axis
---|
| 24 | |
---|
class OTOKOLoader(object):
    """
    Loads a pair of "OTOKO" axes (a Q axis and a data axis) from disk.

    The loader is constructed with the paths of the two header files and does
    no I/O until load_otoko_data() is called.
    """

    def __init__(self, qaxis_path, data_path):
        self.qaxis_path = qaxis_path
        self.data_path = data_path

    def load_otoko_data(self):
        """
        Loads "OTOKO" data, which is a format that stores each axis separately.
        An axis is represented by a "header" file, which in turn will give details
        of one or more binary files where the actual data is stored.

        Given the paths of two header files, this function will load each axis in
        turn. If loading is successful then an instance of the OTOKOData class
        will be returned, else an exception will be raised.

        For more information on the OTOKO file format, please see:
        http://www.diamond.ac.uk/Home/Beamlines/small-angle/SAXS-Software/CCP13/
        XOTOKO.html
        """
        q_axis = self._load_otoko_axis(self.qaxis_path)
        data_axis = self._load_otoko_axis(self.data_path)

        return OTOKOData(q_axis, data_axis)

    def _load_otoko_axis(self, header_path):
        """
        Loads an "OTOKO" axis, given the header file path. Essentially, the
        header file contains information about the data in the form of integer
        "indicators", as well as the names of each of the binary files which are
        assumed to be in the same directory as the header.

        Returns a CStyleStruct with the attributes:
            header_path: the path that was passed in.
            data: a 2D numpy array of shape (total frames, channels) holding
                the frames of every listed binary file, in header order.
            binary_file_info_list: one CStyleStruct per listed binary file.
            header_info: the first two lines of the header, concatenated.

        Raises OTOKOParsingError if the header or a data file is missing, or
        if their contents are not as expected.
        """
        if not os.path.exists(header_path):
            raise OTOKOParsingError("The header file %s does not exist." % header_path)

        header_dir = os.path.dirname(os.path.abspath(header_path))

        with open(header_path, "r") as header_file:
            lines = header_file.readlines()
        if len(lines) < 4:
            raise OTOKOParsingError("Expected more lines in %s." % header_path)

        # The first two lines are free-form header text. This is kept under
        # its own name so that nothing below can accidentally rebind it.
        header_info = lines[0] + lines[1]

        binary_file_info_list = self._parse_binary_file_entries(
            lines[2:], header_path, header_dir)
        total_frames = sum(entry.n_frames for entry in binary_file_info_list)

        # Check that all binary files are listed in the header as having the same
        # number of channels, since I don't think CorFunc can handle ragged data.
        all_n_channels = [entry.n_channels for entry in binary_file_info_list]
        if not all(all_n_channels[0] == c for c in all_n_channels):
            raise OTOKOParsingError(
                "Expected all binary files listed in %s to have the same number of channels." % header_path)

        data = np.zeros(shape=(total_frames, all_n_channels[0]))
        frames_so_far = 0

        for entry in binary_file_info_list:
            frames = self._read_binary_file(entry)
            data[frames_so_far:frames_so_far + entry.n_frames, :] = frames
            frames_so_far += entry.n_frames

        return CStyleStruct(
            header_path=header_path,
            data=data,
            binary_file_info_list=binary_file_info_list,
            header_info=header_info
        )

    def _parse_binary_file_entries(self, entry_lines, header_path, header_dir):
        """
        Parses the (indicator line, filename line) pairs that follow the two
        header text lines, returning one CStyleStruct per binary file.

        entry_lines holds the header lines from line 3 onwards. A trailing
        line without a partner is ignored. Raises OTOKOParsingError if an
        indicator line is malformed or describes data that is not 1D.
        """
        # Feeding the same iterator to zip() twice yields consecutive,
        # non-overlapping pairs: (s0, s1), (s2, s3), ...
        remaining = iter(entry_lines)
        entries = []

        for pair_index, (indicators, filename) in enumerate(zip(remaining, remaining)):
            # entry_lines starts at line 3 of the header and each entry takes
            # up two lines, so this is the indicator line's 1-based number.
            line_number = 3 + 2 * pair_index
            indicators = indicators.split()

            if len(indicators) != 10:
                raise OTOKOParsingError(
                    "Expected 10 integer indicators on line %d of %s."
                    % (line_number, header_path))
            if not all(i.isdigit() for i in indicators):
                raise OTOKOParsingError(
                    "Expected all indicators on line %d of %s to be integers."
                    % (line_number, header_path))

            entry = CStyleStruct(
                # The indicators at indices 4 to 8 are always zero since they
                # have been reserved for future use by the format. Also, the
                # "last_file" indicator seems to be there for legacy reasons,
                # as it doesn't appear to be something we have to bother
                # enforcing correct use of; we just define the last file as
                # being the last file in the list.
                file_path=os.path.join(header_dir, filename.strip()),
                n_channels=int(indicators[0]),
                n_frames=int(indicators[1]),
                dimensions=int(indicators[2]),
                swap_bytes=int(indicators[3]) == 0,
                last_file=int(indicators[9]) == 0  # We don't use this.
            )
            if entry.dimensions != 1:
                msg = "File {} has {} dimensions, expected 1. Is it a BSL file?"
                raise OTOKOParsingError(msg.format(filename.strip(),
                                                   entry.dimensions))

            entries.append(entry)

        return entries

    def _read_binary_file(self, info):
        """
        Reads the frames of one binary file described by a header entry and
        returns them as a numpy array of shape (n_frames, n_channels).

        Raises OTOKOParsingError if the file does not exist or holds fewer
        values than the header entry promises.
        """
        if not os.path.exists(info.file_path):
            raise OTOKOParsingError(
                "The data file %s does not exist." % info.file_path)

        n_values = info.n_frames * info.n_channels
        if n_values == 0:
            # Nothing to read; avoid handing an empty buffer to numpy.
            return np.zeros(shape=(info.n_frames, info.n_channels))

        # Each value is stored as a 4-byte float.
        n_bytes = 4 * n_values
        with open(info.file_path, "rb") as binary_file:
            raw = binary_file.read(n_bytes)

        if len(raw) != n_bytes:
            raise OTOKOParsingError(
                "The data file %s is too short: expected %d bytes but found %d."
                % (info.file_path, n_bytes, len(raw)))

        values = np.frombuffer(raw, dtype=np.float32)
        if info.swap_bytes:
            # If the swap indicator flag has been raised then the bytes of
            # each float occur in reverse order; byteswap() returns a copy
            # with the bytes of every element reversed.
            values = values.byteswap()

        return values.reshape(info.n_frames, info.n_channels)
---|