1 | """ |
---|
2 | Here we handle loading of "OTOKO" data (for more info about this format see |
---|
3 | the comment in load_otoko_data). Given the paths of header and data files, we |
---|
4 | aim to load the data into numpy arrays for use later. |
---|
5 | """ |
---|
6 | |
---|
7 | import itertools |
---|
8 | import os |
---|
9 | import struct |
---|
10 | import numpy as np |
---|
11 | |
---|
class CStyleStruct:
    """Lightweight record type: every keyword argument becomes an attribute.

    Example: ``CStyleStruct(x=1, y=2).x`` evaluates to ``1``.
    """

    def __init__(self, **fields):
        # Copy the keyword arguments straight into the instance namespace.
        vars(self).update(fields)
---|
16 | |
---|
class OTOKOParsingError(Exception):
    """Raised when an OTOKO header or data file cannot be loaded."""
---|
19 | |
---|
class OTOKOData:
    """Container holding the two loaded axes: ``q_axis`` and ``data_axis``."""

    def __init__(self, q_axis, data_axis):
        # Both axes are stored exactly as given; no copying or validation.
        self.data_axis = data_axis
        self.q_axis = q_axis
---|
24 | |
---|
class OTOKOLoader(object):
    """Loads a pair of "OTOKO" axes (a q axis and a data axis).

    Each axis is described by a text header file which lists one or more
    binary files; those binary files are looked up in the header's directory.
    """

    def __init__(self, qaxis_path, data_path):
        """
        Store the two header paths; nothing is read until load_otoko_data().

        qaxis_path -- path of the header file describing the q axis.
        data_path  -- path of the header file describing the data axis.
        """
        self.qaxis_path = qaxis_path
        self.data_path = data_path

    def load_otoko_data(self):
        """
        Loads "OTOKO" data, which is a format that stores each axis separately.
        An axis is represented by a "header" file, which in turn will give details
        of one or more binary files where the actual data is stored.

        Using the two header paths given to the constructor, this function will
        load each axis in turn.  If loading is successful then an instance of
        the OTOKOData class will be returned, else an OTOKOParsingError will be
        raised.

        For more information on the OTOKO file format, please see:
        http://www.diamond.ac.uk/Home/Beamlines/small-angle/SAXS-Software/CCP13/
        XOTOKO.html
        """
        q_axis = self._load_otoko_axis(self.qaxis_path)
        data_axis = self._load_otoko_axis(self.data_path)

        return OTOKOData(q_axis, data_axis)

    def _load_otoko_axis(self, header_path):
        """
        Loads an "OTOKO" axis, given the header file path.  Essentially, the
        header file contains information about the data in the form of integer
        "indicators", as well as the names of each of the binary files which are
        assumed to be in the same directory as the header.

        Returns a CStyleStruct with the attributes:
            header_path           -- the path that was passed in.
            data                  -- array of shape (total frames, channels).
            binary_file_info_list -- one CStyleStruct per listed binary file.
            header_info           -- the first two header lines, concatenated.

        Raises OTOKOParsingError if the header or a binary file is missing,
        the header is malformed, or a binary file is shorter than the header
        says it should be.
        """
        if not os.path.exists(header_path):
            raise OTOKOParsingError("The header file %s does not exist." % header_path)

        header_dir = os.path.dirname(os.path.abspath(header_path))

        with open(header_path, "r") as header_file:
            lines = header_file.readlines()
        if len(lines) < 4:
            raise OTOKOParsingError("Expected more lines in %s." % header_path)

        # The first two lines are kept verbatim as the header information.
        # This gets its own name so that later loops cannot clobber it.
        header_info = lines[0] + lines[1]

        binary_file_info_list = []
        total_frames = 0

        # The remaining lines come in (indicators, filename) pairs.  zip()
        # stops at the shorter slice, so a trailing unpaired line is ignored.
        for indicator_line, filename in zip(lines[2::2], lines[3::2]):
            indicators = indicator_line.split()

            if len(indicators) != 10:
                raise OTOKOParsingError(
                    "Expected 10 integer indicators on line 3 of %s."
                    % header_path)
            if not all(i.isdigit() for i in indicators):
                raise OTOKOParsingError(
                    "Expected all indicators on line 3 of %s to be integers."
                    % header_path)

            binary_file_info = CStyleStruct(
                # The indicators at indices 4 to 8 are always zero since they
                # have been reserved for future use by the format.  Also, the
                # "last_file" indicator seems to be there for legacy reasons,
                # as it doesn't appear to be something we have to bother
                # enforcing correct use of; we just define the last file as
                # being the last file in the list.
                file_path=os.path.join(header_dir, filename.strip()),
                n_channels=int(indicators[0]),
                n_frames=int(indicators[1]),
                dimensions=int(indicators[2]),
                swap_bytes=int(indicators[3]) == 0,
                last_file=int(indicators[9]) == 0  # We don't use this.
            )
            if binary_file_info.dimensions != 1:
                msg = "File {} has {} dimensions, expected 1. Is it a BSL file?"
                raise OTOKOParsingError(msg.format(filename.strip(),
                                                   binary_file_info.dimensions))

            binary_file_info_list.append(binary_file_info)
            total_frames += binary_file_info.n_frames

        # Check that all binary files are listed in the header as having the same
        # number of channels, since I don't think CorFunc can handle ragged data.
        all_n_channels = [entry.n_channels for entry in binary_file_info_list]
        if not all(all_n_channels[0] == c for c in all_n_channels):
            raise OTOKOParsingError(
                "Expected all binary files listed in %s to have the same number of channels." % header_path)

        data = np.zeros(shape=(total_frames, all_n_channels[0]))
        frames_so_far = 0

        for file_info in binary_file_info_list:
            if not os.path.exists(file_info.file_path):
                raise OTOKOParsingError(
                    "The data file %s does not exist." % file_info.file_path)

            # Each value is a 4-byte float; read the whole file's worth at once.
            expected_bytes = 4 * file_info.n_frames * file_info.n_channels
            with open(file_info.file_path, "rb") as binary_file:
                raw = binary_file.read(expected_bytes)
            if len(raw) != expected_bytes:
                raise OTOKOParsingError(
                    "The data file %s is shorter than the %d bytes required by %s."
                    % (file_info.file_path, expected_bytes, header_path))

            values = np.frombuffer(raw, dtype=np.float32)
            if file_info.swap_bytes:
                # If the swap indicator flag has been raised then the bytes of
                # each float occur in reverse order; byteswap() undoes that.
                values = values.byteswap()

            data[frames_so_far:frames_so_far + file_info.n_frames] = \
                values.reshape(file_info.n_frames, file_info.n_channels)
            frames_so_far += file_info.n_frames

        return CStyleStruct(
            header_path=header_path,
            data=data,
            binary_file_info_list=binary_file_info_list,
            header_info=header_info
        )
---|