-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_data.m
364 lines (271 loc) · 14.5 KB
/
generate_data.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
% Supplementary material for the fridgie paper.
% Part 1: The synthetic data.
% This script generates the required data for the neural network to be
% trained. IMPORTANT: Change the main parameters to suit your device.
% Start from an empty workspace and a clean command window.
clear;
clc;
%% Main parameters
% The conditions the neural network should detect, one per row.
% Append further rows to add your own.
labels = [ ...
    "normal"; ...
    "low_refrigerant"; ...
    "check_airflow"; ...
    "check_condenser"; ...
    "check_compressor"; ...
    "high_heatload"];
no_of_conditions = numel(labels); % Reference count for the sanity checks and loops below
% How many images are generated for every condition. The training and the
% testing sets are drawn from distributions with the same parameters: the
% network is trained on the former and validated on the latter, which is
% 'new' data it has not seen before.
training_data_per_condition = 10000;
testing_data_per_condition = 2500;
% Absolute temperature limits used to make the most of the 8-bit dynamic
% range of the generated images. Temperatures should never fall outside
% these boundaries. The unit is Celsius here; any other unit works as long
% as it is used consistently throughout the script.
absolute_mimimum_temp = -20; % Maps to 0 after rescaling
absolute_maximum_temp = 80; % Maps to 1 after rescaling
% These are the five parameters we are looking at. They are based upon
% my old hunk-of-junk 7 kW R22 system that failed so many times.
% Naturally I didn't leak refrigerant into the atmosphere, and
% I haven't stayed outside in the 45 degrees heat over hours to collect this data.
% The standard deviations are estimated.
% A useful visualisation tool:
% https://www.desmos.com/calculator/0x3rpqtgrx
% The standard deviations are intentionally wide, so it will be difficult to
% train the neural network. In return, it will be more sensitive.
% IMPORTANT: CHANGE THESE TO YOUR SYSTEM! YOU NEED TO MEASURE THIS ON YOUR OWN!
% The order of the data is as per the labels specified above.
%
% t1_means = [60; 45; 55; 65; 40; 65];
% t1_standard_deviations = [2; 2; 2; 2; 0.1; 2];
%
% t3_means = [48; 40; 45; 60; 40; 45];
% t3_standard_deviations = [2; 2; 2; 2; 0.1; 2];
%
% t2_means = [4; 0; 4; 15; 35; 4];
% t2_standard_deviations = [2; 2; 2; 2; 0.1; 2];
%
% t4_means = [8; 15; 5; 30; 30; 20];
% t4_standard_deviations = [2; 2; 2; 2; 0.1; 2];
%
% d_means = [0.5; 1; 0.8; 0.3; 1; 1];
% d_standard_deviations = [0.1; 0.01; 0.05; 0.06; 0.01; 0.04];
% Following on from reviewers, these numbers came from:
% Kim, M., Payne, W. V., Domanski, P. A., Yoon, S. H., & Hermes, C. J. (2009).
% Performance of a residential heat pump operating in the cooling mode with single faults imposed.
% Applied thermal engineering, 29(4), 770-778.
% https://doi.org/10.1016/j.applthermaleng.2008.04.009
% ...at around 10-20% fault, wherever it's the earliest. What they call 'restriction', I call 'high_heatload'.
% Normal conditions (fig 2): 10-15° superheat, similar subcooling. Evaporator is 4 °C, condenser is 50°C.
% NOTE:
% The standard deviations for the data set have been increased.
% This was done so that the neural network can be more sensitive.
% As a reminder, the values in the vectors below correspond to:
% | Normal | Low refrigerant | Check airflow | Check condenser | Check compressor | High heatload |
% Normal operation:
% Mean values of the five measured quantities for every condition.
% Rows follow the order of the labels; columns are:
%   t1 - temperature at the input of the condenser
%   t3 - temperature at the output of the condenser
%   t2 - temperature at the input of the evaporator
%   t4 - temperature at the output of the evaporator
%   d  - compressor utilisation rate
%
% Notes on the individual conditions:
%   Normal operation.
%   Low refrigerant: superheat increases, subcooling decreases, compressor
%     works harder. Heat exchanger inputs are colder.
%     Kim et al 2009, Figure 7, 10% fault level.
%   Check airflow (at the evaporator): superheat decreases, subcooling
%     increases, compressor works harder. Low heat transfer at the
%     evaporator, lots of heat at the condenser.
%     Kim et al 2009, Figure 5, 20% fault level.
%   Check condenser: superheat increases, subcooling decreases, compressor
%     works harder. Low heat transfer at the condenser, evaporator not cold
%     enough. Kim et al 2009, Figure 4, 20% fault level.
%   Check compressor: this one is the easiest, no pumping action, the
%     controller pushes the poor thing hard.
%   High heatload: superheat increases, subcooling increases, compressor
%     works harder. Massive amount of heat exchange at both ends.
%     Kim et al 2009, Figure 6, 10% fault level.
%
%    t1   t3   t2   t4    d
condition_means = [ ...
    60,  40,   4,  14,  0.3; ... Normal operation
    50,  40,   0,  15,  0.7; ... Low refrigerant
    70,  55,   4,   8,  0.7; ... Check airflow
    70,  65,  10,  15,  0.7; ... Check condenser
    45,  40,  15,  25,  1;   ... Check compressor
    65,  40,   4,  20,  0.7];  % High heatload
% Split the matrix into one column vector per measured quantity.
t1_means = condition_means(:, 1);
t3_means = condition_means(:, 2);
t2_means = condition_means(:, 3);
t4_means = condition_means(:, 4);
d_means = condition_means(:, 5);
% Standard deviations, one entry per condition in the order of the labels.
% The RMS errors are given for everything in Table 4 of the reference; they
% were increased here to create noisier data (effectively, the noise levels
% were quadrupled). All four temperatures share the same values.
temperature_standard_deviations = [1.5; 1.5; 1.5; 1.5; 0.1; 1.5];
t1_standard_deviations = temperature_standard_deviations;
t3_standard_deviations = temperature_standard_deviations;
t2_standard_deviations = temperature_standard_deviations;
t4_standard_deviations = temperature_standard_deviations;
d_standard_deviations = [0.1; 0.01; 0.05; 0.06; 0.01; 0.04];
% Drop the temporaries so only the per-quantity vectors stay in the workspace.
clear condition_means temperature_standard_deviations
% Sanity checks!
% Every mean and standard deviation vector must hold exactly one entry per
% label. Each row below pairs a variable name (used in the messages) with
% its value. The order is the order in which the vectors are checked, so the
% first offending vector is the one that gets reported.
vectors_to_check = { ...
    't1_means', t1_means; ...
    't1_standard_deviations', t1_standard_deviations; ...
    't3_means', t3_means; ...
    't3_standard_deviations', t3_standard_deviations; ...
    't2_means', t2_means; ...
    't2_standard_deviations', t2_standard_deviations; ...
    't4_means', t4_means; ...
    't4_standard_deviations', t4_standard_deviations; ...
    'd_means', d_means; ...
    'd_standard_deviations', d_standard_deviations};
for k = 1:size(vectors_to_check, 1)
    vector_name = vectors_to_check{k, 1};
    vector_length = length(vectors_to_check{k, 2});
    if(vector_length ~= no_of_conditions)
        % Print both lengths first, then stop the script with an error.
        fprintf('length of labels: %d, length of %s: %d\n', no_of_conditions, vector_name, vector_length)
        error('%s does not align with the labels.', vector_name)
    end
end
% We put these together into a table, and check.
% The semicolon is left off on purpose so the table is shown in the command window.
table_1_in_paper = table(labels, t1_means, t3_means, t2_means, t4_means, d_means)
%% Create normal distributions for each variable
% Seed the random number generator from the clock, once.
% rng documents its numeric seed as a nonnegative integer, whereas
% posixtime returns fractional seconds, so 'shuffle' is used instead.
% The generator is deliberately NOT reseeded between the training and the
% testing data: consecutive draws from one stream are already independent,
% and two clock-based seeds taken in quick succession can coincide, which
% would make the testing data repeat the training data.
rng('shuffle');
% draw_samples(mu, sigma, n) returns an n-by-numel(mu) matrix. Column k
% holds n normally distributed samples with mean mu(k) and standard
% deviation sigma(k). It relies on randn from base MATLAB (no toolbox
% required) and on implicit expansion of the 1-by-K row vectors.
draw_samples = @(mu, sigma, n) mu(:).' + sigma(:).' .* randn(n, numel(mu));
% Training data: one row per sample, one column per condition.
t1_training = draw_samples(t1_means, t1_standard_deviations, training_data_per_condition);
t3_training = draw_samples(t3_means, t3_standard_deviations, training_data_per_condition);
t2_training = draw_samples(t2_means, t2_standard_deviations, training_data_per_condition);
t4_training = draw_samples(t4_means, t4_standard_deviations, training_data_per_condition);
d_training = draw_samples(d_means, d_standard_deviations, training_data_per_condition);
% Testing data: same parameters, fresh draws from the same random stream.
t1_testing = draw_samples(t1_means, t1_standard_deviations, testing_data_per_condition);
t3_testing = draw_samples(t3_means, t3_standard_deviations, testing_data_per_condition);
t2_testing = draw_samples(t2_means, t2_standard_deviations, testing_data_per_condition);
t4_testing = draw_samples(t4_means, t4_standard_deviations, testing_data_per_condition);
d_testing = draw_samples(d_means, d_standard_deviations, testing_data_per_condition);
%% Create and save the training data
tic; % Measure time here.
% Example figure for the paper: one row per condition, five pixels per row.
example_figure_to_show = zeros(no_of_conditions, 5);
fprintf('Generating training data.\nLabels: ')
output_root_dir = 'fridgie_data';
training_root_dir = sprintf('%s_training', output_root_dir);
if(exist(training_root_dir, 'dir'))
    % If we have this directory, delete it and its contents.
    rmdir(training_root_dir, 's')
end
% All paths are built with fullfile instead of changing the working
% directory, so a failure half-way through does not leave the session
% inside one of the output folders.
mkdir(training_root_dir);
for i = 1:no_of_conditions
    % Create the condition's directory
    condition_dir = fullfile(training_root_dir, char(labels(i)));
    mkdir(condition_dir);
    fprintf("%s, ", labels(i));
    % One row per image: the four temperatures in the order t1, t3, t2, t4.
    temperatures = [t1_training(:, i), t3_training(:, i), t2_training(:, i), t4_training(:, i)];
    % We rescale the temperature data. The limits are fixed scalars, so
    % rescaling the whole matrix at once gives the same values as rescaling
    % each row separately.
    temperatures_rescaled = rescale(temperatures, 'InputMin', absolute_mimimum_temp, 'InputMax', absolute_maximum_temp);
    % We whack the duty cycle information to it as well (fifth pixel)
    normalised_image_data = cat(2, temperatures_rescaled, d_training(:, i));
    % And this is the bit where we generate the images
    for j = 1:training_data_per_condition
        % Save the image, with custom formatting options
        imwrite(normalised_image_data(j, :), fullfile(condition_dir, sprintf('%d.png', j)), 'BitDepth', 8);
    end
    % We generate this for the paper. Just an example image of all the
    % conditions: the last image written for this condition.
    example_figure_to_show(i, :) = normalised_image_data(training_data_per_condition, :);
end
fprintf("...done!\n")
% The example figure goes next to the output folders, in the current directory.
imwrite(example_figure_to_show, 'condition_figure.png')
%% Create and save the testing data
fprintf('Generating testing data.\nLabels: ')
testing_root_dir = sprintf('%s_testing', output_root_dir);
if(exist(testing_root_dir, 'dir'))
    % If we have this directory, delete it and its contents.
    rmdir(testing_root_dir, 's')
end
% All paths are built with fullfile instead of changing the working
% directory, so a failure half-way through does not leave the session
% inside one of the output folders.
mkdir(testing_root_dir);
for i = 1:no_of_conditions
    % Create the condition's directory
    condition_dir = fullfile(testing_root_dir, char(labels(i)));
    mkdir(condition_dir);
    fprintf('%s, ', labels(i))
    % One row per image: the four temperatures in the order t1, t3, t2, t4.
    % IMPORTANT: these must come from the *_testing arrays. Reading the
    % *_training arrays here would make the testing set a copy of training
    % samples, and the validation would no longer use unseen data.
    temperatures = [t1_testing(:, i), t3_testing(:, i), t2_testing(:, i), t4_testing(:, i)];
    % We rescale the temperature data. The limits are fixed scalars, so
    % rescaling the whole matrix at once gives the same values as rescaling
    % each row separately.
    temperatures_rescaled = rescale(temperatures, 'InputMin', absolute_mimimum_temp, 'InputMax', absolute_maximum_temp);
    % We whack the duty cycle information to it as well (fifth pixel)
    normalised_image_data = cat(2, temperatures_rescaled, d_testing(:, i));
    % And this is the bit where we generate the images
    for j = 1:testing_data_per_condition
        % Save the image, with custom formatting options
        imwrite(normalised_image_data(j, :), fullfile(condition_dir, sprintf('%d.png', j)), 'BitDepth', 8);
    end
end
fprintf("...done!\n")
fprintf("Zipping the data together so you won't have to copy thousands of files ")
% The folder names are derived from output_root_dir, the same way they are
% built when the folders are created, so changing the root name in one
% place keeps the archive step working.
folders_to_zip = {sprintf('%s_testing', output_root_dir), sprintf('%s_training', output_root_dir)};
zip('use_this_in_the_tensorflow_code', folders_to_zip)
fprintf('...done!\n')
% toc reports the seconds elapsed since the tic issued before the training
% data was written.
fprintf('All done! Took %.1f minutes.\n', toc/60)