% Colonoscopy Lesions Classification
% Author: Macário Martins <[email protected]>
%% Cleaning the workspace
%
% Just a few commands to close previous figure windows, clear the workspace
% and clear the command window.
%
close all;
clear;
clc;
%% Loading the Database and Settings
%
% Below you will find the configurations to be used in the neural networks. Feel
% free to test any configuration you want.
%
load('Dataset\gastrointestinal_colonoscopy_lesions_dataset.mat');
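% Note: the backslash in the load path above assumes Windows; on other
% platforms, fullfile('Dataset', 'gastrointestinal_colonoscopy_lesions_dataset.mat')
% builds the path portably. The diary call below mirrors all command-window
% output of this run to a timestamped log file.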
diary(char(strcat('log-', char(datetime('now', 'Format', 'y-M-d-H-m-s')), '.txt')));
tic;
%--------------------------------------------------------------------------
% Illumination settings
%--------------------------------------------------------------------------
all_lights = find(light_type ~= 0);  % Select all illumination indexes
white_light = find(light_type == 1); % Select just white-light illumination
nbi_light = find(light_type == 2);   % Select just NBI illumination
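% (NBI stands for Narrow-Band Imaging.)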
%--------------------------------------------------------------------------
% Database size initial settings
%--------------------------------------------------------------------------
samples_inds = all_lights; % Use it as a pivot to select the illumination
classes_num = max(class_label(samples_inds)); % Get the number of used classes
clear all_lights white_light nbi_light;
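%--------------------------------------------------------------------------
% To restrict the experiment to a single illumination, assign white_light
% or nbi_light to samples_inds above (before the clear) instead of
% all_lights.
%--------------------------------------------------------------------------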
%--------------------------------------------------------------------------
% Neural networks settings
%--------------------------------------------------------------------------
L_mlp = [7, 5];       % 1-by-P vector of neurons for each layer
L_rbf = 5;            % Number of neurons in the hidden layer
learning_rate = 0.01; % Learning rate to be used in the weight updates
epochs = 500;         % Max number of epochs
err = 1e-7;           % Mean squared error goal for both neural networks
%% Adjust and Balance the Database and create Label Vectors
%
% The code below creates a binary label representation in which the correct
% class for a sample is set to 1 and all other class indexes are set to 0.
%
% The dataset is also balanced in order to keep the same number of
% samples in every class.
%
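% For example, with three classes, a sample of class 2 receives the label
% column [0; 1; 0] in the matrix D built below.
%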
%--------------------------------------------------------------------------
% The following lines separate all samples by class in order to find the
% smallest class and discard extra samples from the bigger ones, so that
% every class ends up with the same number of samples
%--------------------------------------------------------------------------
classified_samples = {};
classes_upper_bound = size(features(:, samples_inds), 2);
for class = 1:classes_num
    classified_samples{class} = find(class_label(samples_inds) == class);
    if (size(classified_samples{class}, 2) < classes_upper_bound)
        classes_upper_bound = size(classified_samples{class}, 2);
    end
end
samples_num = classes_upper_bound; % The samples num is redefined
clear classes_upper_bound;
%--------------------------------------------------------------------------
% Since we know which is the smallest class and how many samples it has
% (the upper bound), it is time to cut off surplus samples from the classes
% whose number of samples is higher than the upper bound.
%--------------------------------------------------------------------------
rng('shuffle'); % Just to make sure the seed will be randomly chosen
selected_samples_inds = zeros(classes_num, samples_num);
%--------------------------------------------------------------------------
% Select the same number of samples for each class
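% (randperm(class_samples_num, samples_num) draws samples_num unique
% indexes from 1..class_samples_num, so no sample is selected twice within
% a class)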
%--------------------------------------------------------------------------
for class = 1:classes_num
    class_samples_num = size(classified_samples{class}, 2);
    class_samples_inds = randperm(class_samples_num, samples_num);
    selected_samples_inds(class, :) = classified_samples{class}(class_samples_inds);
end
clear class_samples_num;
%--------------------------------------------------------------------------
% Compose the database with just the randomly chosen samples and create the
% label vectors
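%
% The two nested loops below interleave the classes: column 1 of X holds
% the first sample of class 1, column 2 the first sample of class 2, and so
% on, so consecutive columns cycle through all classes.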
%--------------------------------------------------------------------------
X = zeros(size(features, 1), classes_num * samples_num);
D = zeros(classes_num, classes_num * samples_num);
samples_count = zeros(classes_num, 1);
database_ind = 1;
for sample = 1:samples_num
    for class = 1:classes_num
        sample_index = selected_samples_inds(class, sample);
        X(:, database_ind) = features(:, sample_index);
        D(class, database_ind) = 1;
        samples_count(class) = samples_count(class) + 1;
        database_ind = database_ind + 1;
    end
end
samples_num = samples_num * classes_num; % Samples num is redefined to represent the whole database
clear class database_ind samples_count samples_inds selected_samples_inds;
%% Build Neural Networks
%
% The functions below build the neural networks with the global parameters,
% specified above.
%
mlp = NeuralNetworks.MLP(X, D, L_mlp, learning_rate, epochs, err);
rbf = NeuralNetworks.RBF(X, D, L_rbf, learning_rate, epochs, err);
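%--------------------------------------------------------------------------
% Both builders belong to this repository's NeuralNetworks package; the
% returned mlp and rbf models are the ones evaluated by the validation
% routines below.
%--------------------------------------------------------------------------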
%% Validations
%
% The lines below run cross-validation with Leave-One-Out (LOO) and K-Fold
% with K = 10 (the default). You may also set the number of trials used to
% search for the best and worst cases of the k-fold validation. Be careful
% when selecting the number of trials: higher values take considerably
% longer to finish.
%
k = 10; % Use it for k-fold
trials = 10; % Number of times the validations will be called
%--------------------------------------------------------------------------
% The variables below store the accuracies of each trial for both the LOO
% and K-Fold validations.
%--------------------------------------------------------------------------
mlp_accuracies = zeros(2, trials);
rbf_accuracies = zeros(2, trials);
%--------------------------------------------------------------------------
% The following variables store the confusion matrices of the LOO and
% K-Fold validations. Each confusion matrix is already the mean matrix over
% the validation's internal executions.
%--------------------------------------------------------------------------
mlp_confusions = zeros(classes_num, classes_num, 2, trials);
rbf_confusions = zeros(classes_num, classes_num, 2, trials);
%--------------------------------------------------------------------------
% "loo_ind" and "kfold_ind" are just names to the indexes. It is good for
% better understanding of the attributions returned from validation methods
%--------------------------------------------------------------------------
loo_ind = 1;
kfold_ind = 2;
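%--------------------------------------------------------------------------
% Accordingly, row loo_ind of each *_accuracies matrix holds the LOO
% results and row kfold_ind the k-fold results, one column per trial.
%--------------------------------------------------------------------------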
for t = 1:trials
    fprintf("Trial %2d/%2d", t, trials);
    fprintf("\n--------------------------");
    fprintf("\n\tMLP - LOO: ");
    [accuracy, confusion] = Validations.LOO(mlp, X, D);
    mlp_accuracies(loo_ind, t) = accuracy;
    mlp_confusions(:, :, loo_ind, t) = confusion;
    fprintf("%.4f", mlp_accuracies(loo_ind, t));
    fprintf("\n\tRBF - LOO: ");
    [accuracy, confusion] = Validations.LOO(rbf, X, D);
    rbf_accuracies(loo_ind, t) = accuracy;
    rbf_confusions(:, :, loo_ind, t) = confusion;
    fprintf("%.4f", rbf_accuracies(loo_ind, t));
    fprintf("\n\tMLP - %d-Fold: ", k);
    [accuracy, confusion] = Validations.KFold(mlp, X, D, k);
    mlp_accuracies(kfold_ind, t) = accuracy;
    mlp_confusions(:, :, kfold_ind, t) = confusion;
    fprintf("%.4f", mlp_accuracies(kfold_ind, t));
    fprintf("\n\tRBF - %d-Fold: ", k);
    [accuracy, confusion] = Validations.KFold(rbf, X, D, k);
    rbf_accuracies(kfold_ind, t) = accuracy;
    rbf_confusions(:, :, kfold_ind, t) = confusion;
    fprintf("%.4f\n\n", rbf_accuracies(kfold_ind, t));
end
%% Results
%
% The lines below collect the results of the MLP and RBF classifications,
% showing the best and worst cases and the mean confusion matrix for each
% validation method.
%
mlp_best_cases = max(mlp_accuracies, [], 2);
rbf_best_cases = max(rbf_accuracies, [], 2);
mlp_worst_cases = min(mlp_accuracies, [], 2);
rbf_worst_cases = min(rbf_accuracies, [], 2);
mlp_mean_confusion = mean(mlp_confusions, 4);
rbf_mean_confusion = mean(rbf_confusions, 4);
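%--------------------------------------------------------------------------
% A usage sketch (not part of the original run): the third dimension of the
% mean confusion matrices is indexed by validation method, so, for example,
%
%   disp(mlp_best_cases(loo_ind));             % Best MLP accuracy under LOO
%   disp(rbf_mean_confusion(:, :, kfold_ind)); % Mean RBF confusion, k-fold
%
% would print the LOO best case for the MLP and the mean k-fold confusion
% matrix for the RBF.
%--------------------------------------------------------------------------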
toc;
diary off;