-
Notifications
You must be signed in to change notification settings - Fork 0
/
FeatureExtractor.py
143 lines (126 loc) · 7.56 KB
/
FeatureExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from utils import *
class FeatureExtractor:  # TODO testme @20240318 - updated interface.
    """Load feature functions from a directory and apply them to a signal.

    Every ``*.<extension>`` file found in ``location`` is executed and the
    first name it defines is stored on the instance as
    ``<feature_base_name>_<file name up to the first dot>``.
    ``extract_features`` calls the functions selected by ``feature_list``;
    each one receives this extractor as its only argument, so it can read
    ``self.signal`` and the parameters copied from ``features_dict``.
    """
    # TODO remake class to process signal (1D array) as well as list of windows (2D array)

    def __init__(self, feature_list, features_dict, variance_type='var', raw_features=True,
                 keep_feature_dims=False, location='Features_functions', extension='py',
                 feature_base_name='feature'):
        """Store the configuration and load every feature function.

        :param feature_list: ids of the features to extract, in output order.
        :param features_dict: parameters copied onto the instance as
            attributes before each extraction.
        :param variance_type: ``'var'`` selects ``np.var``; any other value
            selects the median-absolute-deviation based spread.
        :param raw_features: when true, feature values are emitted as
            returned instead of being summarised by mean and spread.
        :param keep_feature_dims: when false, a raw ``'mfcc'`` feature is
            averaged over its last axis before being emitted.
        :param location: directory holding the feature function files.
        :param extension: file extension (without the dot) of those files.
        :param feature_base_name: prefix of the attribute names under which
            the loaded functions are stored.
        """
        self.__location = str(location)
        self.__extension = str(extension)
        self.signal = None
        self.__feature_base_name = str(feature_base_name)
        self.__load_features()
        self.feature_list = feature_list
        self.features_dict = features_dict
        self.variance_type = variance_type
        self.raw_features = raw_features
        self.keep_feature_dims = keep_feature_dims

    def __get_feature_name(self, feature_id):
        """Return the attribute name under which ``feature_id`` is stored."""
        return self.__feature_base_name + '_' + feature_id

    def __set_params(self, signal, features_dict=None):
        """Set ``self.signal`` and copy the extraction parameters onto ``self``.

        :param signal: array-like signal; stored as a float64 array.
        :param features_dict: parameters to expose as attributes, e.g.
            ``{'n_fft': 2048}`` becomes ``self.n_fft``.  Falls back to
            ``self.features_dict`` when omitted.
        """
        self.signal = np.asanyarray(signal, dtype=np.float64)
        params = self.features_dict if features_dict is None else features_dict
        for key, value in params.items():
            setattr(self, key, value)

    def __load_features(self):
        """Load every feature file and store its function on this object.

        Files whose name does not end in ``.<extension>`` are ignored, and so
        are files that define no name at all (for example an empty
        ``__init__.py``), which previously aborted loading with an
        ``IndexError``.
        """
        suffix = "." + self.__extension
        for file_name in os.listdir(self.__location):
            if not file_name.endswith(suffix):
                continue
            # The context manager closes the handle even if reading fails.
            with open(os.path.join(self.__location, file_name), 'r') as source_file:
                source = source_file.read()
            namespace = {}
            # SECURITY: this executes the file's code verbatim; the feature
            # directory must only ever contain trusted files.
            exec(source, None, namespace)
            if not namespace:
                continue
            # By convention the first name defined by the file is the feature.
            reference = next(iter(namespace.values()))
            feature_name = self.__get_feature_name(file_name.split('.')[0])
            setattr(self, feature_name, reference)

    def __extract_feature_by_id(self, feature_key):
        """Compute the single feature identified by ``feature_key``.

        If the feature is missing or its first call fails, the feature files
        are reloaded and the call is attempted once more.

        :raises Exception: when the feature is still missing, or still fails,
            after reloading; the underlying error is chained as the cause.
        """
        feature_id = self.__get_feature_name(str(feature_key))
        if hasattr(self, feature_id):
            try:
                return getattr(self, feature_id)(self)
            except Exception:
                self.__load_features()
                try:
                    print(debugger_details(), "retry load features")
                    return getattr(self, feature_id)(self)
                except Exception as e:
                    print(debugger_details(), "retry load features failed")
                    raise Exception('Attribute ' + feature_id + ' not found after reloading. '
                                    'Or calling the function failed:', e) from e
        else:
            self.__load_features()
            try:
                return getattr(self, feature_id)(self)
            except Exception as e:
                raise Exception('Attribute ' + feature_id + ' not found after reloading. '
                                'Or calling the function failed:', e) from e

    @staticmethod
    def _append_flat(features, feature):
        """Extend ``features`` with an iterable value, or append a scalar one."""
        try:
            features.extend(feature)
        except TypeError:
            # Not iterable (e.g. a plain number such as tempo).
            features.append(feature)

    @staticmethod
    def _scaled_mad(values, axis):
        """Return the median absolute deviation scaled by 1.4826 along ``axis``.

        ``scipy.stats.median_absolute_deviation`` multiplied by 1.4826 by
        default but is deprecated and missing from recent SciPy releases.
        Its replacement divides by ``scale`` instead, so the reciprocal
        reproduces the same numbers.
        """
        mad = getattr(scipy.stats, 'median_abs_deviation', None)
        if mad is not None:
            return mad(values, axis=axis, scale=1.0 / 1.4826)
        return scipy.stats.median_absolute_deviation(values, axis=axis)

    def __collect_raw(self, features, feature_key):
        """Append the unsummarised value of one feature to ``features``."""
        feature = self.__extract_feature_by_id(feature_key)
        try:
            is_matrix = len(feature.shape) == 2
        except (AttributeError, TypeError):
            is_matrix = False  # no usable shape, so it cannot be a matrix
        if not is_matrix:
            self._append_flat(features, feature)
        # NOTE(review): a 2-D feature is emitted only when it is 'mfcc' and
        # keep_feature_dims is false; every other 2-D feature contributes
        # nothing to the output. Confirm whether that is intended.
        if feature_key == 'mfcc' and not self.keep_feature_dims:
            self._append_flat(features, np.mean(feature, axis=-1))

    def __collect_window_stats(self, features, feature_key):
        """Append the mean and spread of one feature of a 1-D signal."""
        feature = self.__extract_feature_by_id(feature_key)
        if feature_key == 'tempo':
            # Tempo is emitted as a single number, without a spread.
            features.append(float(feature))
        elif feature_key == 'mfcc':
            features.extend(np.mean(feature, axis=1))
            if self.variance_type == 'var':
                # NOTE(review): the variance is squared here but not for the
                # other features below; kept as-is to preserve existing
                # outputs. Confirm which one is intended.
                features.extend(np.var(feature, axis=1) ** 2)
            else:
                features.extend(self._scaled_mad(feature, axis=1) ** 2)
        else:
            features.append(np.mean(feature))
            if self.variance_type == 'var':
                features.append(np.var(feature))
            else:
                features.append(squared_median_abs_dev(feature))

    def extract_features(self, signal):
        """Return the configured features of ``signal`` as a flat array.

        The features named in ``self.feature_list`` are computed in order
        with the parameters in ``self.features_dict``.

        * ``raw_features`` true: values are emitted as returned by the
          feature functions (scalars appended, iterables flattened one
          level); a 2-D ``'mfcc'`` is averaged over its last axis unless
          ``keep_feature_dims`` is set.
        * ``raw_features`` false and 1-D signal: each feature contributes its
          mean followed by its spread (``variance_type``); ``'tempo'``
          contributes a single float.
        * ``raw_features`` false and 2-D signal: each feature's ``tolist()``
          result is appended row by row.

        :param signal: array-like signal; lists and tuples are accepted.
        :return: np.array
        @BR20240319 updated return type from list to np.array
        """
        features = []
        self.__set_params(signal, self.features_dict)
        # Ask numpy for the rank instead of reading ``signal.shape`` so that
        # plain Python sequences work on every path.
        n_dims = np.ndim(signal)
        for feature_key in self.feature_list:
            if self.raw_features:
                self.__collect_raw(features, feature_key)
            elif n_dims == 1:
                self.__collect_window_stats(features, feature_key)
            elif n_dims == 2:
                feature = self.__extract_feature_by_id(feature_key)
                features.extend(feature.tolist())
        return np.array(features)