forked from jmarkow/zftftb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
zftftb_song_det.m
95 lines (72 loc) · 2.25 KB
/
zftftb_song_det.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
function [SONG_IDX,T]=zftftb_song_det(AUDIO,FS,varargin)
%ZFTFTB_SONG_DET Flag spectrogram frames that likely contain song.
%
%   [SONG_IDX,T]=zftftb_song_det(AUDIO,FS,'param',value,...)
%
%   Based on Andalmann's algorithm: a short-time spectrogram of AUDIO is
%   computed, the mean magnitude inside a "song" frequency band is compared
%   with the mean magnitude outside the band, and both the band/out-of-band
%   ratio and the raw in-band magnitude are thresholded and then smoothed
%   with a moving-average filter.
%
%   Inputs:
%     AUDIO   signal passed to SPECTROGRAM (integer input is cast to double)
%     FS      sampling rate in Hz (set to 30e3 if omitted)
%
%   Parameter/value pairs (names are case-insensitive; unrecognized names
%   are ignored):
%     'len'            spectrogram window length in s (default .005)
%     'overlap'        spectrogram window overlap in s (default 0)
%     'song_band'      [low high] song frequency band in Hz (default [2e3 6e3])
%     'song_duration'  moving-average length in s (default .8)
%     'ratio_thresh'   threshold on the song:nonsong ratio (default 2)
%     'pow_thresh'     threshold on in-band magnitude, au (default -inf;
%                      an empty value is treated as 0)
%     'song_thresh'    threshold on the smoothed ratio detector (default .2)
%     'songpow_thresh' threshold on the smoothed magnitude detector (default .8)
%     'silence'        if true, detect frames BELOW the raw thresholds
%                      instead of above them (default 0)
%
%   Outputs:
%     SONG_IDX  logical row vector, one element per spectrogram column; true
%               where both smoothed detectors exceed their thresholds
%     T         time vector returned by SPECTROGRAM

if nargin<2
    disp('Setting FS to 30e3...');
    FS=30e3;
end

% the template cutoff could be defined by the 95th prctile of the abs(noise) magnitude

nparams=length(varargin);

if mod(nparams,2)>0
    error('Parameters must be specified as parameter/value pairs!');
end

len=.005; % window length (s)
song_band=[2e3 6e3]; % frequency band for singing
overlap=0; % overlap (s)
song_duration=.8; % smoothing (s)
ratio_thresh=2; % ratio song:nonsong
pow_thresh=-inf; % power threshold (au)
song_thresh=.2; % song threshold
songpow_thresh=.8; % threshold for song power
silence=0; % check for silence rather than singing

for i=1:2:nparams
    switch lower(varargin{i})
        case 'song_band'
            song_band=varargin{i+1};
        case 'len'
            len=varargin{i+1};
        case 'overlap'
            overlap=varargin{i+1};
        case 'song_duration'
            song_duration=varargin{i+1};
        case 'ratio_thresh'
            ratio_thresh=varargin{i+1};
        case 'song_thresh'
            song_thresh=varargin{i+1};
        case 'pow_thresh'
            pow_thresh=varargin{i+1};
        case 'songpow_thresh'
            songpow_thresh=varargin{i+1};
        case 'silence'
            silence=varargin{i+1};
    end
end

% convert window parameters from seconds to samples
len=round(len*FS);
overlap=round(overlap*FS);

if isempty(pow_thresh)
    pow_thresh=0;
end

% spectrogram only accepts floating-point data; promote integer audio
if isinteger(AUDIO)
    AUDIO=double(AUDIO);
end

[s,f,T]=spectrogram(AUDIO,len,overlap,[],FS);

% spectral magnitude (named spec_mag so the built-in POWER is not shadowed)
spec_mag=abs(s);

% locate the frequency bins bracketing the song band; if a band edge lies
% outside the frequency axis (e.g. above Nyquist), clamp to the nearest bin
% rather than ending up with an empty index
min_idx=find(f<=song_band(1),1,'last');
if isempty(min_idx)
    min_idx=1;
end

max_idx=find(f>=song_band(2),1,'first');
if isempty(max_idx)
    max_idx=numel(f);
end

% take the song/nonsong magnitude ratio (eps guards against divide-by-zero)
song=mean(spec_mag(min_idx:max_idx,:),1);
nonsong=mean(spec_mag([1:min_idx-1 max_idx+1:end],:),1)+eps;
song_ratio=song./nonsong;

% moving-average filter spanning song_duration seconds of frames; at least
% one frame so that a very short song_duration cannot yield an empty kernel
filt_size=max(1,round((FS*song_duration)/(len-overlap)));
mov_filt=ones(1,filt_size)*1/filt_size;

if ~silence
    song_detvec=conv(double(song_ratio>ratio_thresh),mov_filt,'same');
    pow_detvec=conv(double(song>pow_thresh),mov_filt,'same');
else
    song_detvec=conv(double(song_ratio<ratio_thresh),mov_filt,'same');
    pow_detvec=conv(double(song<pow_thresh),mov_filt,'same');
end

% where is the threshold exceeded for both the raw power and the ratio?
pow_idx=pow_detvec>songpow_thresh;
ratio_idx=song_detvec>song_thresh;

SONG_IDX=pow_idx&ratio_idx;