-
Notifications
You must be signed in to change notification settings - Fork 39
/
generate_spectrograms.py
49 lines (39 loc) · 1.64 KB
/
generate_spectrograms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot as plt
from matplotlib import cm
from tqdm import tqdm
import pylab
import librosa
from librosa import display
import numpy as np
WAV_DIR = 'wav_files/'
IMG_DIR = 'spectrogram_images/'
wav_files = os.listdir(WAV_DIR)
for f in tqdm(wav_files):
try:
# Read wav-file
y, sr = librosa.load(WAV_DIR+f, sr = 22050) # Use the default sampling rate of 22,050 Hz
# Pre-emphasis filter
pre_emphasis = 0.97
y = np.append(y[0], y[1:] - pre_emphasis * y[:-1])
# Compute spectrogram
M = librosa.feature.melspectrogram(y, sr,
fmax = sr/2, # Maximum frequency to be used on the on the MEL scale
n_fft=2048,
hop_length=512,
n_mels = 96, # As per the Google Large-scale audio CNN paper
power = 2) # Power = 2 refers to squared amplitude
# Power in DB
log_power = librosa.power_to_db(M, ref=np.max)# Covert to dB (log) scale
# Plotting the spectrogram and save as JPG without axes (just the image)
pylab.figure(figsize=(3,3))
pylab.axis('off')
pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[]) # Remove the white edge
librosa.display.specshow(log_power, cmap=cm.jet)
pylab.savefig(IMG_DIR + f[:-4]+'.jpg', bbox_inches=None, pad_inches=0)
pylab.close()
except Exception as e:
print(f, e)
pass