-
Notifications
You must be signed in to change notification settings - Fork 3
/
feature_selection_final.py
44 lines (42 loc) · 2.62 KB
/
feature_selection_final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
import pandas as pd
import time
# data = pd.read_csv('data/drug_cell/drug/17-AAG_train_data.csv') # fn=10 0.8529
# data = pd.read_csv('data/drug_cell/drug/Erlotinib_train_data.csv') # fn=10 0.9245
# data = pd.read_csv('data/drug_cell/drug/Irinotecan_train_data.csv') # fn=9, 0.8404->0.8560 (test set 0.3->0.4)
# data = pd.read_csv('data/drug_cell/drug/AZD6244_train_data.csv') # fn=12, 0.9394
# data = pd.read_csv('data/drug_cell/drug/Lapatinib_train_data.csv') # fn=7 0.8704
# data = pd.read_csv('data/drug_cell/drug/PD-0325901_train_data.csv') # fn=7 0.9412
# data = pd.read_csv('data/drug_cell/drug/Sorafenib_train_data.csv') # fn=9 0.8679
# data = pd.read_csv('data/drug_cell/drug/AEW541_train_data.csv') # fn=7 0.8095
# data = pd.read_csv('data/drug_cell/drug/PHA-665752_train_data.csv') # fn=10 0.8214; with fn=3, only 0.69
# data = pd.read_csv('data/drug_cell/drug/Paclitaxel_train_data.csv') # fn=11 0.8857
# data = pd.read_csv('data/drug_cell/drug/PLX4720_train_data.csv') # fn=7 no negative samples, cannot run
# data = pd.read_csv('data/drug_cell/drug/AZD0530_train_data.csv') # fn=11 0.8308
# data = pd.read_csv('data/drug_cell/drug/LBW242_train_data.csv') # fn=6 0.8000->0.8302 (test set 0.3->0.4)
# data = pd.read_csv('data/drug_cell/drug/Nutlin-3_train_data.csv') # fn=9 0.7763->0.8421 (test set 0.3->0.4)
# data = pd.read_csv('data/drug_cell/drug/Panobinostat_train_data.csv') # fn=14 0.9825; with a 50% test set, 0.9789
# data = pd.read_csv('data/drug_cell/drug/PD-0332991_train_data.csv') # fn=8 0.9036
# data = pd.read_csv('data/drug_cell/drug/PF2341066_train_data.csv') # fn=10 0.9000
# data = pd.read_csv('data/drug_cell/drug/RAF265_train_data.csv') # fn=10 0.8837->0.8953
# data = pd.read_csv('data/drug_cell/drug/TAE684_train_data.csv') # fn=8 0.8205
# data = pd.read_csv('data/drug_cell/drug/TKI258_train_data.csv') # fn=13 0.9125,->0.9250
# data = pd.read_csv('data/drug_cell/drug/Topotecan_train_data.csv') # fn=10 0.9432->0.9545
# Select a fixed number of features from one drug's training set using
# recursive feature elimination (RFE) driven by a linear-kernel SVM, then
# write the reduced table (selected features + 'label') next to the input.
#
# To process a different drug, change DRUG_NAME and N_FEATURES. Both the
# input and the output path are derived from DRUG_NAME so they cannot drift
# apart.
DATA_DIR = 'data/drug_cell/drug'
DRUG_NAME = 'ZD-6474'
# Note carried over from the original load line: "fn=8 0.8375"
# (presumably the feature count and the resulting score -- TODO confirm).
N_FEATURES = 8

data = pd.read_csv('{}/{}_train_data.csv'.format(DATA_DIR, DRUG_NAME))
# Missing values are replaced with 0 before fitting.
data = data.fillna(0)
# Every column except the last is passed to the selector as a feature;
# the last column is used as the target.
x = data.iloc[:, :-1]
y = data.iloc[:, -1]

svc = SVC(kernel="linear", C=1)
# step=1: one feature is eliminated per RFE iteration until N_FEATURES remain.
rfe = RFE(estimator=svc, n_features_to_select=N_FEATURES, step=1)

# perf_counter() is monotonic, so the reported duration is not affected by
# system clock adjustments during the (long) fit.
start = time.perf_counter()
print('算法运行开始......')
rfe.fit(x, y)  # the fit takes a very long time
print('算法运行结束......')
end = time.perf_counter()
print("特征选择运行时间:", end - start)

# Keep only the columns RFE selected. The explicit copy makes x_new an
# independent frame, so adding the 'label' column below does not raise
# pandas' SettingWithCopyWarning (x is itself a slice of `data`).
x_new = x.loc[:, rfe.get_support()].copy()
x_new['label'] = y
print(x_new)
x_new.to_csv(
    '{}/{}_train_data-rfe.csv'.format(DATA_DIR, DRUG_NAME),
    index=False,
    float_format='%.2f',
)