-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_categorical_features.py
49 lines (35 loc) · 1.58 KB
/
convert_categorical_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
def encode_binary_column(column_data):
    """Encode the labels of a single column as integer codes.

    A brand-new ``LabelEncoder`` is fitted on every call, so the mapping
    from label to code is derived only from the values passed in.

    Args:
        column_data: The column values to encode; handed unchanged to
            ``LabelEncoder.fit_transform``.

    Returns:
        Whatever ``LabelEncoder.fit_transform`` returns for ``column_data``.
    """
    return LabelEncoder().fit_transform(column_data)
def encode_categorical_column(columns, index):
    """One-hot encode the selected columns and pass the others through.

    Args:
        columns: The table to transform; handed unchanged to
            ``ColumnTransformer.fit_transform``.
        index: Column selector for the columns that should be one-hot
            encoded (the script in this file passes a list of integer
            positions).

    Returns:
        The dense (never scipy-sparse) result of the transformation: the
        one-hot encoded columns come first, followed by the remaining
        columns passed through untouched.
    """
    transformer = ColumnTransformer(
        [('one_hot_encoder', OneHotEncoder(categories='auto'), index)],
        remainder='passthrough',
        # OneHotEncoder produces sparse output, and with the default
        # threshold (0.3) the stacked result can stay sparse when the data
        # is mostly zeros. The caller wraps the result in a DataFrame,
        # which needs a dense array, so always densify.
        sparse_threshold=0,
    )
    return transformer.fit_transform(columns)
# Load the raw dataset; every column below is addressed by position.
data_from_file = pd.read_excel('stroke_rus_emissions.xlsx')

# Replace each listed column with its integer label codes, in place.
# Going by the names used for them previously, positions 0, 4 and 6 hold
# sex, marital status and place; 5 and 9 hold work type and smoking status
# (NOTE(review): confirm against the spreadsheet header).
# The columns are independent, so each one is read, encoded and written back
# in a single step.
for column_position in (0, 4, 6, 5, 9):
    data_from_file.iloc[:, column_position] = encode_binary_column(
        data_from_file.iloc[:, column_position].values
    )

# Expand columns 5 and 9 into one-hot columns. The transformer returns a plain
# array, so the original column names are not carried over to the new frame.
columns = encode_categorical_column(data_from_file, [5, 9])
data_from_file = pd.DataFrame(columns)

# data_from_file is already a DataFrame, so no extra wrapping is needed
# before saving.
dataframe_to_save = data_from_file
dataframe_to_save.to_excel("stroke_rus_processed.xlsx")
# Converting categorical features to numeric ones: