The objective of this activity is to practice data preprocessing techniques on a sample dataset.
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the sample dataset and take a first look at its contents.
data = pd.read_csv('sample_data.csv')
print(data.head())
# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print() (that would emit a stray "None" line).
data.info()

# Forward fill: propagate the last valid observation into missing values.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in pandas 2.1+.
data = data.ffill()

# Drop rows that are exact duplicates of an earlier row.
data = data.drop_duplicates()

# Parse the 'date' column into datetime values.
data['date'] = pd.to_datetime(data['date'])

# One-hot encode the 'category' column.
data = pd.get_dummies(data, columns=['category'])

# Standardize the two numeric feature columns with StandardScaler.
scaler = StandardScaler()
data[['feature1', 'feature2']] = scaler.fit_transform(data[['feature1', 'feature2']])
Here’s the complete code for reference:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the sample dataset and take a first look at its contents.
data = pd.read_csv('sample_data.csv')
print(data.head())
# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print() (that would emit a stray "None" line).
data.info()

# Forward fill: propagate the last valid observation into missing values.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in pandas 2.1+.
data = data.ffill()

# Drop rows that are exact duplicates of an earlier row.
data = data.drop_duplicates()

# Parse the 'date' column into datetime values.
data['date'] = pd.to_datetime(data['date'])

# One-hot encode the 'category' column.
data = pd.get_dummies(data, columns=['category'])

# Standardize the two numeric feature columns with StandardScaler.
scaler = StandardScaler()
data[['feature1', 'feature2']] = scaler.fit_transform(data[['feature1', 'feature2']])