Own implementation min max scaler
cheziyi committed Aug 29, 2024
1 parent eee90ec commit bfb210f
Showing 6 changed files with 67 additions and 15 deletions.
mypy.ini (4 changes: 0 additions & 4 deletions)
@@ -1,6 +1,2 @@
 [mypy]
 disable_error_code = annotation-unchecked
-
-[mypy-sklearn.*]
-; TechDebt: Should implement typings
-ignore_missing_imports = True
pyproject.toml (8 changes: 5 additions & 3 deletions)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "synnax-shared"
-version = "1.5.2"
+version = "1.6.0"
 description = "Synnax shared Python pacakges"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -17,9 +17,11 @@ helpers = []
 http-client = ["requests", "types-requests", "retry", "types-retry"]
 logging-json-formatter = ["python-json-logger"]
 system-token-issuer = ["boto3", "boto3-stubs[lambda]", "PyJWT"]
-data-processing-modules = ["scikit-learn==1.5.1"]
+data-processing = ["pandas", "pandas-stubs"]
 dev = ["mypy", "black"]
-all = ["synnax-shared[events,helpers,http-client,logging-json-formatter,system-token-issuer,data-processing-modules,dev]"]
+all = [
+    "synnax-shared[events,helpers,http-client,logging-json-formatter,system-token-issuer,data-processing,dev]",
+]
 
 [tool.hatch.build.targets.sdist]
 include = ["/src"]
Empty file.
@@ -1,15 +1,18 @@
-from sklearn.preprocessing import MinMaxScaler
+from typing import Dict
+from pandas import DataFrame
 
 # TechDebt: We cannot modify or move this file because we pickle it.
 # This class needs to be properly serialized.
+from synnax_shared.data_processing.scaler.min_max_scaler import (
+    MinMaxScaler,
+    MinMaxScalerDto,
+)
 
 
-class MultipleColumnsMinMaxScaler:
+class DataFrameMinMaxScaler:
 
     def __init__(self):
-        self.scalers = {}
+        self.scalers: Dict[str, MinMaxScaler] = {}
 
-    def fit_transform(self, df, col=None):
+    def fit_transform(self, df: DataFrame, col=None):
         df = df.copy()
         if col is None:
             for col in df.select_dtypes(exclude="O"):
@@ -22,7 +25,7 @@ def fit_transform(self, df, col=None):
             self.scalers[col] = scaler
         return df
 
-    def transform(self, df, col=None):
+    def transform(self, df: DataFrame, col=None):
         df = df.copy()
         if col is None:
             for col in df.select_dtypes(exclude="O"):
@@ -31,7 +34,7 @@ def transform(self, df, col=None):
             df[col] = self.scalers[col].transform(df[col].values.reshape(-1, 1))
         return df
 
-    def inverse_transform(self, df, col=None):
+    def inverse_transform(self, df: DataFrame, col=None):
         df = df.copy()
         if col is None:
             for col in df.select_dtypes(exclude="O"):
@@ -41,3 +44,14 @@ def inverse_transform(self, df, col=None):
         else:
             df[col] = self.scalers[col].inverse_transform(df[col].values.reshape(-1, 1))
         return df
+
+    def toDto(self) -> Dict[str, MinMaxScalerDto]:
+        return {col: scaler.toDto() for col, scaler in self.scalers.items()}
+
+    @staticmethod
+    def fromDto(dto: Dict[str, MinMaxScalerDto]):
+        scaler = DataFrameMinMaxScaler()
+        scaler.scalers = {
+            col: MinMaxScaler.fromDto(scalerDto) for col, scalerDto in dto.items()
+        }
+        return scaler
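
For context, a minimal usage sketch of the renamed class. The import path for DataFrameMinMaxScaler is assumed here, since this diff does not show the file's path; everything else follows the methods shown above.

import pandas as pd

# Assumed import path: the diff does not show where DataFrameMinMaxScaler is defined.
from synnax_shared.data_processing.data_frame_min_max_scaler import DataFrameMinMaxScaler

df = pd.DataFrame({"temp": [10.0, 20.0, 30.0], "label": ["a", "b", "c"]})

scaler = DataFrameMinMaxScaler()
scaled = scaler.fit_transform(df)            # numeric columns scaled to [0, 1]; object column "label" is skipped
restored = scaler.inverse_transform(scaled)  # recovers the original values

dto = scaler.toDto()                         # plain dict of per-column {"min": ..., "max": ...}
rehydrated = DataFrameMinMaxScaler.fromDto(dto)
rescaled = rehydrated.transform(df)          # same scaling, rebuilt from the DTO instead of a pickle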
src/synnax_shared/data_processing/scaler/min_max_scaler.py (40 changes: 40 additions & 0 deletions)
@@ -0,0 +1,40 @@
from typing import TypedDict
from pandas import Series


class MinMaxScalerDto(TypedDict):
    min: float
    max: float


class MinMaxScaler:

    def __init__(self, min: float | None = None, max: float | None = None):
        self.min = min
        self.max = max

    def fit_transform(self, series: Series) -> Series:
        self.min = series.min()
        self.max = series.max()
        return self.transform(series)

    def transform(self, series: Series) -> Series:
        if self.min is None or self.max is None:
            raise ValueError("MinMaxScaler not fitted")
        series = series.copy()
        return (series - self.min) / (self.max - self.min)

    def inverse_transform(self, series: Series) -> Series:
        if self.min is None or self.max is None:
            raise ValueError("MinMaxScaler not fitted")
        series = series.copy()
        return series * (self.max - self.min) + self.min

    def toDto(self) -> MinMaxScalerDto:
        if self.min is None or self.max is None:
            raise ValueError("MinMaxScaler not fitted")
        return {"min": self.min, "max": self.max}

    @staticmethod
    def fromDto(dto: MinMaxScalerDto):
        return MinMaxScaler(dto["min"], dto["max"])
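
A small round-trip sketch for the new Series-level scaler. JSON is used here only to illustrate that the DTO is a plain dict and can therefore be serialized without pickling; the commit itself does not prescribe a format.

import json
from pandas import Series

from synnax_shared.data_processing.scaler.min_max_scaler import MinMaxScaler

s = Series([5.0, 10.0, 15.0])
scaler = MinMaxScaler()
scaled = scaler.fit_transform(s)             # (s - 5) / (15 - 5) -> 0.0, 0.5, 1.0
original = scaler.inverse_transform(scaled)  # back to 5.0, 10.0, 15.0

payload = json.dumps(scaler.toDto())         # e.g. '{"min": 5.0, "max": 15.0}'
restored = MinMaxScaler.fromDto(json.loads(payload))
rescaled = restored.transform(s)             # matches the originally fitted scaler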
