Skip to content

Commit

Permalink
data drift detection script
Browse files Browse the repository at this point in the history
  • Loading branch information
yuriihavrylko committed Jan 12, 2024
1 parent 6c57e12 commit 2beb756
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
2 changes: 2 additions & 0 deletions app/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ wandb==0.16.1
httpx==0.23.0
locust==2.20.1
textpruner==1.1.post2
evidently==0.4.13
sentence_transformers==2.2.2
55 changes: 55 additions & 0 deletions app/src/monitoring/drift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pandas as pd
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metrics import EmbeddingsDriftMetric
from evidently.metrics.data_drift.embedding_drift_methods import distance
from sentence_transformers import SentenceTransformer

from src.model.training import load_data


def create_embeddings(model, texts, batch_size=32):
    """Encode ``texts`` with ``model`` and return the embeddings.

    Args:
        model: Object exposing ``encode(texts, batch_size=...)``; ``main``
            passes a ``SentenceTransformer``.
        texts: The texts to encode, forwarded to ``model.encode`` unchanged.
        batch_size: Batch size forwarded to ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns for ``texts``.
    """
    return model.encode(texts, batch_size=batch_size)

def prepare_dataframe(dataset, embeddings, max_rows=10000):
    """Build a frame holding dataset rows next to their embeddings.

    Args:
        dataset: Sliceable source of rows; ``dataset[:max_rows]`` must be
            something ``pd.DataFrame`` accepts.
        embeddings: Sliceable 2-D embeddings whose i-th row belongs to the
            i-th dataset row. May contain more rows than are kept.
        max_rows: Maximum number of leading dataset rows to keep.

    Returns:
        A DataFrame with the dataset columns followed by one column per
        embedding dimension (pandas' default integer column labels).

    Raises:
        ValueError: If there are fewer embedding rows than kept dataset rows.
    """
    df = pd.DataFrame(dataset[:max_rows])
    n_rows = len(df)

    if len(embeddings) < n_rows:
        raise ValueError(
            f"expected at least {n_rows} embedding rows, got {len(embeddings)}"
        )

    # The caller may have embedded the whole dataset while only the first
    # ``max_rows`` rows are kept here; trim the embeddings to match so the
    # shared index lines up instead of failing on a shape mismatch.
    embeddings_df = pd.DataFrame(embeddings[:n_rows], index=df.index)
    return pd.concat([df, embeddings_df], axis=1)

def generate_report(df, column_mapping):
    """Run an embeddings drift report over ``df``.

    The first 2000 rows of ``df`` are used as reference data and all
    remaining rows as current data. Drift is computed for the
    ``'small_subset'`` embeddings with the cosine ``distance`` method and a
    threshold of 0.2.

    Args:
        df: Frame containing the embedding columns to compare.
        column_mapping: Mapping handed to ``Report.run``; presumably it must
            define the ``'small_subset'`` embeddings group (as ``main`` does).

    Returns:
        The ``Report`` instance after ``run`` has been called on it.
    """
    drift_method = distance(
        dist='cosine',
        threshold=0.2,
        pca_components=None,
        bootstrap=None,
        quantile_probability=0.95,
    )
    embeddings_drift = EmbeddingsDriftMetric('small_subset', drift_method=drift_method)

    # Leading rows are the baseline; everything after them is compared to it.
    reference_rows = df[:2000]
    current_rows = df[2000:]

    report = Report(metrics=[embeddings_drift])
    report.run(
        reference_data=reference_rows,
        current_data=current_rows,
        column_mapping=column_mapping,
    )
    return report

def main():
    """Embed the dataset's texts and display an embeddings drift report.

    Loads the data with ``load_data``, encodes its ``'text'`` field with the
    ``paraphrase-MiniLM-L12-v2`` sentence-transformers model, joins rows and
    embeddings with ``prepare_dataframe`` and renders the report produced by
    ``generate_report``.
    """
    model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L12-v2')

    ds = load_data()
    embeddings = create_embeddings(model, ds['text'])
    df = prepare_dataframe(ds, embeddings)

    # prepare_dataframe appends one column per embedding dimension after the
    # dataset's own columns, so select the trailing columns by embedding
    # width rather than assuming the dataset has exactly four columns.
    embedding_dim = len(embeddings[0])
    column_mapping = ColumnMapping(
        embeddings={'small_subset': df.columns[-embedding_dim:]}
    )

    report = generate_report(df, column_mapping)

    # NOTE(review): 'inline' rendering presumably targets a notebook
    # environment - confirm this is useful when run as a plain script.
    report.show(mode='inline')

# Run the drift report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 2beb756

Please sign in to comment.