From 833d4d0d2adab898ea89bc11b9ba27b385e12bfc Mon Sep 17 00:00:00 2001 From: nazarfil Date: Tue, 3 Dec 2024 14:32:53 +0100 Subject: [PATCH] feat: skip any byte column for profiling, adds number of column --- hexa/datasets/queue.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hexa/datasets/queue.py b/hexa/datasets/queue.py index 10fabde7c..19c90fceb 100644 --- a/hexa/datasets/queue.py +++ b/hexa/datasets/queue.py @@ -110,11 +110,12 @@ def generate_profile(df: pd.DataFrame) -> list: unique_values = df.nunique().to_dict() distinct_values = df.apply(lambda x: x.nunique(dropna=False)).to_dict() constant_values = df.apply(lambda x: x.nunique() == 1).astype("bool").to_dict() + count = df.count() metadata_per_column = [ { "column_name": column, - "number_of_rows": len(df), + "count": count.get(column), "data_type": data_types.get(column), "missing_values": missing_values.get(column), "unique_values": unique_values.get(column),