From 89d63ea174b1f3475a686f6503e3a90919b39a4d Mon Sep 17 00:00:00 2001 From: marcus-snx Date: Thu, 19 Dec 2024 13:57:33 +0200 Subject: [PATCH] Fix clean data column naming --- indexers/scripts/clean_parquet.py | 2 ++ indexers/scripts/listener.py | 8 +------- indexers/scripts/utils.py | 6 ++++++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/indexers/scripts/clean_parquet.py b/indexers/scripts/clean_parquet.py index 5f9343cc..fdd735f0 100644 --- a/indexers/scripts/clean_parquet.py +++ b/indexers/scripts/clean_parquet.py @@ -2,6 +2,7 @@ from pathlib import Path import pandas as pd import os +from utils import convert_case RAW_DATA_PATH = "/parquet-data/indexers/raw" CLEAN_DATA_PATH = "/parquet-data/indexers/clean" @@ -33,6 +34,7 @@ def clean_parquet_files(network_name: str, protocol_name: str): if df.empty: empty_files += 1 continue + df.columns = [convert_case(col) for col in df.columns] event_dir.mkdir(parents=True, exist_ok=True) df.to_parquet(output_file, index=False) written_files += 1 diff --git a/indexers/scripts/listener.py b/indexers/scripts/listener.py index 74bcb6b9..54fc09bc 100644 --- a/indexers/scripts/listener.py +++ b/indexers/scripts/listener.py @@ -1,22 +1,16 @@ from pathlib import Path import time -import re import pandas as pd from watchdog.observers import Observer from watchdog.events import FileSystemEventHandler import clickhouse_connect -from utils import create_table_from_path, insert_data_from_path +from utils import create_table_from_path, insert_data_from_path, convert_case CLICKHOUSE_INTERNAL_PATH = "/var/lib/clickhouse/user_files/parquet-data/indexers/clean" RAW_DATA_PATH = "/parquet-data/indexers/raw" CLEAN_DATA_PATH = "/parquet-data/indexers/clean" -def convert_case(name): - snake_case = re.sub(r"(?