# Provenance: added as hub/management/commands/pivot_csv.py by commit
# e3129e785e3695fe3af196a68bd809eeda9b6b6b
# (Struan Donald, Thu, 28 Nov 2024 17:04:07 +0000).
#
# Commit message:
#   utility command to pivot a CSV
#
#   helpful for turning CSVs with multiple rows per constituency into ones
#   with multiple columns per constituency

import json

from django.conf import settings
from django.core.management.base import BaseCommand

import pandas as pd


class Command(BaseCommand):
    """Management command that pivots a CSV file.

    Reads ``--infile`` from ``settings.BASE_DIR / "data"``, reshapes it with
    ``pandas.DataFrame.pivot`` and writes the result to ``--outfile`` in the
    same directory.
    """

    help = (
        "Pivot a CSV in the data directory so that the distinct values of "
        "--column become columns of the output CSV"
    )

    def add_arguments(self, parser):
        """Register the command-line options consumed by ``handle``."""
        # The file names and the pivot column have no usable default, so let
        # the argument parser reject a missing value up front instead of
        # failing later while building a path or calling pandas.
        parser.add_argument(
            "--infile",
            action="store",
            required=True,
            help="File to process",
        )

        parser.add_argument(
            "--outfile",
            action="store",
            required=True,
            help="File to output to",
        )

        parser.add_argument(
            "--column",
            action="store",
            required=True,
            help="Column to pivot on",
        )

        parser.add_argument(
            "--value_column",
            action="store",
            help="Column with values",
        )

        parser.add_argument(
            "--index",
            action="store",
            help="Column with index",
        )

    def handle(self, *args, **kwargs):
        """Read the input CSV, pivot it and write the output CSV.

        ``kwargs`` carries the parsed options: ``infile``, ``outfile``,
        ``column`` and, optionally, ``value_column`` and ``index``.
        """
        data_dir = settings.BASE_DIR / "data"
        df = pd.read_csv(data_dir / kwargs["infile"])

        # Forward the optional selectors only when they were supplied: newer
        # pandas releases treat an explicit None as a column label rather
        # than as "not given", so passing None through would fail there.
        pivot_args = {"columns": kwargs["column"]}
        for pivot_key, option in (("values", "value_column"), ("index", "index")):
            if kwargs.get(option) is not None:
                pivot_args[pivot_key] = kwargs[option]

        df = df.pivot(**pivot_args)
        df.to_csv(data_dir / kwargs["outfile"])