From b0e0aa4d104fee3c52e48f4c423765f04ec8ceb5 Mon Sep 17 00:00:00 2001
From: Darl Jed Matundan <darljedmatundan@gmail.com>
Date: Tue, 25 Jul 2023 22:45:17 +0800
Subject: [PATCH] Updated debug messages and added a progress indicator while the output file is saved

---
 excel_field_deleter.py | 32 +++++++++++++++++++++-----------
 requirements.txt       |  1 +
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/excel_field_deleter.py b/excel_field_deleter.py
index 434ef49..6e72418 100644
--- a/excel_field_deleter.py
+++ b/excel_field_deleter.py
@@ -8,7 +8,8 @@ class Deleter:
     def __init__(self):
         self.pwd = os.path.dirname(os.path.realpath(__file__))
         self.debugMode = True
-        self.output_filename = f'output-{datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")}'
+        self.output_filename = ""
+        self.complete = False
         self.mainData = []
         self.tasks = []
         try:
@@ -25,14 +26,17 @@ def debug(self,str):
         if(self.debugMode):
             print(str)
     
-    def saveOutput(self,dataArr):
+    async def saveOutput(self,dataArr):
         with pd.ExcelWriter(os.path.join(self.pwd,f'{self.output_filename}.xlsx')) as writer:
             for data in dataArr:
                 df = pd.DataFrame(data['data'])
                 df.to_excel(writer,data['name'])
+                
+            self.complete = True
+            self.debug("self.complete has been set to True")
 
     async def processWorksheet(self,sheet):
-        self.debug(f"{self.filename} : Parsing fields for worksheet {sheet}... This might take a few minutes to complete.")
+        self.debug(f"{self.filename} : Parsing fields for sheet '{sheet}'... ")
         sheet_content = self.xl.parse(sheet)
         j = sheet_content.to_dict()
         self.debug(f"{self.filename} : removing unncessary fields...")
@@ -47,7 +51,7 @@ async def processWorksheet(self,sheet):
             "data": newDict
         }
         self.mainData.append(newObj)
-        self.debug(f"{self.filename} : worksheet {sheet} has been cleaned-up.")
+        self.debug(f"{self.filename} : sheet '{sheet}' has been cleaned-up.")
 
     async def main(self):
         
@@ -72,28 +76,34 @@ async def main(self):
         if(os.path.exists(os.path.join(self.pwd,self.filename))):
             self.xl = pd.ExcelFile(os.path.join(self.pwd,self.filename))
             self.debug(f"{self.filename} has been successfully loaded.")
-            self.debug(f"{self.filename} : Reading worksheet names")
+            self.debug(f"{self.filename} : Reading ...")
             # read the sheet names
             sheetnames = self.xl.sheet_names
-            self.debug(f"{self.filename} : Found {len(sheetnames)} worksheet.")
+            self.debug(f"{self.filename} : Found {len(sheetnames)} sheets.")
             for sheet in sheetnames:
                 # create a task
                 self.tasks.append(asyncio.create_task(self.processWorksheet(sheet)))
 
         else:
-            self.debug(f"The file {self.filename} does not exists. Aborting execution.")
+            self.debug(f"The file '{self.filename}' does not exists. Aborting execution.")
             exit(1)
 
         await asyncio.gather(*self.tasks)
         self.debug("Almost there! Please wait while the new file is being generated... This will take some time for large datasets.")
-        self.saveOutput(self.mainData)
+        self.output_filename = f'output-{datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")}'
+        self.tasks.append(asyncio.create_task(self.saveOutput(self.mainData)))
+        while not self.complete:
+            print(".",end="")
+            sys.stdout.flush()
+            await asyncio.sleep(1)
+        
+        # await asyncio.gather(*self.tasks)
         # with open('file.json',"w") as f:
         #     f.write(json.dumps(self.mainData))
         endtime = time.time()
-        self.debug(f"Success! The clean up process has been completed within {endtime - startime} seconds.")
-        self.debug(f"The new file {self.output_filename}.xlsx has been generated.")
+        self.debug(f"\n\nSuccess! The clean up process has been completed within {round(endtime - startime,2)} seconds.")
+        self.debug(f"A new file has been generated in:\n{os.path.join(self.pwd,self.output_filename)}.xlsx")
 
-    
         
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 1934088..34a40b0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
+openpyxl==3.1.2
 pandas==2.0.3
\ No newline at end of file