From 38d5df97589b342db14595c2c340c9c72f99b637 Mon Sep 17 00:00:00 2001 From: zer0 Date: Sun, 5 May 2024 22:22:48 -0500 Subject: [PATCH] Refactor file handling and add new code in backend and frontend --- backend/app.py | 1 - backend/database.py | 6 +++ backend/requirements.txt | 3 +- backend/resources/RoDe.py | 10 +++-- backend/src/data_validation.py | 10 +++++ backend/src/filters.py | 11 ++++- frontend/pages/auditoria.py | 1 - frontend/pages/rode.py | 80 ++++++++++++++++++++++------------ frontend/requirements.txt | 3 +- frontend/resources/RoDeProc.py | 4 +- frontend/resources/__init__.py | 1 - frontend/resources/filters.py | 9 ---- 12 files changed, 90 insertions(+), 49 deletions(-) delete mode 100644 frontend/resources/filters.py diff --git a/backend/app.py b/backend/app.py index e10740d..52bd1b1 100644 --- a/backend/app.py +++ b/backend/app.py @@ -24,7 +24,6 @@ basedir = os.path.abspath(os.path.dirname(__file__)) app = Flask(__name__) -app.config['SQLALCHEMY_DATABASE_URI'] ='sqlite:///' + os.path.join(basedir, 'database.db') api = Api(app) api.add_resource(Works, "/works") diff --git a/backend/database.py b/backend/database.py index 0547b3f..ff46860 100644 --- a/backend/database.py +++ b/backend/database.py @@ -1,5 +1,11 @@ import pymongo def get_database(): + """ + Returns the MongoDB database object for the DESD application. + + Returns: + pymongo.database.Database: The MongoDB database object. + """ client = pymongo.MongoClient("mongodb://localhost:27017/") return client["DESD"] \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 83f5fbc..2c274a0 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -5,4 +5,5 @@ pandas torch ultralytics>=8.1.45 sentry-sdk -pymongo \ No newline at end of file +pymongo +pytesseract \ No newline at end of file diff --git a/backend/resources/RoDe.py b/backend/resources/RoDe.py index c9067a9..bcad543 100644 --- a/backend/resources/RoDe.py +++ b/backend/resources/RoDe.py @@ -48,10 +48,15 @@ def post(self): if "filtros" in response: response["filtros"].append("hoja de control") else: - response["filtros"] = ["Hoja de Control"] + response["filtros"] = ["hoja de control"] + r_form = request.form.to_dict() + r_form.pop("filtros") # unir response y request.form - documento = request.form | response + documento = r_form | response + documento['prediccion'] = response['data'][0]['name'] + documento['confianza'] = response['data'][0]['confidence'] + documento.pop("data") doc_id = work.save(documento) response["_id"] = str(doc_id) @@ -61,5 +66,4 @@ def post(self): ) os.remove(file_name) - return jsonify(response) \ No newline at end of file diff --git a/backend/src/data_validation.py b/backend/src/data_validation.py index 5a032f8..f14e1c8 100644 --- a/backend/src/data_validation.py +++ b/backend/src/data_validation.py @@ -1,4 +1,14 @@ def data_file_validation(request): + """ + Validate the data in the request object and provide default values if necessary. + + Args: + request (object): The request object containing the data. + + Returns: + dict: A dictionary containing the validated data. + + """ if request.method == "POST": if "work_id" not in request.form: request.form["work_id"] = "rode_test" diff --git a/backend/src/filters.py b/backend/src/filters.py index d2fc72a..e5e44b6 100644 --- a/backend/src/filters.py +++ b/backend/src/filters.py @@ -3,10 +3,19 @@ import cv2 def hoja_control(image) -> bool: + """ + Check if the given image contains the text "hoja de control" in the first 44 characters. + + Args: + image: The image to be processed. It can be either a bytes object representing the image data or a string representing the file path. + + Returns: + bool: True if the text "hoja de control" is found in the image, False otherwise. + """ if type(image) == bytes: image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR) elif type(image) == str: image = cv2.imread(image, cv2.IMREAD_COLOR) - + text = pytesseract.image_to_string(image) return "hoja de control" in text[:44].lower() \ No newline at end of file diff --git a/frontend/pages/auditoria.py b/frontend/pages/auditoria.py index ae2e875..4ac735d 100644 --- a/frontend/pages/auditoria.py +++ b/frontend/pages/auditoria.py @@ -6,7 +6,6 @@ from resources import ( single_model_metrics, - hoja_control, procces_image_rode, procces_pdf2image_rode ) diff --git a/frontend/pages/rode.py b/frontend/pages/rode.py index 5c3eab3..26fdffa 100644 --- a/frontend/pages/rode.py +++ b/frontend/pages/rode.py @@ -6,7 +6,6 @@ from resources import ( single_model_metrics, - hoja_control, procces_image_rode, procces_pdf2image_rode ) @@ -20,6 +19,9 @@ st.title("RoDe (Rotation Detection) Detección de rotación 🔄") +work_id_default = f"rode_{datetime.now().strftime('%Y%m%d%H%M%S')}" +work_id = st.text_input("Identificador de trabajo", placeholder=f"Identificador de trabajo (Opcional)") + version = "v1" filters = st.multiselect( @@ -46,12 +48,13 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"): global bad_dataframe global dataframe + global work_id, work_id_default errors = [] with st.spinner(f"Procesando {len(uploaded_file)} imágenes..."): - # work_id = f"rode_{datetime.now().strftime('%Y%m%d%H%M%S')}_{len(uploaded_file)}" - work_id = 'rode_testing' + if not work_id: + work_id = work_id_default st.info(f'Identificador de trabajo: **{work_id}**') st.info(f'Procesando **{len(uploaded_file)}** imágenes.') @@ -71,17 +74,13 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"): data, response = procces_image_rode(image, file.name, version, data_file) - # if "Hoja de Control" in filters: - # filtered = hoja_control(image) - - # if filtered: - # st.toast(f'Existe una hoja de control en la imagen **{file.name}**', icon="⚠️") - # errors.append(f'Existe una hoja de control en la imagen **{file.name}**') - # data["filtros"] = ["hoja de control"] + if "filtros" not in response: + response["filtros"] = False - if "Hoja de Control" in response['filtros']: + if "hoja de control" in response['filtros']: st.error(f':warning: Existe una hoja de control en la imagen "**{file.name}**"') errors.append(f'Existe una hoja de control en la imagen "**{file.name}**"') + data["filtros"] = ["hoja de control"] st.caption(file.name) @@ -109,38 +108,70 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"): def process_pdf_file(uploaded_pdf, show_image, version="v1"): + global work_id, work_id_default global bad_dataframe global dataframe errors = [] with st.spinner(f"Procesando {len(uploaded_pdf)} PDFs..."): + if not work_id: + work_id = work_id_default + + st.info(f'Identificador de trabajo: **{work_id}**') st.info(f'Procesando **{len(uploaded_pdf)}** PDFs.') st.info(f'Inicio del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**') inicio_time = datetime.now() fin_process = st.empty() + if len(uploaded_pdf) > 3: + st.warning(f":warning: solo se mostrarán los resultados con problemas, para ver todos los resultados puede ir a la pagina de **trabajos** y seleccionar el trabajo: **{work_id}**") + + st.divider() + for pdf in uploaded_pdf: images = convert_from_bytes(pdf.read()) for i, image in enumerate(images): - data, response, image_path, name_file_rand = procces_pdf2image_rode(image, pdf.name, version, i) + + data_file = { + "work_id": work_id, + "archivo": pdf.name, + "tipo": "pdf", + "pagina": i + 1, + "filtros": [f for f in filters] + } + + data, response, image_path, name_file_rand = procces_pdf2image_rode(image, pdf.name, version, i, data_file) - if "Hoja de Control" in filters: - filtered = hoja_control(image_path) - if filtered: - st.error(f':warning: Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**') - errors.append(f'Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**') + if "filtros" in response: + if "hoja de control" in response['filtros']: + st.error(f':warning: Existe una hoja de control en la página **{i + 1}** del PDF "**{pdf.name}**"') + errors.append(f'Existe una hoja de control en la página **{i + 1}** del PDF "**{pdf.name}**"') data["filtros"] = ["hoja de control"] if version == "v1": - single_model_metrics(response) - dataframe = pd.concat([dataframe, pd.DataFrame(data)], axis=0, ignore_index=True) + if len(uploaded_pdf) < 3: + st.caption(f"Pagina {i + 1} del PDF {pdf.name}") + single_model_metrics(response) + dataframe = pd.concat([dataframe, pd.DataFrame(data)], axis=0, ignore_index=True) + placeholder.dataframe(dataframe) + + + if response['data'][0]['name'] == "rotado" or data.get("filtros"): + if len(uploaded_pdf) >= 3: + st.caption(f"Pagina {i + 1} del PDF {pdf.name}") + single_model_metrics(response) - if response['data'][0]['name'] == "rotado": bad_dataframe = pd.concat([bad_dataframe, pd.DataFrame(data)], axis=0, ignore_index=True) - st.error(f':warning: La Página **{i + 1}** en el PDF está rotada.') + bad_placeholder.dataframe(bad_dataframe) + + if response['data'][0]['name'] == "rotado": + st.error(f':warning: La Página **{i + 1}** en el PDF está rotada.') - if show_image: + if show_image and len(uploaded_pdf) >= 3: + st.image(image_path, use_column_width=True, caption="Uploaded Image", output_format="JPEG") + + if show_image and len(uploaded_pdf) < 3: st.image(image_path, use_column_width=True, caption="Uploaded Image", output_format="JPEG") if errors: @@ -151,11 +182,6 @@ def process_pdf_file(uploaded_pdf, show_image, version="v1"): except PermissionError: print(f"Error al eliminar el archivo {image_path}") - st.divider() - - placeholder.dataframe(dataframe) - bad_placeholder.dataframe(bad_dataframe) - fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**, tiempo total: **{(datetime.now() - inicio_time).total_seconds()}** Segundos') def main(): diff --git a/frontend/requirements.txt b/frontend/requirements.txt index 8d90c8f..87f7032 100644 --- a/frontend/requirements.txt +++ b/frontend/requirements.txt @@ -5,5 +5,4 @@ pdf2image matplotlib opencv-python sentry-sdk -numpy -pytesseract \ No newline at end of file +numpy \ No newline at end of file diff --git a/frontend/resources/RoDeProc.py b/frontend/resources/RoDeProc.py index dcc44ac..54bd898 100644 --- a/frontend/resources/RoDeProc.py +++ b/frontend/resources/RoDeProc.py @@ -41,13 +41,11 @@ def procces_pdf2image_rode(image, name, version="v1", i=0, data_file: dict = {}) image_path = name_file_rand with open(image_path, "rb") as image: - response = ImageProccesing("rode").process_file(image, version) + response = ImageProccesing("rode").process_file(image, version, data_file) #change names to spanish response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado" - st.caption(f"Pagina {i + 1} del PDF {name}") - data = { "archivo": [name], "pagina": [f'Page {i + 1}'], # "Page 1 diff --git a/frontend/resources/__init__.py b/frontend/resources/__init__.py index 11d5cf8..f3346bd 100644 --- a/frontend/resources/__init__.py +++ b/frontend/resources/__init__.py @@ -1,4 +1,3 @@ from .display_metrics import single_model_metrics from .process_files import ImageProccesing -from .filters import hoja_control from .RoDeProc import procces_image_rode, procces_pdf2image_rode \ No newline at end of file diff --git a/frontend/resources/filters.py b/frontend/resources/filters.py deleted file mode 100644 index 5e142f5..0000000 --- a/frontend/resources/filters.py +++ /dev/null @@ -1,9 +0,0 @@ -import pytesseract -import numpy as np -import cv2 - -def hoja_control(image) -> bool: - if type(image) == bytes: - image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR) - text = pytesseract.image_to_string(image) - return "hoja de control" in text[:44].lower() \ No newline at end of file