diff --git a/frontend/pages/rode.py b/frontend/pages/rode.py index 6e65290..6556668 100644 --- a/frontend/pages/rode.py +++ b/frontend/pages/rode.py @@ -1,17 +1,17 @@ from pdf2image import convert_from_bytes from datetime import datetime +import dask.dataframe as dd import streamlit as st import pandas as pd -import random import os from resources import ( - ImageProccesing, single_model_metrics, - hoja_control + hoja_control, + procces_image_rode, + procces_pdf2image_rode ) - st.set_page_config( page_title="RoDe (Rotation Detection)", page_icon="馃攧", @@ -22,6 +22,12 @@ st.title("RoDe (Rotation Detection) Detecci贸n de rotaci贸n 馃攧") version = "v1" + +filters = st.multiselect( + "Selecciona los filtros a utilizar", + ["Hoja de Control", "Hoja en Blanco"], + ["Hoja de Control"] +) show_image = st.checkbox("Mostrar imagenes", value=False) uploaded_file = st.file_uploader("Upload image(s)", type=["jpg", "jpeg", "png", "tif", "tiff"], accept_multiple_files=True) @@ -35,14 +41,8 @@ alerts = st.empty() -fin_process = st.empty() - -dataframe = pd.DataFrame(columns=["archivo", "predicci贸n", "confianza", "tiempo(s)"]) -bad_dataframe = pd.DataFrame(columns=["archivo", "predicci贸n", "confianza", "tiempo(s)"]) - -with st.container(): - bad_placeholder.dataframe(bad_dataframe) - placeholder.dataframe(dataframe) +dataframe = dd.from_pandas(pd.DataFrame(columns=["archivo", "predicci贸n", "confianza", "tiempo(s)"]), npartitions=1) +bad_dataframe = dd.from_pandas(pd.DataFrame(columns=["archivo", "predicci贸n", "confianza", "tiempo(s)"]), npartitions=1) def process_uploaded_images(uploaded_file, show_image, version="v1"): global bad_dataframe @@ -53,38 +53,30 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"): with st.spinner("Procesando..."): st.info(f'Procesando **{len(uploaded_file)}** im谩genes.') st.info(f'Inicio del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**') + inicio_time = datetime.now() + fin_process = st.empty() for file in uploaded_file: image = file.read() - response = ImageProccesing("rode").process_file(image, version, file.name) - filtered = hoja_control(image) - - # cambiar nombres a espa帽ol - response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado" - response['data'][1]['name'] = "rotado" if response['data'][1]['name'] == "rotated" else "no rotado" - - data = { - "archivo": [file.name], - "predicci贸n": [response['data'][0]['name']], - "confianza": [response['data'][0]['confidence'] * 100], - "tiempo(s)": [response['time']], - "fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")] - } - - if filtered: - st.toast(f'Existe una hoja de control en la imagen **{file.name}**', icon="鈿狅笍") - errors.append(f'Existe una hoja de control en la imagen **{file.name}**') - data["filtros"] = ["hoja de control"] + data, response = procces_image_rode(image, file.name, version) + + if "Hoja de Control" in filters: + filtered = hoja_control(image) + + if filtered: + st.toast(f'Existe una hoja de control en la imagen **{file.name}**', icon="鈿狅笍") + errors.append(f'Existe una hoja de control en la imagen **{file.name}**') + data["filtros"] = ["hoja de control"] st.caption(file.name) if version == "v1": single_model_metrics(response) - dataframe = pd.concat([dataframe, pd.DataFrame(data)], ignore_index=True) + dataframe = dd.concat([dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0) if response['data'][0]['name'] == "rotado": - bad_dataframe = pd.concat([bad_dataframe, pd.DataFrame(data)], ignore_index=True) + bad_dataframe = dd.concat([bad_dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0) st.error(f':warning: La imagen "**{file.name}**" est谩 rotada.') if show_image: @@ -95,8 +87,10 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"): st.divider() - placeholder.dataframe(dataframe) - bad_placeholder.dataframe(bad_dataframe) + placeholder.dataframe(dataframe.compute()) + bad_placeholder.dataframe(bad_dataframe.compute()) + + fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**, tiempo total (Segundos): **{round((datetime.now() - inicio_time).total_seconds(), 2)}**') def process_pdf_file(uploaded_pdf, show_image, version="v1"): @@ -108,43 +102,27 @@ def process_pdf_file(uploaded_pdf, show_image, version="v1"): with st.spinner(f"Procesando {len(uploaded_pdf)} PDFs..."): st.info(f'Procesando **{len(uploaded_pdf)}** PDFs.') st.info(f'Inicio del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**') + inicio_time = datetime.now() + fin_process = st.empty() for pdf in uploaded_pdf: images = convert_from_bytes(pdf.read()) for i, image in enumerate(images): - name_file_rand = f'temp/{"".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=10))}.jpg' - image.save(name_file_rand) - image_path = name_file_rand - filtered = hoja_control(image) - - with open(image_path, "rb") as image: - response = ImageProccesing("rode").process_file(image, version, pdf.name, i + 1, "pdf") - - #change names to spanish - response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado" - - st.caption(f"Pagina {i + 1} del PDF {pdf.name}") - - data = { - "archivo": [pdf.name], - "pagina": [f'Page {i + 1}'], # "Page 1 - "predicci贸n": [response['data'][0]['name']], - "confianza": [response['data'][0]['confidence'] * 100], - "tiempo(s)": [response['time']], - "fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")] - } - - if filtered: - st.error(f':warning: Existe una hoja de control en la p谩gina **{i + 1}** del PDF **{pdf.name}**') - errors.append(f'Existe una hoja de control en la p谩gina **{i + 1}** del PDF **{pdf.name}**') - data["filtros"] = ["hoja de control"] + data, response, image_path, name_file_rand = procces_pdf2image_rode(image, pdf.name, version, i) + + if "Hoja de Control" in filters: + filtered = hoja_control(image_path) + if filtered: + st.error(f':warning: Existe una hoja de control en la p谩gina **{i + 1}** del PDF **{pdf.name}**') + errors.append(f'Existe una hoja de control en la p谩gina **{i + 1}** del PDF **{pdf.name}**') + data["filtros"] = ["hoja de control"] if version == "v1": single_model_metrics(response) - dataframe = pd.concat([dataframe, pd.DataFrame(data)], ignore_index=True) + dataframe = dd.concat([dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0) if response['data'][0]['name'] == "rotado": - bad_dataframe = pd.concat([bad_dataframe, pd.DataFrame(data)], ignore_index=True) + bad_dataframe = dd.concat([bad_dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0) st.error(f':warning: La P谩gina **{i + 1}** en el PDF est谩 rotada.') if show_image: @@ -160,16 +138,16 @@ def process_pdf_file(uploaded_pdf, show_image, version="v1"): st.divider() - bad_placeholder.dataframe(bad_dataframe) - placeholder.dataframe(dataframe) + placeholder.dataframe(dataframe.compute()) + bad_placeholder.dataframe(bad_dataframe.compute()) + + fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**, tiempo total: **{(datetime.now() - inicio_time).total_seconds()}** Segundos') def main(): if uploaded_file: process_uploaded_images(uploaded_file, show_image, version) - fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**') if uploaded_pdf: process_pdf_file(uploaded_pdf, show_image, version) - fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**') if __name__ == "__main__": main() \ No newline at end of file diff --git a/frontend/requirements.txt b/frontend/requirements.txt index 8d90c8f..ceb67e5 100644 --- a/frontend/requirements.txt +++ b/frontend/requirements.txt @@ -6,4 +6,5 @@ matplotlib opencv-python sentry-sdk numpy -pytesseract \ No newline at end of file +pytesseract +dask \ No newline at end of file diff --git a/frontend/resources/RoDeProc.py b/frontend/resources/RoDeProc.py new file mode 100644 index 0000000..b726e77 --- /dev/null +++ b/frontend/resources/RoDeProc.py @@ -0,0 +1,45 @@ +from .process_files import ImageProccesing +from datetime import datetime +import streamlit as st +import random + +def procces_image_rode(image, name, version="v1"): + response = ImageProccesing("rode").process_file(image, version) + + # cambiar nombres a espa帽ol + response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado" + response['data'][1]['name'] = "rotado" if response['data'][1]['name'] == "rotated" else "no rotado" + + data = { + "archivo": [name], + "predicci贸n": [response['data'][0]['name']], + "confianza": [response['data'][0]['confidence'] * 100], + "tiempo(s)": [response['time']], + "fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")] + } + + return data, response + +def procces_pdf2image_rode(image, name, version="v1", i=0): + name_file_rand = f'temp/{"".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=10))}.jpg' + image.save(name_file_rand) + image_path = name_file_rand + + with open(image_path, "rb") as image: + response = ImageProccesing("rode").process_file(image, version) + + #change names to spanish + response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado" + + st.caption(f"Pagina {i + 1} del PDF {name}") + + data = { + "archivo": [name], + "pagina": [f'Page {i + 1}'], # "Page 1 + "predicci贸n": [response['data'][0]['name']], + "confianza": [response['data'][0]['confidence'] * 100], + "tiempo(s)": [response['time']], + "fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")] + } + + return data, response, image_path, name_file_rand diff --git a/frontend/resources/__init__.py b/frontend/resources/__init__.py index 300f8e8..11d5cf8 100644 --- a/frontend/resources/__init__.py +++ b/frontend/resources/__init__.py @@ -1,3 +1,4 @@ from .display_metrics import single_model_metrics from .process_files import ImageProccesing -from .filters import hoja_control \ No newline at end of file +from .filters import hoja_control +from .RoDeProc import procces_image_rode, procces_pdf2image_rode \ No newline at end of file diff --git a/frontend/resources/display_metrics.py b/frontend/resources/display_metrics.py index 48f7a3c..9620f00 100644 --- a/frontend/resources/display_metrics.py +++ b/frontend/resources/display_metrics.py @@ -7,4 +7,4 @@ def single_model_metrics(response): prediction, confidence, tiempo = st.columns(3) prediction.metric("Predicci贸n", response['data'][0]['name']) confidence.metric("Confianza", f"{response['data'][0]['confidence'] * 100} %") - tiempo.metric("Tiempo", f"{round(response['time'], 3)} s") \ No newline at end of file + tiempo.metric("Tiempo", f"{round(response['time'], 2)} s") \ No newline at end of file