Skip to content

Commit

Permalink
Refactor file handling and add dependencies in frontend/requirements.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
Erik172 committed May 5, 2024
1 parent e211397 commit 4ec1a08
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 70 deletions.
112 changes: 45 additions & 67 deletions frontend/pages/rode.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from pdf2image import convert_from_bytes
from datetime import datetime
import dask.dataframe as dd
import streamlit as st
import pandas as pd
import random
import os

from resources import (
ImageProccesing,
single_model_metrics,
hoja_control
hoja_control,
procces_image_rode,
procces_pdf2image_rode
)


st.set_page_config(
page_title="RoDe (Rotation Detection)",
page_icon="🔄",
Expand All @@ -22,6 +22,12 @@
st.title("RoDe (Rotation Detection) Detección de rotación 🔄")

version = "v1"

filters = st.multiselect(
"Selecciona los filtros a utilizar",
["Hoja de Control", "Hoja en Blanco"],
["Hoja de Control"]
)

show_image = st.checkbox("Mostrar imagenes", value=False)
uploaded_file = st.file_uploader("Upload image(s)", type=["jpg", "jpeg", "png", "tif", "tiff"], accept_multiple_files=True)
Expand All @@ -35,14 +41,8 @@

alerts = st.empty()

fin_process = st.empty()

dataframe = pd.DataFrame(columns=["archivo", "predicción", "confianza", "tiempo(s)"])
bad_dataframe = pd.DataFrame(columns=["archivo", "predicción", "confianza", "tiempo(s)"])

with st.container():
bad_placeholder.dataframe(bad_dataframe)
placeholder.dataframe(dataframe)
dataframe = dd.from_pandas(pd.DataFrame(columns=["archivo", "predicción", "confianza", "tiempo(s)"]), npartitions=1)
bad_dataframe = dd.from_pandas(pd.DataFrame(columns=["archivo", "predicción", "confianza", "tiempo(s)"]), npartitions=1)

def process_uploaded_images(uploaded_file, show_image, version="v1"):
global bad_dataframe
Expand All @@ -53,38 +53,30 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"):
with st.spinner("Procesando..."):
st.info(f'Procesando **{len(uploaded_file)}** imágenes.')
st.info(f'Inicio del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**')
inicio_time = datetime.now()
fin_process = st.empty()

for file in uploaded_file:
image = file.read()
response = ImageProccesing("rode").process_file(image, version, file.name)
filtered = hoja_control(image)

# cambiar nombres a español
response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado"
response['data'][1]['name'] = "rotado" if response['data'][1]['name'] == "rotated" else "no rotado"

data = {
"archivo": [file.name],
"predicción": [response['data'][0]['name']],
"confianza": [response['data'][0]['confidence'] * 100],
"tiempo(s)": [response['time']],
"fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
}

if filtered:
st.toast(f'Existe una hoja de control en la imagen **{file.name}**', icon="⚠️")
errors.append(f'Existe una hoja de control en la imagen **{file.name}**')
data["filtros"] = ["hoja de control"]
data, response = procces_image_rode(image, file.name, version)

if "Hoja de Control" in filters:
filtered = hoja_control(image)

if filtered:
st.toast(f'Existe una hoja de control en la imagen **{file.name}**', icon="⚠️")
errors.append(f'Existe una hoja de control en la imagen **{file.name}**')
data["filtros"] = ["hoja de control"]

st.caption(file.name)

if version == "v1":
single_model_metrics(response)

dataframe = pd.concat([dataframe, pd.DataFrame(data)], ignore_index=True)
dataframe = dd.concat([dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0)

if response['data'][0]['name'] == "rotado":
bad_dataframe = pd.concat([bad_dataframe, pd.DataFrame(data)], ignore_index=True)
bad_dataframe = dd.concat([bad_dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0)
st.error(f':warning: La imagen "**{file.name}**" está rotada.')

if show_image:
Expand All @@ -95,8 +87,10 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"):

st.divider()

placeholder.dataframe(dataframe)
bad_placeholder.dataframe(bad_dataframe)
placeholder.dataframe(dataframe.compute())
bad_placeholder.dataframe(bad_dataframe.compute())

fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**, tiempo total (Segundos): **{round((datetime.now() - inicio_time).total_seconds(), 2)}**')


def process_pdf_file(uploaded_pdf, show_image, version="v1"):
Expand All @@ -108,43 +102,27 @@ def process_pdf_file(uploaded_pdf, show_image, version="v1"):
with st.spinner(f"Procesando {len(uploaded_pdf)} PDFs..."):
st.info(f'Procesando **{len(uploaded_pdf)}** PDFs.')
st.info(f'Inicio del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**')
inicio_time = datetime.now()
fin_process = st.empty()

for pdf in uploaded_pdf:
images = convert_from_bytes(pdf.read())
for i, image in enumerate(images):
name_file_rand = f'temp/{"".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=10))}.jpg'
image.save(name_file_rand)
image_path = name_file_rand
filtered = hoja_control(image)

with open(image_path, "rb") as image:
response = ImageProccesing("rode").process_file(image, version, pdf.name, i + 1, "pdf")

#change names to spanish
response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado"

st.caption(f"Pagina {i + 1} del PDF {pdf.name}")

data = {
"archivo": [pdf.name],
"pagina": [f'Page {i + 1}'], # "Page 1
"predicción": [response['data'][0]['name']],
"confianza": [response['data'][0]['confidence'] * 100],
"tiempo(s)": [response['time']],
"fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
}

if filtered:
st.error(f':warning: Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**')
errors.append(f'Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**')
data["filtros"] = ["hoja de control"]
data, response, image_path, name_file_rand = procces_pdf2image_rode(image, pdf.name, version, i)

if "Hoja de Control" in filters:
filtered = hoja_control(image_path)
if filtered:
st.error(f':warning: Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**')
errors.append(f'Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**')
data["filtros"] = ["hoja de control"]

if version == "v1":
single_model_metrics(response)
dataframe = pd.concat([dataframe, pd.DataFrame(data)], ignore_index=True)
dataframe = dd.concat([dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0)

if response['data'][0]['name'] == "rotado":
bad_dataframe = pd.concat([bad_dataframe, pd.DataFrame(data)], ignore_index=True)
bad_dataframe = dd.concat([bad_dataframe, dd.from_pandas(pd.DataFrame(data), npartitions=1)], axis=0)
st.error(f':warning: La Página **{i + 1}** en el PDF está rotada.')

if show_image:
Expand All @@ -160,16 +138,16 @@ def process_pdf_file(uploaded_pdf, show_image, version="v1"):

st.divider()

bad_placeholder.dataframe(bad_dataframe)
placeholder.dataframe(dataframe)
placeholder.dataframe(dataframe.compute())
bad_placeholder.dataframe(bad_dataframe.compute())

fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**, tiempo total: **{(datetime.now() - inicio_time).total_seconds()}** Segundos')

def main():
if uploaded_file:
process_uploaded_images(uploaded_file, show_image, version)
fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**')
if uploaded_pdf:
process_pdf_file(uploaded_pdf, show_image, version)
fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**')

if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion frontend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ matplotlib
opencv-python
sentry-sdk
numpy
pytesseract
pytesseract
dask
45 changes: 45 additions & 0 deletions frontend/resources/RoDeProc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from .process_files import ImageProccesing
from datetime import datetime
import streamlit as st
import random

def procces_image_rode(image, name, version="v1"):
    """Classify one image with the "rode" model and build its result row.

    Args:
        image: Image payload forwarded unchanged to
            ``ImageProccesing("rode").process_file``.
        name: File name stored in the ``"archivo"`` column.
        version: Model version forwarded to ``process_file``.

    Returns:
        A ``(data, response)`` tuple. ``data`` is a dict of single-element
        lists (one table row); ``response`` is the model response with the
        names of its first two entries translated to Spanish in place.
    """
    response = ImageProccesing("rode").process_file(image, version)

    # Translate the label of the first two predictions to Spanish, in order.
    for position in (0, 1):
        prediction = response['data'][position]
        if prediction['name'] == "rotated":
            prediction['name'] = "rotado"
        else:
            prediction['name'] = "no rotado"

    # The first entry is the one reported in the result row.
    top_prediction = response['data'][0]
    data = {
        "archivo": [name],
        "predicción": [top_prediction['name']],
        "confianza": [top_prediction['confidence'] * 100],
        "tiempo(s)": [response['time']],
        "fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
    }

    return data, response

def procces_pdf2image_rode(image, name, version="v1", i=0):
    """Save a PDF page image to disk, classify it with "rode" and build its row.

    The page is written as a JPEG with a random 10-letter name under
    ``temp/``; the saved file is then opened in binary mode and sent to
    ``ImageProccesing("rode").process_file``. A Streamlit caption naming the
    page and PDF is emitted as a side effect.

    Args:
        image: Page image exposing ``save(path)`` (the caller passes the
            objects returned by ``pdf2image.convert_from_bytes``).
        name: Name of the source PDF, stored in the ``"archivo"`` column.
        version: Model version forwarded to ``process_file``.
        i: Zero-based page index; displayed and stored one-based.

    Returns:
        A ``(data, response, image_path, name_file_rand)`` tuple. ``data`` is
        a dict of single-element lists (one table row). ``image_path`` and
        ``name_file_rand`` are the same path; both are returned to keep the
        existing call sites working. The file is NOT deleted here.
    """
    import os  # local import: only needed to guarantee the output directory

    # Saving into a missing directory fails, so make sure "temp/" exists
    # before writing the page image (no-op when it is already there).
    os.makedirs("temp", exist_ok=True)

    name_file_rand = f'temp/{"".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=10))}.jpg'
    image.save(name_file_rand)
    image_path = name_file_rand

    # Use a dedicated handle name so the `image` parameter is not shadowed.
    with open(image_path, "rb") as image_file:
        response = ImageProccesing("rode").process_file(image_file, version)

    # Translate the top prediction's label to Spanish.
    response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado"

    st.caption(f"Pagina {i + 1} del PDF {name}")

    data = {
        "archivo": [name],
        "pagina": [f'Page {i + 1}'],  # e.g. "Page 1"
        "predicción": [response['data'][0]['name']],
        "confianza": [response['data'][0]['confidence'] * 100],
        "tiempo(s)": [response['time']],
        "fecha": [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
    }

    return data, response, image_path, name_file_rand
3 changes: 2 additions & 1 deletion frontend/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .display_metrics import single_model_metrics
from .process_files import ImageProccesing
from .filters import hoja_control
from .filters import hoja_control
from .RoDeProc import procces_image_rode, procces_pdf2image_rode
2 changes: 1 addition & 1 deletion frontend/resources/display_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ def single_model_metrics(response):
prediction, confidence, tiempo = st.columns(3)
prediction.metric("Predicción", response['data'][0]['name'])
confidence.metric("Confianza", f"{response['data'][0]['confidence'] * 100} %")
tiempo.metric("Tiempo", f"{round(response['time'], 3)} s")
tiempo.metric("Tiempo", f"{round(response['time'], 2)} s")

0 comments on commit 4ec1a08

Please sign in to comment.