Skip to content

Commit

Permalink
Refactor file handling and add new code in backend and frontend
Browse files Browse the repository at this point in the history
  • Loading branch information
Erik172 committed May 6, 2024
1 parent f25e0fd commit 38d5df9
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 49 deletions.
1 change: 0 additions & 1 deletion backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
basedir = os.path.abspath(os.path.dirname(__file__))

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] ='sqlite:///' + os.path.join(basedir, 'database.db')
api = Api(app)

api.add_resource(Works, "/works")
Expand Down
6 changes: 6 additions & 0 deletions backend/database.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import pymongo

def get_database(uri: str = "mongodb://localhost:27017/", name: str = "DESD"):
    """
    Return a handle to a MongoDB database.

    Called with no arguments, this connects to the local MongoDB server and
    returns the "DESD" database. Both values can be overridden so the same
    helper works for other deployments or test databases.

    Args:
        uri: MongoDB connection URI passed to ``pymongo.MongoClient``.
        name: Name of the database to select on the client.

    Returns:
        pymongo.database.Database: The selected database object.
    """
    # NOTE(review): a new MongoClient is created on every call; consider
    # reusing a single client if this is called frequently.
    client = pymongo.MongoClient(uri)
    return client[name]
3 changes: 2 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ pandas
torch
ultralytics>=8.1.45
sentry-sdk
pymongo
pymongo
pytesseract
10 changes: 7 additions & 3 deletions backend/resources/RoDe.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,15 @@ def post(self):
if "filtros" in response:
response["filtros"].append("hoja de control")
else:
response["filtros"] = ["Hoja de Control"]
response["filtros"] = ["hoja de control"]

r_form = request.form.to_dict()
r_form.pop("filtros")
# unir response y request.form
documento = request.form | response
documento = r_form | response
documento['prediccion'] = response['data'][0]['name']
documento['confianza'] = response['data'][0]['confidence']
documento.pop("data")
doc_id = work.save(documento)
response["_id"] = str(doc_id)

Expand All @@ -61,5 +66,4 @@ def post(self):
)

os.remove(file_name)

return jsonify(response)
10 changes: 10 additions & 0 deletions backend/src/data_validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
def data_file_validation(request):
"""
Validate the data in the request object and provide default values if necessary.
Args:
request (object): The request object containing the data.
Returns:
dict: A dictionary containing the validated data.
"""
if request.method == "POST":
if "work_id" not in request.form:
request.form["work_id"] = "rode_test"
Expand Down
11 changes: 10 additions & 1 deletion backend/src/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@
import cv2

def hoja_control(image) -> bool:
    """
    Report whether OCR finds "hoja de control" at the start of an image.

    The image is run through Tesseract and only the first 44 characters of
    the recognized text are searched, case-insensitively.

    Args:
        image: Either a bytes object holding encoded image data (decoded
            with ``cv2.imdecode``) or a string file path (loaded with
            ``cv2.imread``). Any other object is handed to pytesseract
            unchanged.

    Returns:
        bool: True if "hoja de control" appears in the first 44 characters
        of the recognized text, False otherwise.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of bytes/str.
    if isinstance(image, bytes):
        image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
    elif isinstance(image, str):
        image = cv2.imread(image, cv2.IMREAD_COLOR)

    # NOTE(review): the OpenCV loaders presumably yield None for unreadable
    # input, which is then passed straight to pytesseract — confirm whether
    # callers need an explicit error here.
    text = pytesseract.image_to_string(image)
    return "hoja de control" in text[:44].lower()
1 change: 0 additions & 1 deletion frontend/pages/auditoria.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from resources import (
single_model_metrics,
hoja_control,
procces_image_rode,
procces_pdf2image_rode
)
Expand Down
80 changes: 53 additions & 27 deletions frontend/pages/rode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from resources import (
single_model_metrics,
hoja_control,
procces_image_rode,
procces_pdf2image_rode
)
Expand All @@ -20,6 +19,9 @@

st.title("RoDe (Rotation Detection) Detección de rotación 🔄")

work_id_default = f"rode_{datetime.now().strftime('%Y%m%d%H%M%S')}"
work_id = st.text_input("Identificador de trabajo", placeholder=f"Identificador de trabajo (Opcional)")

version = "v1"

filters = st.multiselect(
Expand All @@ -46,12 +48,13 @@
def process_uploaded_images(uploaded_file, show_image, version="v1"):
global bad_dataframe
global dataframe
global work_id, work_id_default

errors = []

with st.spinner(f"Procesando {len(uploaded_file)} imágenes..."):
# work_id = f"rode_{datetime.now().strftime('%Y%m%d%H%M%S')}_{len(uploaded_file)}"
work_id = 'rode_testing'
if not work_id:
work_id = work_id_default

st.info(f'Identificador de trabajo: **{work_id}**')
st.info(f'Procesando **{len(uploaded_file)}** imágenes.')
Expand All @@ -71,17 +74,13 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"):

data, response = procces_image_rode(image, file.name, version, data_file)

# if "Hoja de Control" in filters:
# filtered = hoja_control(image)

# if filtered:
# st.toast(f'Existe una hoja de control en la imagen **{file.name}**', icon="⚠️")
# errors.append(f'Existe una hoja de control en la imagen **{file.name}**')
# data["filtros"] = ["hoja de control"]
if "filtros" not in response:
response["filtros"] = False

if "Hoja de Control" in response['filtros']:
if "hoja de control" in response['filtros']:
st.error(f':warning: Existe una hoja de control en la imagen "**{file.name}**"')
errors.append(f'Existe una hoja de control en la imagen "**{file.name}**"')
data["filtros"] = ["hoja de control"]

st.caption(file.name)

Expand Down Expand Up @@ -109,38 +108,70 @@ def process_uploaded_images(uploaded_file, show_image, version="v1"):


def process_pdf_file(uploaded_pdf, show_image, version="v1"):
global work_id, work_id_default
global bad_dataframe
global dataframe

errors = []

with st.spinner(f"Procesando {len(uploaded_pdf)} PDFs..."):
if not work_id:
work_id = work_id_default

st.info(f'Identificador de trabajo: **{work_id}**')
st.info(f'Procesando **{len(uploaded_pdf)}** PDFs.')
st.info(f'Inicio del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**')
inicio_time = datetime.now()
fin_process = st.empty()

if len(uploaded_pdf) > 3:
st.warning(f":warning: solo se mostrarán los resultados con problemas, para ver todos los resultados puede ir a la pagina de **trabajos** y seleccionar el trabajo: **{work_id}**")

st.divider()

for pdf in uploaded_pdf:
images = convert_from_bytes(pdf.read())
for i, image in enumerate(images):
data, response, image_path, name_file_rand = procces_pdf2image_rode(image, pdf.name, version, i)

data_file = {
"work_id": work_id,
"archivo": pdf.name,
"tipo": "pdf",
"pagina": i + 1,
"filtros": [f for f in filters]
}

data, response, image_path, name_file_rand = procces_pdf2image_rode(image, pdf.name, version, i, data_file)

if "Hoja de Control" in filters:
filtered = hoja_control(image_path)
if filtered:
st.error(f':warning: Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**')
errors.append(f'Existe una hoja de control en la página **{i + 1}** del PDF **{pdf.name}**')
if "filtros" in response:
if "hoja de control" in response['filtros']:
st.error(f':warning: Existe una hoja de control en la página **{i + 1}** del PDF "**{pdf.name}**"')
errors.append(f'Existe una hoja de control en la página **{i + 1}** del PDF "**{pdf.name}**"')
data["filtros"] = ["hoja de control"]

if version == "v1":
single_model_metrics(response)
dataframe = pd.concat([dataframe, pd.DataFrame(data)], axis=0, ignore_index=True)
if len(uploaded_pdf) < 3:
st.caption(f"Pagina {i + 1} del PDF {pdf.name}")
single_model_metrics(response)
dataframe = pd.concat([dataframe, pd.DataFrame(data)], axis=0, ignore_index=True)
placeholder.dataframe(dataframe)


if response['data'][0]['name'] == "rotado" or data.get("filtros"):
if len(uploaded_pdf) >= 3:
st.caption(f"Pagina {i + 1} del PDF {pdf.name}")
single_model_metrics(response)

if response['data'][0]['name'] == "rotado":
bad_dataframe = pd.concat([bad_dataframe, pd.DataFrame(data)], axis=0, ignore_index=True)
st.error(f':warning: La Página **{i + 1}** en el PDF está rotada.')
bad_placeholder.dataframe(bad_dataframe)

if response['data'][0]['name'] == "rotado":
st.error(f':warning: La Página **{i + 1}** en el PDF está rotada.')

if show_image:
if show_image and len(uploaded_pdf) >= 3:
st.image(image_path, use_column_width=True, caption="Uploaded Image", output_format="JPEG")

if show_image and len(uploaded_pdf) < 3:
st.image(image_path, use_column_width=True, caption="Uploaded Image", output_format="JPEG")

if errors:
Expand All @@ -151,11 +182,6 @@ def process_pdf_file(uploaded_pdf, show_image, version="v1"):
except PermissionError:
print(f"Error al eliminar el archivo {image_path}")

st.divider()

placeholder.dataframe(dataframe)
bad_placeholder.dataframe(bad_dataframe)

fin_process.info(f'Fin del procesamiento: **{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}**, tiempo total: **{(datetime.now() - inicio_time).total_seconds()}** Segundos')

def main():
Expand Down
3 changes: 1 addition & 2 deletions frontend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@ pdf2image
matplotlib
opencv-python
sentry-sdk
numpy
pytesseract
numpy
4 changes: 1 addition & 3 deletions frontend/resources/RoDeProc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,11 @@ def procces_pdf2image_rode(image, name, version="v1", i=0, data_file: dict = {})
image_path = name_file_rand

with open(image_path, "rb") as image:
response = ImageProccesing("rode").process_file(image, version)
response = ImageProccesing("rode").process_file(image, version, data_file)

#change names to spanish
response['data'][0]['name'] = "rotado" if response['data'][0]['name'] == "rotated" else "no rotado"

st.caption(f"Pagina {i + 1} del PDF {name}")

data = {
"archivo": [name],
"pagina": [f'Page {i + 1}'], # "Page 1
Expand Down
1 change: 0 additions & 1 deletion frontend/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from .display_metrics import single_model_metrics
from .process_files import ImageProccesing
from .filters import hoja_control
from .RoDeProc import procces_image_rode, procces_pdf2image_rode
9 changes: 0 additions & 9 deletions frontend/resources/filters.py

This file was deleted.

0 comments on commit 38d5df9

Please sign in to comment.