Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sylvanie85/issue74 Batch import backend endpoints, tests and documentation update #91

Merged
merged 27 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
a6c542e
Merge commit 'a501fcabf59cff2a70927abb6d9f7b1affb590bd' into sylvanie…
sylvanie85 May 29, 2024
20d645d
Seeds endpoint Fixes #88
sylvanie85 May 29, 2024
7d1de49
Merge branch 'sylvanie85/issue88' into sylvanie85/issue87
sylvanie85 May 29, 2024
44471db
feedback routes and fixes in azure_storage
sylvanie85 May 30, 2024
4265bc1
datastore connectivity update #87
sylvanie85 May 31, 2024
c01724d
Merge branch 'sylvanie85/issue85' into sylvanie85/issue87
sylvanie85 May 31, 2024
deb083a
feeback routes
sylvanie85 May 31, 2024
d2493c7
negative feedback endpoint #87
sylvanie85 Jun 3, 2024
661a912
Fixes #87
sylvanie85 Jun 3, 2024
36223df
Merge branch 'sylvanie85/issue85' into sylvanie85/issue87
sylvanie85 Jun 3, 2024
2575045
Merge branch 'sylvanie85/issue87' into sylvanie85/issue74
sylvanie85 Jun 3, 2024
c0456d8
datastore connectivity update #87
sylvanie85 Jun 4, 2024
8865c1b
datastore connectivity update #74
sylvanie85 Jun 4, 2024
1ebbc97
update doc #74
sylvanie85 Jun 5, 2024
9883b44
Positive Feedback endpoint #87
sylvanie85 Jun 7, 2024
469524d
Merge branch 'sylvanie85/issue85' into sylvanie85/issue87
sylvanie85 Jun 7, 2024
f92ae48
Merge remote-tracking branch 'origin/main' into sylvanie85/issue87
sylvanie85 Jun 7, 2024
36b72d4
Merge branch 'sylvanie85/issue87' into sylvanie85/issue74
sylvanie85 Jun 7, 2024
c2d1247
create picture set #74
sylvanie85 Jun 10, 2024
24a0292
Implement API route for picture batch import
sylvanie85 Jun 12, 2024
e4e86a1
Fix get_user_id
sylvanie85 Jun 12, 2024
46bcab1
Merge branch 'sylvanie85/issue87' into sylvanie85/issue74
sylvanie85 Jun 12, 2024
c22afa6
Implement API route for picture batch import
sylvanie85 Jun 12, 2024
1236f86
update batch import doc
sylvanie85 Jun 17, 2024
26fec7b
fixes and unit tests #74
sylvanie85 Jun 17, 2024
8f4e29a
Fixes upload picture endpoint #74
sylvanie85 Jun 18, 2024
b9f588c
Lint Fixes
sylvanie85 Jun 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 101 additions & 10 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class EmailNotSendError(APIErrors):
pass


class EmptyPictureSetError(APIErrors):
class BatchImportError(APIErrors):
pass


Expand Down Expand Up @@ -165,7 +165,7 @@ async def before_serving():
raise ServerError("Incorrect environment variable: PIPELINE_VERSION")

# Store the seeds names and ml structure in CACHE
CACHE["seeds"] = datastore.get_all_seeds_names()
CACHE["seeds"] = datastore.get_all_seeds()
CACHE["endpoints"] = await get_pipelines()

print(
Expand All @@ -181,7 +181,7 @@ async def before_serving():
raise


@app.get("/get-user-id")
@app.post("/get-user-id")
async def get_user_id() :
"""
Returns the user id
Expand Down Expand Up @@ -355,7 +355,7 @@ async def inference_request():
container_name = data["container_name"]
imageDims = data["imageDims"]
image_base64 = data["image"]
user_id = data["userId"]
user_id = container_name

area_ratio = data.get("area_ratio", 0.5)
color_format = data.get("color_format", "hex")
Expand Down Expand Up @@ -480,6 +480,7 @@ async def get_seeds():
Returns JSON containing the model seeds metadata
"""
seeds = await datastore.get_all_seeds()
CACHE["seeds"] = seeds
if seeds :
return jsonify(seeds), 200
else:
Expand All @@ -501,9 +502,12 @@ async def feedback_positive():
data = await request.get_json()
user_id = data["userId"]
inference_id = data["inferenceId"]
boxes_id = data["boxes"][0]
boxes_id = [item['boxId'] for item in data["boxes"]]
if inference_id and user_id and boxes_id:
await datastore.save_perfect_feedback(inference_id, user_id, boxes_id)
connection = datastore.get_connection()
cursor = datastore.get_cursor(connection)
await datastore.save_perfect_feedback(cursor, inference_id, user_id, boxes_id)
datastore.end_query(connection, cursor)
return jsonify([True]), 200
else:
raise APIErrors("missing argument(s)")
Expand All @@ -526,17 +530,104 @@ async def feedback_negative():
"""
try:
data = await request.get_json()
inference_feedback = data["inferenceFeedback"]
user_id = data["userId"]
inference_id = data["inferenceId"]
boxes_id = data["boxes"][0]
if inference_id and user_id and boxes_id and inference_feedback :
await datastore.save_annoted_feedback(inference_id, user_id, boxes_id, inference_feedback)
boxes = data["boxes"]
if inference_id and user_id and boxes :
connection = datastore.get_connection()
cursor = datastore.get_cursor(connection)
await datastore.save_annoted_feedback(inference_id, user_id, boxes)
datastore.end_query(connection, cursor)
return jsonify([True]), 200
else:
raise APIErrors("missing argument(s)")
except (KeyError, TypeError, APIErrors) as error:
return jsonify([f"APIErrors while sending the inference feedback: {str(error)}"]), 400


@app.post("/new-batch-import")
async def new_batch_import():
    """
    Initiates a batch-import session.

    Expects a JSON body with:
        container_name: the user's Azure storage container name (also used
            as the user id by this endpoint)
        nb_pictures: positive integer, number of pictures the client intends
            to upload in this session

    Returns:
        200 with {"session_id": picture_set_id} on success,
        400 with a JSON error message on failure.
    """
    try:
        data = await request.get_json()

        # Validate the presence of the required arguments before reading them.
        if not ("container_name" in data and "nb_pictures" in data):
            raise BatchImportError(
                "missing request arguments: either container_name or nb_pictures is missing")

        container_name = data["container_name"]
        # NOTE(review): the container name doubles as the user id here, as in
        # the inference endpoint — confirm this is intentional.
        user_id = container_name
        nb_pictures = data["nb_pictures"]

        if not container_name or not isinstance(nb_pictures, int) or nb_pictures <= 0:
            raise BatchImportError(
                "wrong request arguments: either container_name or nb_pictures is wrong")

        container_client = await azure_storage.mount_container(
            CONNECTION_STRING, container_name, create_container=True
        )

        connection = datastore.get_connection()
        cursor = datastore.get_cursor(connection)
        try:
            # Creates the picture_set row that acts as the batch-import session.
            picture_set_id = await datastore.create_picture_set(
                cursor, container_client, user_id, nb_pictures)
        finally:
            # Always release the connection, even when the datastore call fails.
            datastore.end_query(connection, cursor)

        if picture_set_id:
            return jsonify({"session_id": picture_set_id}), 200
        else:
            raise APIErrors("failed to create picture set")

    except (KeyError, TypeError, APIErrors, azure_storage.MountContainerError, datastore.DatastoreError) as error:
        return jsonify([f"APIErrors while initiating the batch import: {str(error)}"]), 400


@app.post("/upload-picture")
async def upload_picture():
    """
    Uploads a single picture to the user's container as part of a
    batch-import session.

    Expects a JSON body with:
        container_name: the user's Azure storage container name (also used
            as the user id by this endpoint)
        seed_name: name of the seed shown on the picture
        image: base64 data URL of the picture ("<header>,<data>")
        session_id: picture_set id returned by /new-batch-import
        zoom_level (optional): zoom level of the picture
        nb_seeds (optional): number of seeds on the picture

    Returns:
        200 with [true] on success,
        400 with a JSON error message on failure.
    """
    try:
        data = await request.get_json()

        if not ("container_name" in data and "seed_name" in data and "image" in data and "session_id" in data):
            raise BatchImportError(
                "missing request arguments: either seed_name, session_id, container_name or image is missing")

        container_name = data["container_name"]
        # NOTE(review): the container name doubles as the user id here —
        # confirm this is intentional.
        user_id = container_name
        seed_name = data["seed_name"]
        # zoom_level and nb_seeds are optional metadata (the datastore wrapper
        # defaults them to None); use .get so their absence does not raise
        # KeyError, since they are not part of the required-argument check.
        zoom_level = data.get("zoom_level")
        nb_seeds = data.get("nb_seeds")
        image_base64 = data["image"]
        picture_set_id = data["session_id"]

        if not (container_name and seed_name and image_base64 and picture_set_id):
            raise BatchImportError(
                "wrong request arguments: either seed_name, session_id, container_name or image is wrong")

        container_client = await azure_storage.mount_container(
            CONNECTION_STRING, container_name, create_container=True
        )

        # The image arrives as a data URL: strip the "data:...;base64," header.
        _, encoded_data = image_base64.split(",", 1)

        image_bytes = base64.b64decode(encoded_data)
        image_hash_value = await azure_storage.generate_hash(image_bytes)

        connection = datastore.get_connection()
        cursor = datastore.get_cursor(connection)
        try:
            response = await datastore.upload_pictures(
                cursor, user_id, picture_set_id, container_client,
                [image_hash_value], seed_name, zoom_level, nb_seeds)
        finally:
            # Always release the connection, even when the upload fails.
            datastore.end_query(connection, cursor)

        if response:
            return jsonify([True]), 200
        else:
            raise APIErrors("failed to upload pictures")
    except (KeyError, TypeError, APIErrors, azure_storage.MountContainerError, BatchImportError, datastore.DatastoreError) as error:
        return jsonify([f"APIErrors while uploading pictures: {str(error)}"]), 400


@app.get("/health")
async def health():
    """Liveness probe: always responds 200 with the plain-text body "ok"."""
    status_text = "ok"
    return status_text, 200
Expand Down
69 changes: 53 additions & 16 deletions docs/nachet-batch-import-documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,27 @@ also enhances the system’s overall efficiency and usability.

## Prerequisites

- The user must be signed in and have an Azure Storage Container
- The backend needs to have a connection with the datastore

## Solution

To meet users' need to upload a batch of pictures in the blob storage using the
Nachet interface we need to implement different endpoints in the backend. First
of all, we need to create a folder in the user container. In the database this
will be related to the picture_set table. Once we have the identifier of a
picture_set, it will be used by the front-end to send each image, one by one, to
the second end-point, mentioning the picture_set it belongs to. Each image is
then uploaded to blob storage and a row is added to the database's picture
table.

As we're uploading images one by one, we could run into problems if we have to
import a very large number of images, which could take a long time. For the
moment, we're implementing a first version of batch import with the front-end
calling the back-end for each image, but we may have to cache the images in the
back-end and send them in batches to the datastore, depending on the quantity
of images to be uploaded.

## Sequence Diagram

```mermaid
Expand All @@ -31,32 +50,50 @@ sequenceDiagram;

User ->>+Frontend: Upload session request
Frontend->>+Backend: HTTP Post Req.
Backend->>+Datastore: get_all_seeds_names(cursor)
Datastore-->>-Backend: seed_names res.
Backend-->>-Frontend: seedNames res.
Backend->>+Datastore: get_all_seeds(cursor)
Datastore-->>-Backend: seeds res.
Backend-->>-Frontend: seeds res.
Frontend -->>-User: Show session form
User -) User: Fill form :<br> Seed selection, nb Seeds/Pic, Zoom
User -)+Frontend: Upload: session folder
User -)+Frontend: Upload: folder of pictures
Frontend ->>+Backend: HTTP Post Req.
Backend->>+Datastore: is_user_registered(cursor, email)
Datastore-->>-Backend: user_id res.
Backend -)Datastore: upload_picture_set (cursor, pictures, user_id, **data)
Note over Backend, Datastore: data contains at least the <br> following value: seed_name, zoom_level, nb_seeds
Backend -)+ Datastore: create_picture_set (cursor, user_id, container_client, nb_pictures)
Datastore --)- Backend : picture_set_id
Backend -)Datastore: upload_picture(cursor, container_client, encoded_picture, picture_set_id, **data)
Note over Backend, Datastore: data contains at least the following <br> value: seed_name, zoom_level, nb_seeds
Backend -->>- Frontend : picture_set_id
loop for each picture to upload
Frontend ->>+Backend: HTTP Post Req. (with picture_set_id)
Backend -)Datastore: upload_picture(cursor, container_client, encoded_picture, picture_set_id, **data)
Note over Backend, Datastore: data contains at least the following <br> value: seed_name, zoom_level, nb_seeds
end
```

The complete diagram is part of the datastore documentation. You can see it
here:

[Trusted user upload process](https://github.com/ai-cfia/nachet-datastore/blob/issue13-create-process-to-upload-metadata-for-trusted-users/doc/trusted-user-upload.md)
[Trusted user upload
process](https://github.com/ai-cfia/nachet-datastore/blob/issue13-create-process-to-upload-metadata-for-trusted-users/doc/trusted-user-upload.md)

## API Routes

### /get-user-id

The `get-user-id` route retrieves the user-id for a given email.

### /seeds

### API Route
The `seeds` route is the one to call to get all the seed names needed for the
frontend to build the form to upload the pictures to the database.

#### /picture-form
### /new-batch-import

The `picture-form` is the route to call to get all the information needed for
the frontend to build the form to upload the pictures to the database.
The `/new-batch-import` route is the endpoint that the frontend calls to start
a batch import. It saves the number of pictures of the import and returns the
new picture_set_id as a session id.

#### /upload-pictures
### /upload-picture

The `/upload-pictures` route is the API endpoint responsible to assure the transit
of the picture to the database.
The `/upload-picture` route is the API endpoint responsible for ensuring the
transit of the picture to the database. The frontend must send the session id
so the picture is associated with the right picture_set.
4 changes: 2 additions & 2 deletions model/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ async def request_inference_from_test(model: namedtuple, previous_result: str):
"bottomX": 0.86,
"bottomY": 0.56
},
"label": "test_label",
"label": "Ambrosia artemisiifolia",
"score": 1.0,
"topN": [
{
"label": "test_label",
"label": "Ambrosia artemisiifolia",
"score": 1.0,
},
],
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ cryptography
pyyaml
pydantic
python-magic
nachet-datastore @git+https://github.com/ai-cfia/nachet-datastore.git@main
nachet-datastore @git+https://github.com/ai-cfia/nachet-datastore.git@main #TODO : set branch on main (actually on the last commit of the positive feedback issue)
43 changes: 22 additions & 21 deletions storage/datastore_storage_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,36 @@ def get_user_id(email: str) -> str:
else :
raise UserNotFoundError("User not found")

async def validate_user(cursor, email: str, connection_string) -> datastore.User:
async def create_user(email: str, connection_string) -> datastore.User:
"""
Return True if user is valid, False otherwise
"""
if user_datastore.is_user_registered(cursor, email):
user = datastore.get_User(email, cursor)
else :
user = await datastore.new_user(cursor, email, connection_string)
connection = get_connection()
cursor = get_cursor(connection)
user = await datastore.new_user(cursor, email, connection_string)
end_query(connection, cursor)
return user


async def get_picture_id(cursor, user_id, image_hash_value, container_client) :
"""
Return the picture_id of the image
"""
picture_id = await datastore.upload_picture(cursor, str(user_id), image_hash_value, container_client)
picture_id = await datastore.upload_picture_unknown(cursor, str(user_id), image_hash_value, container_client)
return picture_id

def upload_picture_set(**kwargs):
    """
    Forwards a picture-set upload to the datastore.

    kwargs are passed straight through to
    datastore.bin.upload_picture_set.upload_picture_set.
    """
    connection = get_connection()
    cursor = get_cursor(connection)
    try:
        return datastore.bin.upload_picture_set.upload_picture_set(cursor, **kwargs)
    finally:
        # Release the connection even when the datastore call raises;
        # previously it was never closed at all.
        end_query(connection, cursor)

def upload_pictures(cursor, user_id, picture_set_id, container_client, pictures, seed_name: str, zoom_level: float = None, nb_seeds: int = None) :
    """
    Uploads pictures to a picture set through the datastore.

    Args:
        cursor: open datastore cursor.
        user_id: id of the owning user.
        picture_set_id: id of the picture set the pictures belong to.
        container_client: Azure blob container client.
        pictures: pictures to upload (passed through to the datastore).
        seed_name: name of the seed shown on the pictures.
        zoom_level: optional zoom level metadata.
        nb_seeds: optional number of seeds on the pictures.

    Raises:
        DatastoreError: wraps any exception raised by the datastore call,
            chaining the original exception as the cause.
    """
    try :
        return datastore.upload_pictures(cursor, user_id, picture_set_id, container_client, pictures, seed_name, zoom_level, nb_seeds)
    except Exception as error:
        # Chain the original exception so its traceback is not lost.
        raise DatastoreError(error) from error

async def create_picture_set(cursor, container_client, user_id: str, nb_pictures: int):
    """
    Creates a new picture set (batch-import session) in the datastore and
    returns its id.

    Args:
        cursor: open datastore cursor.
        container_client: Azure blob container client.
        user_id: id of the owning user.
        nb_pictures: number of pictures expected in the set.

    Raises:
        DatastoreError: wraps any exception raised by the datastore call,
            chaining the original exception as the cause.
    """
    try :
        # NOTE: datastore.create_picture_set takes (cursor, container_client,
        # nb_pictures, user_id) — the last two arguments are deliberately
        # swapped relative to this wrapper's signature.
        return await datastore.create_picture_set(cursor, container_client, nb_pictures, user_id)
    except Exception as error:
        # Chain the original exception so its traceback is not lost.
        raise DatastoreError(error) from error

async def get_pipelines() -> list:

"""
Expand All @@ -113,14 +120,8 @@ async def get_pipelines() -> list:
async def save_inference_result(cursor, user_id:str, inference_dict, picture_id:str, pipeline_id:str, type:int):
    """
    Registers an inference result in the datastore and returns the
    datastore's response.

    Note: the `type` parameter shadows the builtin; kept for backward
    compatibility with existing keyword callers.
    """
    return await datastore.register_inference_result(cursor, user_id, inference_dict, picture_id, pipeline_id, type)

async def save_perfect_feedback(inference_id:str, user_id:str):
    """
    Registers a "perfect inference" feedback for the given inference and user.

    Opens its own connection/cursor; note the connection is never closed here.
    """
    # maybe --> user_id = user.get_user_id(cursor, email) (i.e. we have the email and not the id directly)
    connection = get_connection()
    cursor = get_cursor(connection)
    await datastore.register_perfect_inference_feeback(inference_id, user_id, cursor)
async def save_perfect_feedback(cursor, inference_id:str, user_id:str, boxes_id):
    """
    Registers a "perfect inference" feedback (positive feedback on the given
    boxes) for the given inference and user, using the provided cursor.
    """
    await datastore.new_perfect_inference_feeback(cursor, inference_id, user_id, boxes_id)

async def save_annoted_feedback(inference_id:str, user_id:str, inference_feedback:dict):
    """
    Registers annotated (corrected) inference feedback for the given inference
    and user.

    Opens its own connection/cursor; note the connection is never closed here.
    """
    # maybe --> user_id = user.get_user_id(cursor, email) (i.e. we have the email and not the id directly)
    connection = get_connection()
    cursor = get_cursor(connection)
    await datastore.register_annoted_inference_feeback(inference_id, user_id, inference_feedback, cursor)
async def save_annoted_feedback(cursor, inference_id:str, user_id:str, boxes):
    """
    Registers annotated (corrected) inference feedback for the given boxes,
    inference and user, using the provided cursor.
    """
    await datastore.new_annoted_inference_feeback(cursor, inference_id, user_id, boxes)
Loading
Loading