Skip to content

Commit

Permalink
Merge pull request hotosm#6577 from bshankar/download-csv-optimization
Browse files Browse the repository at this point in the history
Fix: Speed up CSV generation by minimizing DB probes
  • Loading branch information
ramyaragupathy authored Oct 1, 2024
2 parents cae8fb4 + e9b84d4 commit 6b3fb73
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 46 deletions.
4 changes: 2 additions & 2 deletions backend/models/postgis/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,13 @@ class Project(db.Model):
def percent_mapped(self):
    """Return the percentage of usable tasks that are mapped or validated.

    "Usable" tasks are ``total_tasks`` minus ``tasks_bad_imagery``. The
    result is floored to a whole number.

    This attribute is also selected as ``Project.percent_mapped`` in the CSV
    search query, so the body is kept to plain arithmetic operators.

    NOTE(review): there is no guard for a zero denominator (every task
    flagged as bad imagery) -- confirm callers never hit that case.
    """
    # Multiply before the floor division so the percentage is not floored
    # to 0 for every partially mapped project.
    return (
        (self.tasks_mapped + self.tasks_validated)
        * 100
        // (self.total_tasks - self.tasks_bad_imagery)
    )

@hybrid_property
def percent_validated(self):
    """Return the percentage of usable tasks that are validated.

    "Usable" tasks are ``total_tasks`` minus ``tasks_bad_imagery``. The
    result is floored to a whole number.

    This attribute is also selected as ``Project.percent_validated`` in the
    CSV search query, so the body is kept to plain arithmetic operators.

    NOTE(review): there is no guard for a zero denominator (every task
    flagged as bad imagery) -- confirm callers never hit that case.
    """
    # Multiply first, then floor-divide, to get an integer percentage.
    return self.tasks_validated * 100 // (self.total_tasks - self.tasks_bad_imagery)

# Mapped Objects
tasks = db.relationship(
Expand Down
147 changes: 103 additions & 44 deletions backend/services/project_search_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,30 +69,62 @@ def __init__(self, message):

class ProjectSearchService:
@staticmethod
def create_search_query(user=None):
query = (
db.session.query(
Project.id.label("id"),
Project.difficulty,
Project.priority,
Project.default_locale,
Project.centroid.ST_AsGeoJSON().label("centroid"),
Project.organisation_id,
Project.tasks_bad_imagery,
Project.tasks_mapped,
Project.tasks_validated,
Project.status,
Project.total_tasks,
Project.last_updated,
Project.due_date,
Project.country,
Organisation.name.label("organisation_name"),
Organisation.logo.label("organisation_logo"),
def create_search_query(user=None, as_csv: bool = False):
if as_csv:
query = (
db.session.query(
Project.id.label("id"),
ProjectInfo.name.label("project_name"),
Project.difficulty,
Project.priority,
Project.default_locale,
Project.centroid.ST_AsGeoJSON().label("centroid"),
Project.organisation_id,
Project.tasks_bad_imagery,
Project.tasks_mapped,
Project.tasks_validated,
Project.percent_mapped,
Project.percent_validated,
Project.status,
Project.total_tasks,
Project.last_updated,
Project.due_date,
Project.country,
Organisation.name.label("organisation_name"),
Organisation.logo.label("organisation_logo"),
Project.created.label("creation_date"),
func.coalesce(
func.sum(func.ST_Area(Project.geometry, True) / 1000000)
).label("total_area"),
)
.filter(Project.geometry is not None)
.outerjoin(Organisation, Organisation.id == Project.organisation_id)
.group_by(Organisation.id, Project.id, ProjectInfo.name)
)
else:
query = (
db.session.query(
Project.id.label("id"),
Project.difficulty,
Project.priority,
Project.default_locale,
Project.centroid.ST_AsGeoJSON().label("centroid"),
Project.organisation_id,
Project.tasks_bad_imagery,
Project.tasks_mapped,
Project.tasks_validated,
Project.status,
Project.total_tasks,
Project.last_updated,
Project.due_date,
Project.country,
Organisation.name.label("organisation_name"),
Organisation.logo.label("organisation_logo"),
)
.filter(Project.geometry is not None)
.outerjoin(Organisation, Organisation.id == Project.organisation_id)
.group_by(Organisation.id, Project.id)
)
.filter(Project.geometry is not None)
.outerjoin(Organisation, Organisation.id == Project.organisation_id)
.group_by(Organisation.id, Project.id)
)

# Get public projects only for anonymous user.
if user is None:
Expand Down Expand Up @@ -202,35 +234,62 @@ def get_total_contributions(paginated_results):
@staticmethod
@cached(csv_download_cache)
def search_projects_as_csv(search_dto: ProjectSearchDTO, user) -> str:
    """Return the projects matching ``search_dto`` as CSV text.

    The rows come from the CSV variant of the search query
    (``_filter_projects(..., True)``). Enum-backed columns (priority,
    difficulty, status) are written by name, internal columns are dropped
    and the remaining ones are renamed to camelCase headers.

    Args:
        search_dto: Search criteria forwarded to ``_filter_projects``.
        user: The requesting user, or ``None``. Users whose role is
            ``UserRole.ADMIN`` also get a ``partnerNames`` column.

    Returns:
        The CSV document as a string, without the DataFrame index.
    """
    all_results, _ = ProjectSearchService._filter_projects(search_dto, user, True)
    rows = [row._asdict() for row in all_results]
    is_user_admin = user is not None and user.role == UserRole.ADMIN.value

    # Fetch partner names for every exported project in one query instead
    # of issuing one query per row.
    partner_names_by_project = {}
    if is_user_admin and rows:
        partner_rows = (
            ProjectPartnership.query.with_entities(
                ProjectPartnership.project_id, Partner.name
            )
            .join(Partner, ProjectPartnership.partner_id == Partner.id)
            .filter(ProjectPartnership.project_id.in_([row["id"] for row in rows]))
            .group_by(ProjectPartnership.project_id, Partner.name)
            .all()
        )
        for project_id, partner_name in partner_rows:
            partner_names_by_project.setdefault(project_id, []).append(partner_name)

    for row in rows:
        row["priority"] = ProjectPriority(row["priority"]).name
        row["difficulty"] = ProjectDifficulty(row["difficulty"]).name
        row["status"] = ProjectStatus(row["status"]).name
        # The aggregated area can be NULL; round(None, 3) would raise, so
        # leave such cells empty in the CSV.
        if row["total_area"] is not None:
            row["total_area"] = round(row["total_area"], 3)
        row["total_contributors"] = Project.get_project_total_contributions(
            row["id"]
        )
        if is_user_admin:
            row["partner_names"] = partner_names_by_project.get(row["id"], [])

    df = pd.json_normalize(rows)
    columns_to_drop = [
        "default_locale",
        "organisation_id",
        "organisation_logo",
        "tasks_bad_imagery",
        "tasks_mapped",
        "tasks_validated",
        "total_tasks",
        "centroid",
    ]
    columns_to_rename = {
        "id": "projectId",
        "organisation_name": "organisationName",
        "last_updated": "lastUpdated",
        "due_date": "dueDate",
        "percent_mapped": "percentMapped",
        "percent_validated": "percentValidated",
        "total_area": "totalArea",
        "total_contributors": "totalContributors",
        "partner_names": "partnerNames",
        "project_name": "name",
    }

    # errors="ignore": an empty result set yields a frame with no columns,
    # and dropping missing labels would otherwise raise KeyError.
    df = df.drop(columns=columns_to_drop, errors="ignore")
    df = df.rename(columns=columns_to_rename)
    return df.to_csv(index=False)

@staticmethod
Expand Down Expand Up @@ -278,10 +337,10 @@ def search_projects(search_dto: ProjectSearchDTO, user) -> ProjectSearchResultsD
return dto

@staticmethod
def _filter_projects(search_dto: ProjectSearchDTO, user):
def _filter_projects(search_dto: ProjectSearchDTO, user, as_csv=False):
"""Filters all projects based on criteria provided by user"""

query = ProjectSearchService.create_search_query(user)
query = ProjectSearchService.create_search_query(user, as_csv)

query = query.join(ProjectInfo).filter(
ProjectInfo.locale.in_([search_dto.preferred_locale, "en"])
Expand Down

0 comments on commit 6b3fb73

Please sign in to comment.