Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add search API functionality #2

Closed
wants to merge 10 commits into from
5 changes: 4 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from flask import Flask
from flask_restful import Api
from flask_restx import Api
from flask_cors import CORS

from resources.Search import Search
from resources.User import User
from resources.Login import Login
from resources.RefreshSession import RefreshSession
Expand Down Expand Up @@ -41,6 +43,7 @@ def create_app() -> Flask:
(ResetPassword, "/reset-password"),
(ResetTokenValidation, "/reset-token-validation"),
(QuickSearch, "/quick-search/<search>/<location>"),
(Search, "/search/<string:coarse_record_type>/<string:location>"),
(Archives, "/archives"),
(DataSources, "/data-sources"),
(DataSourcesMap, "/data-sources-map"),
Expand Down
210 changes: 210 additions & 0 deletions middleware/search_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
"""

Check warning on line 1 in middleware/search_query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] middleware/search_query.py#L1 <205>

1 blank line required between summary line and description
Raw output
./middleware/search_query.py:1:1: D205 1 blank line required between summary line and description

Check warning on line 1 in middleware/search_query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] middleware/search_query.py#L1 <400>

First line should end with a period
Raw output
./middleware/search_query.py:1:1: D400 First line should end with a period
Functions and helpers used by the `Search` resource.
"""

import spacy
import json
import datetime
from typing import List, Dict, Any
from psycopg2.extensions import connection as PgConnection

# TODO: Create search_query_logs table to complement quick_search_query_logs


def expand_params(single_param: str, times: int) -> tuple[str, ...]:
    """Repeat a single parameter a given number of times.

    Used to fill several identical placeholders of a parameterized SQL
    statement with the same value.

    :param single_param: The value to repeat.
    :param times: How many copies to produce; zero or a negative number
        yields an empty tuple.
    :return: A tuple holding ``times`` copies of ``single_param``.
    """
    # Multiplying a one-element tuple avoids building a throwaway list.
    return (single_param,) * times


# Names of the result columns, listed in the same order as the SELECT list
# of QUICK_SEARCH_SQL.
# NOTE(review): not referenced by the code visible in this module --
# presumably meant for turning result rows into dictionaries; confirm.
QUICK_SEARCH_COLUMNS = [
"airtable_uid",
"data_source_name",
"description",
"record_type",
"source_url",
"record_format",
"coverage_start",
"coverage_end",
"agency_supplied",
"agency_name",
"municipality",
"state_iso",
]

# Parameterized search over approved data sources joined to their agencies.
# Expects nine parameters: first a list of record types for
# `record_type = ANY(%s)`, then eight values for the eight `LIKE %s`
# location comparisons (county, derived "<name> County", state ISO code,
# municipality, agency type, jurisdiction type, agency name, state name).
# NOTE(review): LIKE without `%` wildcards in the supplied value behaves as
# an exact match -- confirm whether callers are expected to add wildcards.
QUICK_SEARCH_SQL = """
SELECT
data_sources.airtable_uid,
data_sources.name AS data_source_name,
data_sources.description,
data_sources.record_type,
data_sources.source_url,
data_sources.record_format,
data_sources.coverage_start,
data_sources.coverage_end,
data_sources.agency_supplied,
agencies.name AS agency_name,
agencies.municipality,
agencies.state_iso
FROM
agency_source_link
INNER JOIN
data_sources ON
agency_source_link.airtable_uid = data_sources.airtable_uid
INNER JOIN
agencies ON
agency_source_link.agency_described_linked_uid = agencies.airtable_uid

Check failure on line 62 in middleware/search_query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] middleware/search_query.py#L62 <501>

line too long (82 > 79 characters)
Raw output
./middleware/search_query.py:62:80: E501 line too long (82 > 79 characters)
INNER JOIN
state_names ON
agencies.state_iso = state_names.state_iso
WHERE
data_sources.record_type = ANY(%s) AND
(
agencies.county_name LIKE %s OR
substr(agencies.county_name,3,length(agencies.county_name)-4)
|| ' County' LIKE %s
OR agencies.state_iso LIKE %s
OR agencies.municipality LIKE %s
OR agencies.agency_type LIKE %s
OR agencies.jurisdiction_type LIKE %s
OR agencies.name LIKE %s
OR state_names.state_name LIKE %s
)
AND data_sources.approval_status = 'approved'
AND data_sources.url_status not in ('broken', 'none found')
"""

# Parameterized INSERT that records one search in `search_query_logs`.
# Expects exactly six parameters, in column order: search, location,
# results, result_count, created_at, datetime_of_request.
INSERT_LOG_QUERY = """
INSERT INTO search_query_logs
(search, location, results, result_count, created_at, datetime_of_request)
VALUES (%s, %s, %s, %s, %s, %s)
"""


class SearchQueryEngine:
    """Run record-type/location searches against PostgreSQL and log them.

    The engine holds a database connection (``self.conn``) and a spaCy
    pipeline (``self.nlp``) used to lemmatize the requested record types.
    """

    def __init__(self, connection: "PgConnection"):
        """Store the database connection and load the spaCy pipeline.

        :param connection: An open psycopg2 connection.
        """
        self.conn = connection
        self.nlp = spacy.load("en_core_web_sm")

    def execute_query(
        self, coarse_record_types: list[str], location: str
    ) -> List[Dict[str, Any]]:
        """Run ``QUICK_SEARCH_SQL`` for the given record types and location.

        :param coarse_record_types: Record types matched with
            ``record_type = ANY(...)``.
        :param location: Value bound to each of the eight location
            comparisons in the query.
        :return: The rows returned by ``cursor.fetchall()``.
        :raises AssertionError: If ``coarse_record_types`` is not a list.
        """
        # Internal invariant: a bare string would be adapted as text, not
        # as the array that ``ANY(%s)`` requires.
        assert isinstance(
            coarse_record_types, list
        ), "coarse_record_types must be a list"
        # One array parameter followed by eight copies of the location.
        params = (coarse_record_types,) + expand_params(location, 8)
        with self.conn.cursor() as cursor:
            cursor.execute(QUICK_SEARCH_SQL, params)
            return cursor.fetchall()

    def print_query_parameters(
        self, search_terms: list[str], location: str
    ) -> None:
        """Print the parameters of the query about to be executed.

        :param search_terms: The search terms used in the query.
        :param location: The location used in the query.
        :return: None.
        """
        print(f"Query parameters: '%{search_terms}%', '%{location}%'")

    def lemmatize(self, entries: list[str]) -> list[str]:
        """Lemmatize every entry to strip inflected forms.

        Each entry is stripped of surrounding whitespace, run through the
        spaCy pipeline, and rebuilt from its token lemmas joined by a
        single space.

        :param entries: Entries to be lemmatized.
        :return: The lemmatized entries, in the same order.
        """
        return [
            " ".join(token.lemma_ for token in self.nlp(entry.strip()))
            for entry in entries
        ]

    def search_query(
        self, record_types: list[str], location: str
    ) -> List[Dict[str, Any]]:
        """Print the query parameters, then execute the search.

        :param record_types: The record types to search for.
        :param location: The location to search within.
        :return: The rows produced by ``execute_query``.
        """
        self.print_query_parameters(record_types, location)
        return self.execute_query(record_types, location)

    def quick_search(
        self,
        coarse_record_types: list[str] = None,
        location: str = "",
        test: bool = False,
    ) -> Dict[str, Any]:
        """Search with the raw and the lemmatized record types.

        Both variants are queried and the larger result set is kept
        (the lemmatized one on a tie).

        :param coarse_record_types: The types of records to search for;
            ``None`` is treated as an empty list.
        :param location: The location to search for records in.
        :param test: When true, the search is not written to the query
            log.
        :return: A dictionary with ``"count"`` (number of results) and
            ``"data"`` (the result rows).
        """
        # ``None`` is only a stand-in for "no record types"; the query
        # itself requires a real list.
        if coarse_record_types is None:
            coarse_record_types = []

        unaltered_results = self.search_query(coarse_record_types, location)
        spacy_results = self.search_query(
            record_types=self.lemmatize(coarse_record_types),
            location=location,
        )

        results = max(spacy_results, unaltered_results, key=len)
        data_sources = {"count": len(results), "data": results}

        if not test:
            self.log_query_results(
                coarse_record_types, location, data_sources
            )

        return data_sources

    def log_query_results(
        self,
        coarse_record_types: list[str],
        location: str,
        data_sources: Dict[str, Any],
    ) -> None:
        """Insert one row describing the search into the query log.

        :param coarse_record_types: The record types that were searched.
        :param location: The location that was searched.
        :param data_sources: The ``{"count": ..., "data": ...}`` result
            dictionary; every record in ``"data"`` must expose an
            ``"airtable_uid"`` key.
        :return: None.
        """
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # If both coarse_record_types and location are blank, all results
        # will have been returned, so store a marker instead of every uid.
        if coarse_record_types or location != "":
            query_results = [
                record["airtable_uid"] for record in data_sources["data"]
            ]
        else:
            query_results = ["ALL"]
        with self.conn.cursor() as cursor:
            cursor.execute(
                INSERT_LOG_QUERY,
                (
                    coarse_record_types,
                    location,
                    json.dumps(query_results),
                    data_sources["count"],
                    # The statement has six placeholders: the same
                    # timestamp fills created_at and datetime_of_request.
                    timestamp,
                    timestamp,
                ),
            )
        self.conn.commit()

Check warning on line 210 in middleware/search_query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] middleware/search_query.py#L210 <391>

blank line at end of file
Raw output
./middleware/search_query.py:210:1: W391 blank line at end of file
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ exceptiongroup==1.1.3
Flask==2.3.2
Flask-Cors==4.0.0
Flask-RESTful==0.3.10
flask-restx==1.3.0
gotrue==1.0.3
gunicorn==21.2.0
h11==0.14.0
Expand All @@ -46,7 +47,7 @@ pycparser==2.21
pydantic==2.2.1
pydantic_core==2.6.1
PyJWT==2.8.0
pytest==6.2.5
pytest~=8.1.2
python-dateutil==2.8.2
python-dotenv==1.0.0
pytz==2023.3
Expand Down Expand Up @@ -75,4 +76,5 @@ wasabi==1.1.2
websockets==10.4
Werkzeug==3.0.1
zipp==3.16.2
pytest-mock~=3.12.0
pytest-mock~=3.12.0
lorem~=0.1.1
5 changes: 3 additions & 2 deletions resources/PsycopgResource.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from flask_restful import Resource
from flask_restx import Resource

Check warning on line 1 in resources/PsycopgResource.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] resources/PsycopgResource.py#L1 <100>

Missing docstring in public module
Raw output
./resources/PsycopgResource.py:1:1: D100 Missing docstring in public module


class PsycopgResource(Resource):
def __init__(self, **kwargs):
def __init__(self, *args, **kwargs):
"""
Initializes the resource with a database connection.
- kwargs (dict): Keyword arguments containing 'psycopg2_connection' for database connection.
"""
super().__init__(*args, **kwargs)
self.psycopg2_connection = kwargs["psycopg2_connection"]

def get(self):
Expand Down
84 changes: 84 additions & 0 deletions resources/Search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from middleware.search_query import SearchQueryEngine

Check warning on line 1 in resources/Search.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] resources/Search.py#L1 <100>

Missing docstring in public module
Raw output
./resources/Search.py:1:1: D100 Missing docstring in public module
from middleware.security import api_required
import requests
import json
import os
from middleware.initialize_psycopg2_connection import initialize_psycopg2_connection

Check failure on line 6 in resources/Search.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] resources/Search.py#L6 <501>

line too long (84 > 79 characters)
Raw output
./resources/Search.py:6:80: E501 line too long (84 > 79 characters)
from flask import request
from typing import Dict, Any

from resources.PsycopgResource import PsycopgResource


class Search(PsycopgResource):
    """Expose the record-type/location search as a REST resource.

    The query itself is delegated to a ``SearchQueryEngine`` built on the
    resource's psycopg2 connection.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the resource and create its search engine.

        All arguments are forwarded to ``PsycopgResource.__init__``, which
        is relied upon to set ``self.psycopg2_connection``.
        """
        super().__init__(*args, **kwargs)
        self.engine = SearchQueryEngine(connection=self.psycopg2_connection)

    # api_required decorator requires the request's header to include an
    # "Authorization" key with the value formatted as "Bearer [api_key]".
    # A user can get an API key by signing up and logging in (see User.py)
    @api_required
    def get(self, coarse_record_types: str, location: str) -> Dict[str, Any]:
        """Search data sources by record type and location.

        If the first attempt finds nothing, the database connection is
        re-initialized and the search is tried once more.

        NOTE(review): the route shown for app.py names its URL variable
        ``coarse_record_type`` (singular) while this method expects
        ``coarse_record_types`` -- confirm the two agree.

        :param coarse_record_types: The record type to search for; a
            single string is wrapped into a one-element list.
        :param location: The location provided by the user.
        :return: On success, a dictionary with a message and the search
            results. When nothing is found, a ``(dict, 404)`` tuple; when
            the search fails, a ``(dict, 500)`` tuple.
        """
        # silent=True yields None instead of raising when the request
        # carries no (valid) JSON body, which is normal for a GET.
        payload = request.get_json(silent=True)
        if isinstance(payload, dict):
            test = bool(payload.get("test_flag"))
        else:
            test = False

        # Always bind the list, whatever type was passed in.
        if isinstance(coarse_record_types, str):
            record_types = [coarse_record_types]
        else:
            record_types = list(coarse_record_types or [])

        try:
            data_sources = self.engine.quick_search(
                record_types, location, test
            )

            if data_sources["count"] == 0:
                # Retry once on a fresh connection; the engine has to be
                # pointed at it, otherwise it keeps using the old one.
                self.psycopg2_connection = initialize_psycopg2_connection()
                self.engine.conn = self.psycopg2_connection
                data_sources = self.engine.quick_search(
                    record_types, location, test
                )

            if data_sources["count"] == 0:
                return {
                    "count": 0,
                    "message": "No results found. Please considering "
                    "requesting a new data source.",
                }, 404

            return {
                "message": "Results for search successfully retrieved",
                "data": data_sources,
            }

        except Exception as e:
            self.psycopg2_connection.rollback()
            print(str(e))
            user_message = "There was an error during the search operation"
            self._report_error(
                user_message
                + ": "
                + str(e)
                + "\n"
                + f"Record Types: {record_types}\n"
                + f"Location: {location}"
            )
            return {"count": 0, "message": user_message}, 500

    @staticmethod
    def _report_error(content: str) -> None:
        """Post an error report to the webhook named by ``WEBHOOK_URL``.

        Reporting is best effort: a missing URL or a failed post must not
        prevent the caller from returning its error response.

        :param content: The text to send as the webhook message.
        """
        webhook_url = os.getenv("WEBHOOK_URL")
        if not webhook_url:
            return
        try:
            requests.post(
                webhook_url,
                data=json.dumps({"content": content}),
                headers={"Content-Type": "application/json"},
                # Without a timeout a stalled webhook would hang the
                # request handler indefinitely.
                timeout=10,
            )
        except requests.RequestException as post_error:
            print(str(post_error))
Loading
Loading