From 5213701cc8e0c67213df52ee116d7e8f61a70f63 Mon Sep 17 00:00:00 2001
From: maxachis
Date: Sat, 15 Jun 2024 18:14:08 -0400
Subject: [PATCH] Update search cache retrieval and update methods to use
 search cache endpoint rather than direct calls to database.

---
Reviewer notes (not part of the commit message):
* update_search_cache: the POST body is now built from a list comprehension.
  The original passed a generator expression to json.dumps, which raises
  TypeError because generators are not JSON serializable.
* NOTE(review): the POST body carries only the status values, while the SQL
  path it replaces also sent each agency_id. Confirm the payload the
  endpoint expects.
* The index line still records the blob ids of the original commit.

 agency_homepage_searcher/homepage_searcher.py | 47 +++++++++++++++----
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/agency_homepage_searcher/homepage_searcher.py b/agency_homepage_searcher/homepage_searcher.py
index b2857a9..9b1e298 100644
--- a/agency_homepage_searcher/homepage_searcher.py
+++ b/agency_homepage_searcher/homepage_searcher.py
@@ -1,10 +1,16 @@
 import csv
+import json
+import os
 import tempfile
+from http import HTTPStatus
+
+import requests
 from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Union
 from enum import Enum
 
+
 from agency_homepage_searcher.agency_info import AgencyInfo
 from agency_homepage_searcher.google_searcher import GoogleSearcher, QuotaExceededError
 from util.huggingface_api_manager import HuggingFaceAPIManager
@@ -12,7 +18,9 @@ from util.miscellaneous_functions import get_filename_friendly_timestamp
 
 
 MAX_SEARCHES = 100  # Maximum searches to perform at a time when searching for results
-
+BASE_URL = "https://data-sources.pdap.io/api/"
+SEARCH_CACHE_ENDPOINT = "homepage-search-cache"
+FULL_CACHE_ENDPOINT = f"{BASE_URL}{SEARCH_CACHE_ENDPOINT}"
 
 class SearchResultEnum(Enum):
     """
@@ -94,6 +102,17 @@ def __init__(
         self.database_manager = database_manager
         self.huggingface_api_manager = huggingface_api_manager
         self.us_state_reference = USStateReference(database_manager)
+        self.pdap_api_key = os.getenv("PDAP_API_KEY")
+
+    def get_search_cache_header(self) -> dict:
+        """
+        Returns the HTTP headers used for requests to the search cache endpoint.
+        Returns: dict
+        """
+        return {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.pdap_api_key}"
+        }
 
     def create_agency_info(self, agency_row: list) -> AgencyInfo:
         """
@@ -123,8 +142,16 @@ def get_agencies_without_homepage_urls(self) -> list[AgencyInfo]:
         Retrieves a list of agencies without homepage URLs.
         Returns: list[AgencyInfo]
         """
-        agency_rows = self.database_manager.execute(SQL_GET_AGENCIES_WITHOUT_HOMEPAGE_URLS)
-        return [self.create_agency_info(agency_row) for agency_row in agency_rows]
+        # TODO: Implement endpoint
+        response = requests.get(
+            url=FULL_CACHE_ENDPOINT,
+            headers=self.get_search_cache_header()
+        )
+        if response.status_code != HTTPStatus.OK:
+            raise Exception(f"Failed to get search cache. Status code: {response.status_code}")
+
+        return [self.create_agency_info(row) for row in response.json()]
+
 
     def search(self, agency_info: AgencyInfo) -> Union[SearchResults, None]:
         """
@@ -260,11 +287,15 @@ def update_search_cache(self, search_results: list[SearchResults]) -> None:
         Args:
             search_results:
         """
-        parameters = []
-        for search_result in search_results:
-            parameter = (search_result.agency_id, search_result.search_result_status.value)
-            parameters.append(parameter)
-        self.database_manager.executemany(SQL_UPDATE_CACHE, parameters)
+
+        response = requests.post(
+            url=FULL_CACHE_ENDPOINT,
+            data=json.dumps([search_result.search_result_status.value for search_result in search_results]),
+            headers=self.get_search_cache_header()
+        )
+
+        if response.status_code != HTTPStatus.OK:
+            raise Exception(f"Failed to update search cache. Status code: {response.status_code}")
 
     def _try_search_agency_info(self, agency_info: AgencyInfo) -> Union[SearchResults, List]:
         """