Patch summary (text before the first "diff --git" header is preamble).

  * source/NIPS.py: Nips drops its __init__; `url` and `name` become class
    attributes, and the poster-list parser is renamed get_urls -> parse_urls.
  * source/__init__.py: new package module exporting PaperRequestsTask and Nips.
  * source/base.py: PaperRequestsTask drops __init__ and the abstractmethod
    decorators, renames request -> requestx, and gains a classmethod get_urls()
    that fetches cls.url and returns [] when the fetch yields None.
  * worker.py: registers Nips() with celery_app, adds a batch() slicing helper,
    and makes test_celery_worker send "Nips" tasks in slices of
    settings.REQUESTS_BATCH_SIZE urls.

Review notes:
  * requestx() still calls requests.get(url) without a timeout.
  * test_celery_worker() no longer uses its `word` argument.

diff --git a/backend/app/app/source/NIPS.py b/backend/app/app/source/NIPS.py
index d69f40c..3a136b9 100644
--- a/backend/app/app/source/NIPS.py
+++ b/backend/app/app/source/NIPS.py
@@ -4,12 +4,11 @@
 
 
 class Nips(PaperRequestsTask):
-    def __init__(self):
-        url: str = "https://nips.cc/Conferences/2023/Schedule?type=Poster"
-        super().__init__(url)
+    url: str = "https://nips.cc/Conferences/2023/Schedule?type=Poster"
+    name: str = "Nips"
 
     @staticmethod
-    def get_urls(response: HtmlResponse) -> list[str]:
+    def parse_urls(response: HtmlResponse) -> list[str]:
         poster_ids = response.css(".maincard::attr(id)").getall()
         urls = [
             f"https://nips.cc/Conferences/2023/Schedule?showEvent={poster_id.replace('maincard_', '')}"
diff --git a/backend/app/app/source/__init__.py b/backend/app/app/source/__init__.py
new file mode 100644
index 0000000..80b3ed0
--- /dev/null
+++ b/backend/app/app/source/__init__.py
@@ -0,0 +1,4 @@
+from app.source.base import PaperRequestsTask
+from app.source.NIPS import Nips
+
+__all__ = ["PaperRequestsTask", "Nips"]
diff --git a/backend/app/app/source/base.py b/backend/app/app/source/base.py
index 0cb91f3..5469519 100644
--- a/backend/app/app/source/base.py
+++ b/backend/app/app/source/base.py
@@ -1,5 +1,4 @@
 import logging
-from abc import abstractmethod
 
 import requests
 from celery import Task
@@ -7,23 +6,27 @@
 
 
 class PaperRequestsTask(Task):
-    def __init__(self, url):
-        self.url = url
+    url: str
 
-    @abstractmethod
     @staticmethod
-    def get_urls(response: HtmlResponse) -> list[str]:
+    def parse_urls(response: HtmlResponse) -> list[str]:
         # you should return list of absolute urls
         raise NotImplementedError
 
+    @classmethod
+    def get_urls(cls) -> list[str]:
+        response = cls.requestx(cls.url)
+        if response is None:
+            return []
+        return cls.parse_urls(response)
+
     @staticmethod
-    @abstractmethod
     def parse(response: HtmlResponse) -> dict[str, str]:
         # you should return dict with fields:
         raise NotImplementedError
 
     @staticmethod
-    def request(url: str) -> HtmlResponse | None:
+    def requestx(url: str) -> HtmlResponse | None:
         try:
             response = requests.get(url)
             response.raise_for_status()
@@ -35,7 +38,7 @@ def request(url: str) -> HtmlResponse | None:
     def run(self, urls: list[str]):
         results = []
         for url in urls:
-            response = PaperRequestsTask.request(url)
+            response = PaperRequestsTask.requestx(url)
             if response is None:
                 continue
             results.append(self.parse(response))
diff --git a/backend/app/app/worker.py b/backend/app/app/worker.py
index 345b92e..ffcf915 100644
--- a/backend/app/app/worker.py
+++ b/backend/app/app/worker.py
@@ -1,13 +1,18 @@
-import logging
-
 from app.core.celery_app import celery_app
+from app.core.config import settings
+from app.source import Nips
+
+celery_app.register_task(Nips())
 
 
-def run_paper_requests_task(source: str):
-    pass
+def batch(iterable, n=1):
+    l = len(iterable)
+    for ndx in range(0, l, n):
+        yield iterable[ndx : min(ndx + n, l)]
 
 
 @celery_app.task(acks_late=True)
 def test_celery_worker(word: str) -> None:
-    logging.info("Celery worker is working")
-    logging.info(f"DONE: {word}")
+    urls = Nips.get_urls()
+    for url in batch(urls, settings.REQUESTS_BATCH_SIZE):
+        celery_app.send_task("Nips", kwargs={"urls": url})