diff --git a/backend/app/services/scrapper/schedule_scrapper.py b/backend/app/services/scrapper/schedule_scrapper.py
index 84b419b..e945fd1 100644
--- a/backend/app/services/scrapper/schedule_scrapper.py
+++ b/backend/app/services/scrapper/schedule_scrapper.py
@@ -9,7 +9,7 @@
 from models.major import Major
 from models.period import Period
 from models.user import User
-from scraper.main import scrape_courses_with_credentials, AUTH_URL
+from scraper.main import scrape_courses_with_credentials, AUTH_URL, generate_desc_prerequisite
 
 
 class ScheduleScrapperServices:
@@ -47,6 +47,7 @@ def callback(ch, method, properties, body):
             period.last_update_at = now
             period.save()
             app.logger.info(f"Done scrapping kd_org: {method.routing_key}; period: {active_period}; at: {now} UTC")
+            generate_desc_prerequisite(period, username, password)
 
         channel.basic_consume(
             queue=queue_name, on_message_callback=callback, auto_ack=True
diff --git a/backend/scraper/main.py b/backend/scraper/main.py
index 4f4abdb..d1fc66d 100644
--- a/backend/scraper/main.py
+++ b/backend/scraper/main.py
@@ -33,7 +33,6 @@ def scrape_courses_with_credentials(period, username, password):
     r = req.get(CHANGEROLE_URL)
     r = req.get(DETAIL_SCHEDULE_URL.format(period=period))
     courses = create_courses(r.text, is_detail=True)
-    generate_desc_prerequisite(courses, req)
     return courses
 
 
@@ -95,12 +94,14 @@
     return None, None
 
 
-def generate_desc_prerequisite(courses, req):
-    print("=== generating desc and prereq ===")
-    now = datetime.datetime.now()
-    for course in courses:
-        html = req.get(DETAIL_COURSES_URL.format(course=course.course_code, curr=course.curriculum)).text
-        soup = BeautifulSoup(html, 'html.parser')
+def generate_desc_prerequisite(period, username, password):
+    req = requests.Session()
+    r = req.post(AUTH_URL, data={'u': username,
+                                 'p': password}, verify=False)
+    r = req.get(CHANGEROLE_URL)
+    for course in period.courses:
+        r = req.get(DETAIL_COURSES_URL.format(course=course.course_code, curr=course.curriculum)).text
+        soup = BeautifulSoup(r, 'html.parser')
         for textarea in soup.findAll('textarea'):
             if textarea.contents:
                 textarea_content = textarea.contents[0]
@@ -118,9 +119,7 @@ def generate_desc_prerequisite(courses, req):
                 prerequisites += p.group().strip() + ","
         course.description = desc
         course.prerequisite = prerequisites[:-1]
-    end = datetime.datetime.now()
-    print("time elapsed ms :: "+ str((end-now).microseconds))
-    print("time elapsed s :: "+ str((end-now).seconds))
+    period.save()
 
 def create_courses(html, is_detail=False):
     soup = BeautifulSoup(html, 'html.parser')