def remove_format_chars(line):
    """Replace newline, carriage-return and tab characters with spaces.

    Every matching character becomes exactly one space: the length of the
    text is preserved and consecutive format characters are not collapsed.

    Args:
        line: The text to clean.

    Returns:
        A copy of ``line`` in which each newline, carriage return and tab
        has been replaced by a single space.
    """
    # str.translate does the fixed one-to-one substitution in a single pass
    # and does not rely on the ``re`` module being imported by this file.
    format_char_table = str.maketrans({"\n": " ", "\r": " ", "\t": " "})
    return line.translate(format_char_table)