diff --git a/.github/actions/install-deps/action.yml b/.github/actions/install-deps/action.yml index be65095c1..aeec3f2a8 100644 --- a/.github/actions/install-deps/action.yml +++ b/.github/actions/install-deps/action.yml @@ -3,7 +3,7 @@ description: "Workflow for installing dependencies" runs: using: "composite" steps: - - uses: pnpm/action-setup@v2.2.2 + - uses: pnpm/action-setup@v2.2.4 with: version: latest - name: Use Node.js diff --git a/lib/configs/event/schedule.ts b/lib/configs/event/schedule.ts index 535fcda51..d71b76f2d 100644 --- a/lib/configs/event/schedule.ts +++ b/lib/configs/event/schedule.ts @@ -1,26 +1,81 @@ import * as events from 'aws-cdk-lib/aws-events'; export const syllabusSchedule: { [name: string]: events.Schedule } = { - 'regular': - events.Schedule.cron({ minute: '0', hour: '16', day: '1', month: '*', year: '*' }), - 'fall-pre': - events.Schedule.cron({ minute: '0', hour: '16', day: '19,21,23', month: 'JUL,AUG', year: '*' }), - 'fall-reg1': - events.Schedule.cron({ minute: '0', hour: '16', day: '4,7,10,13,15,17', month: 'SEP', year: '*' }), - 'fall-reg2': - events.Schedule.cron({ minute: '0', hour: '16', day: '20,23,25', month: 'SEP', year: '*' }), - 'fall-reg3': - events.Schedule.cron({ minute: '0', hour: '16', day: '28,30', month: 'SEP', year: '*' }), - 'fall-reg4': - events.Schedule.cron({ minute: '0', hour: '16', day: '3,5,8', month: 'OCT', year: '*' }), - 'spring-pre': - events.Schedule.cron({ minute: '0', hour: '16', day: '14,24', month: 'FEB', year: '*' }), - 'spring-reg1': - events.Schedule.cron({ minute: '0', hour: '16', day: '4,7,10,13,16,18,21,24,27', month: 'MAR', year: '*' }), - 'spring-reg2': - events.Schedule.cron({ minute: '0', hour: '16', day: '3,5,8', month: 'APR', year: '*' }), - 'spring-reg3': - events.Schedule.cron({ minute: '0', hour: '16', day: '16,20,24,26,28', month: 'APR', year: '*' }), - 'spring-reg4': - events.Schedule.cron({ minute: '0', hour: '16', day: '9,12,14,16', month: 'MAY', year: '*' }), + 'regular': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '1,12', + month: '*', + year: '*', + }), + 'fall-pre': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '19,21,23', + month: 'JUL,AUG', + year: '*', + }), + 'fall-reg1': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '4,7,10,13,15,17', + month: 'SEP', + year: '*', + }), + 'fall-reg2': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '20,23,25', + month: 'SEP', + year: '*', + }), + 'fall-reg3': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '28,30', + month: 'SEP', + year: '*', + }), + 'fall-reg4': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '3,5,8', + month: 'OCT', + year: '*', + }), + 'spring-pre': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '14,24', + month: 'FEB', + year: '*', + }), + 'spring-reg1': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '4,7,10,13,16,18,21,24,27', + month: 'MAR', + year: '*', + }), + 'spring-reg2': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '3,5,8', + month: 'APR', + year: '*', + }), + 'spring-reg3': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '16,20,24,26,28', + month: 'APR', + year: '*', + }), + 'spring-reg4': events.Schedule.cron({ + minute: '0', + hour: '16', + day: '9,12,14,16', + month: 'MAY', + year: '*', + }), }; diff --git a/lib/constructs/common/lambda-functions.ts b/lib/constructs/common/lambda-functions.ts index 6190515b6..403eebd93 100644 --- a/lib/constructs/common/lambda-functions.ts +++ b/lib/constructs/common/lambda-functions.ts @@ -584,7 +584,7 @@ export class ForumThreadFunctions extends Construct { functionName: 'get-single-thread', logRetention: logs.RetentionDays.ONE_MONTH, memorySize: 128, - role: dynamoDBReadRole, + role: dynamoDBPutRole, runtime: lambda.Runtime.PYTHON_3_9, timeout: Duration.seconds(3), environment: props.envVars, diff --git a/package.json b/package.json index 7d832ef69..374c21785 100644 --- a/package.json +++ b/package.json @@ -22,11 +22,11 @@ "@aws-cdk/assert": "2.68.0", "@commitlint/cli": "17.6.1", "@commitlint/config-conventional": "17.6.1", - "@types/jest": "29.5.0", + "@types/jest": "29.5.1", "@types/node": "18.15.12", "@types/pluralize": "0.0.29", - "@typescript-eslint/eslint-plugin": "5.55.0", - "@typescript-eslint/parser": "5.55.0", + "@typescript-eslint/eslint-plugin": "5.59.6", + "@typescript-eslint/parser": "5.59.6", "aws-cdk-lib": "2.74.0", "constructs": "10.1.312", "esbuild": "^0.17.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8973de003..73f7d14f6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -43,8 +43,8 @@ devDependencies: specifier: 17.6.1 version: 17.6.1 '@types/jest': - specifier: 29.5.0 - version: 29.5.0 + specifier: 29.5.1 + version: 29.5.1 '@types/node': specifier: 18.15.12 version: 18.15.12 @@ -52,11 +52,11 @@ devDependencies: specifier: 0.0.29 version: 0.0.29 '@typescript-eslint/eslint-plugin': - specifier: 5.55.0 - version: 5.55.0(@typescript-eslint/parser@5.55.0)(eslint@8.38.0)(typescript@4.9.5) + specifier: 5.59.6 + version: 5.59.6(@typescript-eslint/parser@5.59.6)(eslint@8.38.0)(typescript@4.9.5) '@typescript-eslint/parser': - specifier: 5.55.0 - version: 5.55.0(eslint@8.38.0)(typescript@4.9.5) + specifier: 5.59.6 + version: 5.59.6(eslint@8.38.0)(typescript@4.9.5) aws-cdk-lib: specifier: 2.74.0 version: 2.74.0(constructs@10.1.312) @@ -77,7 +77,7 @@ devDependencies: version: 3.5.2(eslint-plugin-import@2.26.0)(eslint@8.38.0) eslint-plugin-import: specifier: 2.26.0 - version: 2.26.0(@typescript-eslint/parser@5.55.0)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0) + version: 2.26.0(@typescript-eslint/parser@5.59.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0) husky: specifier: 8.0.3 version: 8.0.3 @@ -1387,8 +1387,8 @@ packages: dependencies: '@types/istanbul-lib-report': 3.0.0 - /@types/jest@29.5.0: - resolution: {integrity: sha512-3Emr5VOl/aoBwnWcH/EFQvlSAmjV+XtV9GGu5mwdYew5vhQh0IUZx/60x0TzHDu09Bi7HMx10t/namdJw5QIcg==} + /@types/jest@29.5.1: + resolution: {integrity: sha512-tEuVcHrpaixS36w7hpsfLBLpjtMRJUE09/MHXn923LOVojDwyC14cWcfc0rDs0VEfUyYmt/+iX1kxxp+gZMcaQ==} dependencies: expect: 29.5.0 pretty-format: 29.5.0 @@ -1437,8 +1437,8 @@ packages: dependencies: '@types/yargs-parser': 21.0.0 - /@typescript-eslint/eslint-plugin@5.55.0(@typescript-eslint/parser@5.55.0)(eslint@8.38.0)(typescript@4.9.5): - resolution: {integrity: sha512-IZGc50rtbjk+xp5YQoJvmMPmJEYoC53SiKPXyqWfv15XoD2Y5Kju6zN0DwlmaGJp1Iw33JsWJcQ7nw0lGCGjVg==} + /@typescript-eslint/eslint-plugin@5.59.6(@typescript-eslint/parser@5.59.6)(eslint@8.38.0)(typescript@4.9.5): + resolution: {integrity: sha512-sXtOgJNEuRU5RLwPUb1jxtToZbgvq3M6FPpY4QENxoOggK+UpTxUBpj6tD8+Qh2g46Pi9We87E+eHnUw8YcGsw==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: '@typescript-eslint/parser': ^5.0.0 @@ -1449,10 +1449,10 @@ packages: optional: true dependencies: '@eslint-community/regexpp': 4.5.0 - '@typescript-eslint/parser': 5.55.0(eslint@8.38.0)(typescript@4.9.5) - '@typescript-eslint/scope-manager': 5.55.0 - '@typescript-eslint/type-utils': 5.55.0(eslint@8.38.0)(typescript@4.9.5) - '@typescript-eslint/utils': 5.55.0(eslint@8.38.0)(typescript@4.9.5) + '@typescript-eslint/parser': 5.59.6(eslint@8.38.0)(typescript@4.9.5) + '@typescript-eslint/scope-manager': 5.59.6 + '@typescript-eslint/type-utils': 5.59.6(eslint@8.38.0)(typescript@4.9.5) + '@typescript-eslint/utils': 5.59.6(eslint@8.38.0)(typescript@4.9.5) debug: 4.3.4 eslint: 8.38.0 grapheme-splitter: 1.0.4 @@ -1465,8 +1465,8 @@ packages: - supports-color dev: true - /@typescript-eslint/parser@5.55.0(eslint@8.38.0)(typescript@4.9.5): - resolution: {integrity: sha512-ppvmeF7hvdhUUZWSd2EEWfzcFkjJzgNQzVST22nzg958CR+sphy8A6K7LXQZd6V75m1VKjp+J4g/PCEfSCmzhw==} + /@typescript-eslint/parser@5.59.6(eslint@8.38.0)(typescript@4.9.5): + resolution: {integrity: sha512-7pCa6al03Pv1yf/dUg/s1pXz/yGMUBAw5EeWqNTFiSueKvRNonze3hma3lhdsOrQcaOXhbk5gKu2Fludiho9VA==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: eslint: ^6.0.0 || ^7.0.0 || ^8.0.0 @@ -1475,9 +1475,9 @@ packages: typescript: optional: true dependencies: - '@typescript-eslint/scope-manager': 5.55.0 - '@typescript-eslint/types': 5.55.0 - '@typescript-eslint/typescript-estree': 5.55.0(typescript@4.9.5) + '@typescript-eslint/scope-manager': 5.59.6 + '@typescript-eslint/types': 5.59.6 + '@typescript-eslint/typescript-estree': 5.59.6(typescript@4.9.5) debug: 4.3.4 eslint: 8.38.0 typescript: 4.9.5 @@ -1485,16 +1485,16 @@ packages: - supports-color dev: true - /@typescript-eslint/scope-manager@5.55.0: - resolution: {integrity: sha512-OK+cIO1ZGhJYNCL//a3ROpsd83psf4dUJ4j7pdNVzd5DmIk+ffkuUIX2vcZQbEW/IR41DYsfJTB19tpCboxQuw==} + /@typescript-eslint/scope-manager@5.59.6: + resolution: {integrity: sha512-gLbY3Le9Dxcb8KdpF0+SJr6EQ+hFGYFl6tVY8VxLPFDfUZC7BHFw+Vq7bM5lE9DwWPfx4vMWWTLGXgpc0mAYyQ==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} dependencies: - '@typescript-eslint/types': 5.55.0 - '@typescript-eslint/visitor-keys': 5.55.0 + '@typescript-eslint/types': 5.59.6 + '@typescript-eslint/visitor-keys': 5.59.6 dev: true - /@typescript-eslint/type-utils@5.55.0(eslint@8.38.0)(typescript@4.9.5): - resolution: {integrity: sha512-ObqxBgHIXj8rBNm0yh8oORFrICcJuZPZTqtAFh0oZQyr5DnAHZWfyw54RwpEEH+fD8suZaI0YxvWu5tYE/WswA==} + /@typescript-eslint/type-utils@5.59.6(eslint@8.38.0)(typescript@4.9.5): + resolution: {integrity: sha512-A4tms2Mp5yNvLDlySF+kAThV9VTBPCvGf0Rp8nl/eoDX9Okun8byTKoj3fJ52IJitjWOk0fKPNQhXEB++eNozQ==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: eslint: '*' @@ -1503,8 +1503,8 @@ packages: typescript: optional: true dependencies: - '@typescript-eslint/typescript-estree': 5.55.0(typescript@4.9.5) - '@typescript-eslint/utils': 5.55.0(eslint@8.38.0)(typescript@4.9.5) + '@typescript-eslint/typescript-estree': 5.59.6(typescript@4.9.5) + '@typescript-eslint/utils': 5.59.6(eslint@8.38.0)(typescript@4.9.5) debug: 4.3.4 eslint: 8.38.0 tsutils: 3.21.0(typescript@4.9.5) @@ -1513,13 +1513,13 @@ packages: - supports-color dev: true - /@typescript-eslint/types@5.55.0: - resolution: {integrity: sha512-M4iRh4AG1ChrOL6Y+mETEKGeDnT7Sparn6fhZ5LtVJF1909D5O4uqK+C5NPbLmpfZ0XIIxCdwzKiijpZUOvOug==} + /@typescript-eslint/types@5.59.6: + resolution: {integrity: sha512-tH5lBXZI7T2MOUgOWFdVNUILsI02shyQvfzG9EJkoONWugCG77NDDa1EeDGw7oJ5IvsTAAGVV8I3Tk2PNu9QfA==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} dev: true - /@typescript-eslint/typescript-estree@5.55.0(typescript@4.9.5): - resolution: {integrity: sha512-I7X4A9ovA8gdpWMpr7b1BN9eEbvlEtWhQvpxp/yogt48fy9Lj3iE3ild/1H3jKBBIYj5YYJmS2+9ystVhC7eaQ==} + /@typescript-eslint/typescript-estree@5.59.6(typescript@4.9.5): + resolution: {integrity: sha512-vW6JP3lMAs/Tq4KjdI/RiHaaJSO7IUsbkz17it/Rl9Q+WkQ77EOuOnlbaU8kKfVIOJxMhnRiBG+olE7f3M16DA==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: typescript: '*' @@ -1527,8 +1527,8 @@ packages: typescript: optional: true dependencies: - '@typescript-eslint/types': 5.55.0 - '@typescript-eslint/visitor-keys': 5.55.0 + '@typescript-eslint/types': 5.59.6 + '@typescript-eslint/visitor-keys': 5.59.6 debug: 4.3.4 globby: 11.1.0 is-glob: 4.0.3 @@ -1539,8 +1539,8 @@ packages: - supports-color dev: true - /@typescript-eslint/utils@5.55.0(eslint@8.38.0)(typescript@4.9.5): - resolution: {integrity: sha512-FkW+i2pQKcpDC3AY6DU54yl8Lfl14FVGYDgBTyGKB75cCwV3KpkpTMFi9d9j2WAJ4271LR2HeC5SEWF/CZmmfw==} + /@typescript-eslint/utils@5.59.6(eslint@8.38.0)(typescript@4.9.5): + resolution: {integrity: sha512-vzaaD6EXbTS29cVH0JjXBdzMt6VBlv+hE31XktDRMX1j3462wZCJa7VzO2AxXEXcIl8GQqZPcOPuW/Z1tZVogg==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: eslint: ^6.0.0 || ^7.0.0 || ^8.0.0 @@ -1548,9 +1548,9 @@ packages: '@eslint-community/eslint-utils': 4.4.0(eslint@8.38.0) '@types/json-schema': 7.0.11 '@types/semver': 7.3.13 - '@typescript-eslint/scope-manager': 5.55.0 - '@typescript-eslint/types': 5.55.0 - '@typescript-eslint/typescript-estree': 5.55.0(typescript@4.9.5) + '@typescript-eslint/scope-manager': 5.59.6 + '@typescript-eslint/types': 5.59.6 + '@typescript-eslint/typescript-estree': 5.59.6(typescript@4.9.5) eslint: 8.38.0 eslint-scope: 5.1.1 semver: 7.3.8 @@ -1559,11 +1559,11 @@ packages: - typescript dev: true - /@typescript-eslint/visitor-keys@5.55.0: - resolution: {integrity: sha512-q2dlHHwWgirKh1D3acnuApXG+VNXpEY5/AwRxDVuEQpxWaB0jCDe0jFMVMALJ3ebSfuOVE8/rMS+9ZOYGg1GWw==} + /@typescript-eslint/visitor-keys@5.59.6: + resolution: {integrity: sha512-zEfbFLzB9ETcEJ4HZEEsCR9HHeNku5/Qw1jSS5McYJv5BR+ftYXwFFAH5Al+xkGaZEqowMwl7uoJjQb1YSPF8Q==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} dependencies: - '@typescript-eslint/types': 5.55.0 + '@typescript-eslint/types': 5.59.6 eslint-visitor-keys: 3.4.0 dev: true @@ -2540,7 +2540,7 @@ packages: debug: 4.3.4 enhanced-resolve: 5.12.0 eslint: 8.38.0 - eslint-plugin-import: 2.26.0(@typescript-eslint/parser@5.55.0)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0) + eslint-plugin-import: 2.26.0(@typescript-eslint/parser@5.59.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0) get-tsconfig: 4.5.0 globby: 13.1.3 is-core-module: 2.11.0 @@ -2550,7 +2550,7 @@ packages: - supports-color dev: true - /eslint-module-utils@2.7.4(@typescript-eslint/parser@5.55.0)(eslint-import-resolver-node@0.3.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0): + /eslint-module-utils@2.7.4(@typescript-eslint/parser@5.59.6)(eslint-import-resolver-node@0.3.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0): resolution: {integrity: sha512-j4GT+rqzCoRKHwURX7pddtIPGySnX9Si/cgMI5ztrcqOPtk5dDEeZ34CQVPphnqkJytlc97Vuk05Um2mJ3gEQA==} engines: {node: '>=4'} peerDependencies: @@ -2571,7 +2571,7 @@ packages: eslint-import-resolver-webpack: optional: true dependencies: - '@typescript-eslint/parser': 5.55.0(eslint@8.38.0)(typescript@4.9.5) + '@typescript-eslint/parser': 5.59.6(eslint@8.38.0)(typescript@4.9.5) debug: 3.2.7 eslint: 8.38.0 eslint-import-resolver-node: 0.3.6 @@ -2580,7 +2580,7 @@ packages: - supports-color dev: true - /eslint-plugin-import@2.26.0(@typescript-eslint/parser@5.55.0)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0): + /eslint-plugin-import@2.26.0(@typescript-eslint/parser@5.59.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0): resolution: {integrity: sha512-hYfi3FXaM8WPLf4S1cikh/r4IxnO6zrhZbEGz2b660EJRbuxgpDS5gkCuYgGWg2xxh2rBuIr4Pvhve/7c31koA==} engines: {node: '>=4'} peerDependencies: @@ -2590,14 +2590,14 @@ packages: '@typescript-eslint/parser': optional: true dependencies: - '@typescript-eslint/parser': 5.55.0(eslint@8.38.0)(typescript@4.9.5) + '@typescript-eslint/parser': 5.59.6(eslint@8.38.0)(typescript@4.9.5) array-includes: 3.1.6 array.prototype.flat: 1.3.1 debug: 2.6.9 doctrine: 2.1.0 eslint: 8.38.0 eslint-import-resolver-node: 0.3.6 - eslint-module-utils: 2.7.4(@typescript-eslint/parser@5.55.0)(eslint-import-resolver-node@0.3.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0) + eslint-module-utils: 2.7.4(@typescript-eslint/parser@5.59.6)(eslint-import-resolver-node@0.3.6)(eslint-import-resolver-typescript@3.5.2)(eslint@8.38.0) has: 1.0.3 is-core-module: 2.11.0 is-glob: 4.0.3 diff --git a/src/lambda/syllabus-scraper/const.py b/src/lambda/syllabus-scraper/const.py index 87cf93e4c..d4dbd31be 100644 --- a/src/lambda/syllabus-scraper/const.py +++ b/src/lambda/syllabus-scraper/const.py @@ -237,6 +237,6 @@ cron_schedule = ["01-01", "02-01", "02-14", "02-24", "03-01", "03-04", "03-07", "03-10", "03-16", "03-18", "03-21", "03-24", "03-27", "04-01", "04-03", "04-05", "04-08", "04-16", "04-20", "04-24", "04-26", "04-28", - "05-01", "05-09", "05-12", "05-14", "05-16", "06-01", "07-01", "07-19", "07-21", "07-23", "08-01", + "05-01", "05-09", "05-12", "05-14", "05-16", "06-01", "06-12", "07-01", "07-19", "07-21", "07-23", "08-01", "08-19", "08-21", "08-23", "09-01", "09-04", "09-07", "09-10", "09-13", "09-15", "09-17", "09-20", "09-23", "09-25", "09-28", "09-30", "10-01", "10-03", "10-05", "10-08", "11-01", "12-01"] diff --git a/src/lambda/syllabus-scraper/crawler.py b/src/lambda/syllabus-scraper/crawler.py index 30c3411f0..94f440467 100644 --- a/src/lambda/syllabus-scraper/crawler.py +++ b/src/lambda/syllabus-scraper/crawler.py @@ -28,7 +28,8 @@ def execute(self): :return: list of courses """ pages = self.get_max_page() - course_pages = run_concurrently(self.scrape_catalog, range(pages), self.worker) + course_pages = run_concurrently( + self.scrape_catalog, range(pages), self.worker) course_ids = (course_id for page in course_pages for course_id in page) results = run_concurrently(self.scrape_course, course_ids, self.worker) return results @@ -84,10 +85,14 @@ def scrape_course(self, course_id): "n": 'array', # eval "o": 'string', # code "p": 'string', # subtitle + "q": 'string', #category + "r": 'string', #modality } """ - req_en = requests.Request(url=build_url(lang='en', course_id=course_id), headers=header) - req_jp = requests.Request(url=build_url(lang='jp', course_id=course_id), headers=header) + req_en = requests.Request(url=build_url( + lang='en', course_id=course_id), headers=header) + req_jp = requests.Request(url=build_url( + lang='jp', course_id=course_id), headers=header) parsed_en = html.fromstring(requests.urlopen(req_en).read()) parsed_jp = html.fromstring(requests.urlopen(req_jp).read()) info_en = parsed_en.xpath(query["info_table"])[0] diff --git a/src/lambda/syllabus-scraper/utils.py b/src/lambda/syllabus-scraper/utils.py index fc49930a8..e58d3a6ac 100644 --- a/src/lambda/syllabus-scraper/utils.py +++ b/src/lambda/syllabus-scraper/utils.py @@ -61,6 +61,11 @@ def to_half_width(s): return unicodedata.normalize('NFKC', s) +def remove_format_chars(line): + cleaned_line = re.sub(r'[\n\r\t]', ' ', line) + return cleaned_line + + def get_eval_criteria(parsed): """ Get the evaluation criteria from course detail page @@ -82,17 +87,18 @@ def get_eval_criteria(parsed): # Case 2: 2 or more rows for r in rows[1:]: elem = r.getchildren() - kind = elem[0].text + kind = elem[0].text_content() percent = elem[1].text.strip()[:-1] or -1 try: percent = int(percent) except ValueError: logging.warning(f"Unable to parse percent: {percent}") - criteria = to_half_width(elem[2].text) + criteria = to_half_width(elem[2].text_content()) + cleaned_criteria = remove_format_chars(criteria) evals.append({ "t": to_enum(eval_type_map)(kind), "p": percent, - "c": criteria + "c": cleaned_criteria }) return evals @@ -143,12 +149,27 @@ def merge_period_location(periods, locations): for p in periods: p["l"] = locations[0] return periods - # TODO find other cases # Case 2: More no. of periods than no. of locations zipped = list(itertools.zip_longest(periods, locations)) for (p, loc) in zipped: - p["l"] = loc + if p is None: + logging.error(f"Unexpected None in periods. loc={loc}") + continue + + if loc is not None: + p["l"] = loc + else: + logging.warning( + f"Missing location for period {p}. Assigning default value.") + p["l"] = "undecided" + occurrences.append(p) + + # Case 3: Logging error for unusual scenarios + if not occurrences: + logging.error( + f"merge_period_location resulted in no occurrences for input periods={periods}, locations={locations}") + return occurrences @@ -191,15 +212,15 @@ def parse_location(loc): rooms = [] locations = loc.split('/') for l in locations: - match = re.search(r'0(\d):(.*)', l) - count, classroom = int(match.group(1)) - 1, match.group(2) - classroom = rename_location(classroom) - # Sub-case: two location records for same period - if count >= len(rooms): - rooms.append(classroom) - else: - rooms.__setitem__(count, rooms[count] + "/" + classroom) - return rooms + matches = re.findall(r'0(\d):(.*)', l) + for match in matches: + count, classroom = int(match[0]) - 1, match[1] + classroom = rename_location(classroom) + if count >= len(rooms): + rooms.append([classroom]) + else: + rooms[count].append(classroom) + return [room for sublist in rooms for room in sublist] def parse_lang(lang): @@ -243,7 +264,8 @@ def parse_period(schedule): return [{"d": -1, "p": -1}] if occ == "othersOn demand": return [{"d": -1, "p": 0}] - occ_matches = re.finditer(r'(Mon|Tues|Wed|Thur|Fri|Sat|Sun)\.(\d-\d|\d|On demand)', occ) + occ_matches = re.finditer( + r'(Mon|Tues|Wed|Thur|Fri|Sat|Sun)\.(\d-\d|\d|On demand)', occ) occurrences = [] for match in occ_matches: day, period = match.group(1), match.group(2) @@ -300,8 +322,10 @@ def upload_to_s3(syllabus, school): 'RequestCharged': 'requester' } """ - s3 = boto3.resource('s3', region_name="ap-northeast-1", verify=False, config=Config(signature_version='s3v4')) - syllabus_object = s3.Object(os.getenv('BUCKET_NAME'), os.getenv('OBJECT_PATH') + school + '.json') + s3 = boto3.resource('s3', region_name="ap-northeast-1", + verify=False, config=Config(signature_version='s3v4')) + syllabus_object = s3.Object( + os.getenv('BUCKET_NAME'), os.getenv('OBJECT_PATH') + school + '.json') body = bytes(json.dumps(list(syllabus)).encode('UTF-8')) resp = syllabus_object.put( ACL='private',