Skip to content

Commit

Permalink
Add error filter & add program and session codes params to routes (#17)
Browse files Browse the repository at this point in the history
* add exception filter #7

* add program and session params to routes #11

* add error when empty courses

* change name of parameter

* change planification param

* remove unused variables

* feat: Add initial program data for scraping

* update programs code
  • Loading branch information
mhd-hi authored May 19, 2024
1 parent e5a8a6d commit 9eed67e
Show file tree
Hide file tree
Showing 10 changed files with 181 additions and 55 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"axios": "^1.6.8",
"class-transformer": "^0.5.1",
"class-validator": "^0.14.0",
"express": "^4.19.2",
"pdf2json": "^3.0.5",
"reflect-metadata": "^0.1.13",
"rxjs": "^7.8.1",
Expand Down
87 changes: 87 additions & 0 deletions prisma/seeds/programs/programs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
[
{
"code": "7625",
"type": "premier-cycle",
"url": "baccalaureat-genie-construction"
},
{
"code": "7694",
"type": "premier-cycle",
"url": "baccalaureat-genie-electrique"
},
{
"code": "7084",
"type": "premier-cycle",
"url": "baccalaureat-genie-logiciel"
},
{
"code": "7684",
"type": "premier-cycle",
"url": "baccalaureat-genie-mecanique"
},
{
"code": "6556",
"type": "premier-cycle",
"url": "baccalaureat-genie-operations-logistique"
},
{
"code": "6557",
"type": "premier-cycle",
"url": "baccalaureat-genie-production-automatisee"
},
{
"code": "7086",
"type": "premier-cycle",
"url": "baccalaureat-genie-des-ti"
},
{
"code": "6646",
"type": "premier-cycle",
"url": "baccalaureat-informatique-distribuee"
},
{
"code": "5766",
"type": "premier-cycle",
"url": "cheminement-universitaire-technologie"
},
{
"code": "4567",
"type": "premier-cycle",
"url": "certificat-economie-estimation"
},
{
"code": "4412",
"type": "premier-cycle",
"url": "certificat-gestion-assurance-qualite"
},
{
"code": "4563",
"type": "premier-cycle",
"url": "certificat-gestion-construction"
},
{
"code": "4684",
"type": "premier-cycle",
"url": "certificat-gestion-immobiliere"
},
{
"code": "4329",
"type": "premier-cycle",
"url": "certificat-production-industrielle"
},
{
"code": "4288",
"type": "premier-cycle",
"url": "certificat-telecommunications"
},
{
"code": "1822",
"type": "deuxieme-cycle",
"url": "maitrise-genie-logiciel"
},
{
"code": "3178",
"type": "deuxieme-cycle",
"url": "dess-technologies-information"
}
]
1 change: 0 additions & 1 deletion prisma/seeds/seed.ts
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
//This will contain the seed data for university programs
6 changes: 6 additions & 0 deletions src/constants/error-messages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * User-facing error messages returned in HTTP error responses.
 *
 * Declared `as const` so every entry is read-only and keeps its literal
 * string type; the table is shared and must not be mutated at runtime.
 */
export const ERROR_MESSAGES = {
  /** The PDF could not be parsed. */
  ERROR_PARSING_PDF: 'An error occurred while parsing the PDF.',
  /** A PDF URL was missing or invalid. */
  REQUIRED_PDF_URL: 'PDF URL is required and must be valid.',
  /** The session code and/or the program code was missing. */
  REQUIRED_SESSION_AND_PROGRAM_CODE:
    'Session code and program code are required.',
} as const;
24 changes: 24 additions & 0 deletions src/http-exception.filter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import {
ArgumentsHost,
Catch,
ExceptionFilter,
HttpException,
} from '@nestjs/common';
import { Request, Response } from 'express';

/**
 * Exception filter bound to `HttpException`: turns each caught exception into
 * a JSON error body written on the HTTP response.
 */
@Catch(HttpException)
export class HttpExceptionFilter implements ExceptionFilter {
  /**
   * Writes the error response for a caught `HttpException`.
   *
   * The response status is the exception's status, and the JSON body carries
   * `statusCode`, an ISO-8601 `timestamp`, the request `path` and a `message`.
   *
   * @param exception The exception that was thrown.
   * @param host Arguments host used to reach the HTTP request and response.
   */
  public catch(exception: HttpException, host: ArgumentsHost) {
    const http = host.switchToHttp();
    const res = http.getResponse<Response>();
    const req = http.getRequest<Request>();
    const statusCode = exception.getStatus();

    // Fall back to a generic message when the exception carries an empty one.
    const message = exception.message || 'Internal Server Error';

    res.status(statusCode).json({
      statusCode,
      timestamp: new Date().toISOString(),
      path: req.url,
      message,
    });
  }
}
2 changes: 2 additions & 0 deletions src/main.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { NestFactory } from '@nestjs/core';

import { AppModule } from './app.module';
import { HttpExceptionFilter } from './http-exception.filter';

/**
 * Creates the Nest application from `AppModule`, registers the global
 * `HttpExceptionFilter` and starts listening for requests.
 *
 * The port comes from the `PORT` environment variable and defaults to 3000
 * when the variable is unset, empty, or not a base-10 integer.
 */
async function bootstrap() {
  const DEFAULT_PORT = 3000;

  const app = await NestFactory.create(AppModule);
  app.useGlobalFilters(new HttpExceptionFilter());

  // Pass an explicit radix so the value is always read as decimal, and make
  // sure an unparsable PORT never reaches `listen` as NaN.
  const parsedPort = parseInt(process.env.PORT ?? '', 10);
  await app.listen(Number.isNaN(parsedPort) ? DEFAULT_PORT : parsedPort);
}
// `void` marks the returned promise as intentionally not awaited here.
void bootstrap();
20 changes: 12 additions & 8 deletions src/pdf/pdf-parser/horaire/horaire-cours.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ import { Period } from './Period';

@Injectable()
export class HoraireCoursService {
private readonly PREALABLE_X_AXIS = 29.86;

private readonly START_PAGE_CONTENT_Y_AXIS = 14.019;
private readonly END_PAGE_CONTENT_Y_AXIS = 59;
private readonly PREALABLE_X_AXIS = 29.86;

constructor(private httpService: HttpService) {}

Expand All @@ -23,7 +21,10 @@ export class HoraireCoursService {
const response = await firstValueFrom(
this.httpService.get(pdfUrl, { responseType: 'arraybuffer' }),
);
return await this.parseHoraireCoursPdf(Buffer.from(response.data));
return await this.parseHoraireCoursPdf(
Buffer.from(response.data),
pdfUrl,
);
} catch (error) {
throw new Error('Error fetching PDF from URL ' + error);
}
Expand All @@ -32,15 +33,15 @@ export class HoraireCoursService {
// Parses the PDF buffer to extract course information
private async parseHoraireCoursPdf(
pdfBuffer: Buffer,
pdfUrl: string,
): Promise<HoraireCours[]> {
return PdfParserUtil.parsePdfBuffer(
pdfBuffer,
this.processPdfData.bind(this),
return PdfParserUtil.parsePdfBuffer(pdfBuffer, (pdfData) =>
this.processPdfData(pdfData, pdfUrl),
);
}

// Processes the raw PDF data to extract course information
private processPdfData(pdfData: Output): HoraireCours[] {
private processPdfData(pdfData: Output, pdfUrl: string): HoraireCours[] {
try {
const courses: HoraireCours[] = [];
let currentCourse: HoraireCours = new HoraireCours();
Expand All @@ -62,6 +63,9 @@ export class HoraireCoursService {
currentCourse.addOrUpdateCourse(courses);
}

if (courses.length === 0)
throw new Error(`No courses found in the PDF located at ${pdfUrl}.`);

const serializedCourses: HoraireCours[] = courses.map((course) =>
course.serialize(),
) as unknown as HoraireCours[];
Expand Down
42 changes: 18 additions & 24 deletions src/pdf/pdf-parser/planification/planification-cours.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,8 @@ import { CourseCodeValidationPipe } from '../../pipes/course-code-validation-pip
import { PlanificationCours } from './planification-cours.types';
import { Row } from './Row';

//TODO Add title to the course (cycles superieurs ont plusieurs cours avec le meme code)
//https://horaire.etsmtl.ca/Horairepublication/Planification-CyclesSuperieurs.pdf

@Injectable()
export class PlanificationCoursService {
private readonly COURS_X_AXIS = 1.648;
private readonly BORDER_OFFSET = 0.124;

private courseCodeValidationPipe = new CourseCodeValidationPipe();
Expand All @@ -26,32 +22,37 @@ export class PlanificationCoursService {
const response = await firstValueFrom(
this.httpService.get(pdfUrl, { responseType: 'arraybuffer' }),
);
return await this.parsePlanificationCoursPdf(Buffer.from(response.data));
return await this.parsePlanificationCoursPdf(
Buffer.from(response.data),
pdfUrl,
);
} catch (error) {
throw new Error('Error fetching pdf from URL ' + error);
}
}

private parsePlanificationCoursPdf(
pdfBuffer: Buffer,
pdfUrl: string,
): Promise<PlanificationCours[]> {
return PdfParserUtil.parsePdfBuffer(
pdfBuffer,
this.processPdfData.bind(this),
return PdfParserUtil.parsePdfBuffer(pdfBuffer, (pdfData) =>
this.processPdfData(pdfData, pdfUrl),
);
}

private processPdfData(pdfData: Output): PlanificationCours[] {
private processPdfData(
pdfData: Output,
pdfUrl: string,
): PlanificationCours[] {
try {
const headerCells: Row[] = this.parseHeaderCells(pdfData);
const courses: PlanificationCours[] = [];
let currentCourse: PlanificationCours = this.initializeCourse();

pdfData.Pages.forEach((page: Page) => {
page.Texts.forEach((textItem: Text) => {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { textContent, xPos, yPos } =
TextExtractor.extractTextDetails(textItem); //TODO Check yPos later
const { textContent, xPos } =
TextExtractor.extractTextDetails(textItem);

const currentColumn = Row.getColumnHeaderName(headerCells, xPos);
// Process course code
Expand Down Expand Up @@ -81,27 +82,20 @@ export class PlanificationCoursService {
}
});
});

if (currentCourse.code !== '') {
courses.push(currentCourse);
}

if (courses.length === 0)
throw new Error(`No courses found in the PDF located at ${pdfUrl}.`);

return courses;
} catch (err) {
console.error('Error parsing pdf data: ' + err);
throw new Error('Error processing PDF data');
}
}

private extractTextDetails(textItem: Text): {
textContent: string;
xPos: number;
yPos: number;
} {
const textContent = decodeURIComponent(textItem.R[0].T).trim();
const xPos: number = textItem.x;
const yPos: number = textItem.y;
return { textContent, xPos, yPos };
}

private initializeCourse(): PlanificationCours {
return {
code: '',
Expand Down
50 changes: 29 additions & 21 deletions src/pdf/pdf.controller.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import {
BadRequestException,
Controller,
Get,
HttpException,
HttpStatus,
InternalServerErrorException,
Query,
} from '@nestjs/common';

import { isValidUrl } from '../utils/url/urlUtils';
import { ERROR_MESSAGES } from '../constants/error-messages';
import { HoraireCoursService } from './pdf-parser/horaire/horaire-cours.service';
import { IHoraireCours } from './pdf-parser/horaire/horaire-cours.types';
import { PlanificationCoursService } from './pdf-parser/planification/planification-cours.service';
Expand All @@ -22,38 +21,47 @@ export class PdfController {

@Get('horaire-cours')
public async parseHoraireCoursPdf(
@Query('pdfUrl') pdfUrl: string,
@Query('session') sessionCode: string,
@Query('program') programCode: string,
): Promise<IHoraireCours[]> {
if (!sessionCode || !programCode) {
throw new HttpException(
ERROR_MESSAGES.REQUIRED_SESSION_AND_PROGRAM_CODE,
HttpStatus.BAD_REQUEST,
);
}

const pdfUrl = `https://horaire.etsmtl.ca/HorairePublication/HorairePublication_${sessionCode}_${programCode}.pdf`;

try {
console.log('Controller file', pdfUrl);
if (!pdfUrl || !isValidUrl(pdfUrl)) {
throw new BadRequestException('PDF URL is required');
}
return await this.horaireCoursService.parsePdfFromUrl(pdfUrl);
} catch (error) {
throw new InternalServerErrorException(
'Error parsing Horaire Cours PDF' + error,
throw new HttpException(
ERROR_MESSAGES.ERROR_PARSING_PDF,
HttpStatus.INTERNAL_SERVER_ERROR,
);
}
}

@Get('planification-cours')
public async parsePlanificationCoursPdf(
@Query('pdfUrl') pdfUrl: string,
@Query('program') programCode: string,
): Promise<PlanificationCours[]> {
try {
console.log('Controller file', pdfUrl);
if (!pdfUrl) {
console.log('PDF URL is required', HttpStatus.BAD_REQUEST, pdfUrl);
throw new BadRequestException('pdfUrl attribute is required');
} else if (!isValidUrl(pdfUrl)) {
throw new BadRequestException('pdfUrl is not valid');
}
// Reject the request early: the PDF URL built below is derived from the program code.
if (!programCode) {
  throw new HttpException(
    // This endpoint takes `program`, not a PDF URL, so the message must name
    // the parameter that is actually missing.
    'Program code is required.',
    HttpStatus.BAD_REQUEST,
  );
}

const pdfUrl = `https://horaire.etsmtl.ca/Horairepublication/Planification-${programCode}.pdf`;

try {
return await this.planificationCoursService.parsePdfFromUrl(pdfUrl);
} catch (error) {
throw new InternalServerErrorException(
'Error parsing Planification Cours PDF',
throw new HttpException(
ERROR_MESSAGES.ERROR_PARSING_PDF,
HttpStatus.INTERNAL_SERVER_ERROR,
);
}
}
Expand Down
3 changes: 2 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3000,7 +3000,7 @@ expect@^29.0.0, expect@^29.7.0:
jest-message-util "^29.7.0"
jest-util "^29.7.0"

[email protected]:
[email protected], express@^4.19.2:
version "4.19.2"
resolved "https://registry.yarnpkg.com/express/-/express-4.19.2.tgz#e25437827a3aa7f2a827bc8171bbbb664a356465"
integrity sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==
Expand Down Expand Up @@ -6348,6 +6348,7 @@ which@^2.0.1:
isexe "^2.0.0"

"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
name wrap-ansi-cjs
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
Expand Down

0 comments on commit 9eed67e

Please sign in to comment.