Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRAFT: Re-implement matching algorithm in NodeJS #1

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ module.exports = {
plugins: [
'@typescript-eslint',
],
rules: {
'linebreak-style': "off"
}
};
71 changes: 71 additions & 0 deletions src/match/findMatching.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { MatchingExternal, MatchingInternal, ProjectData } from './matchingTypes';
import {
matchingStats,
placeStudentsOfChoicesBalanced,
range,
} from './matchingHelpers';

/**
 * Assigns students by choice rank, working from `start` up to `end` in batches of `batch` consecutive ranks and
 * using the balanced algorithm to break ties. Every project is visited once per batch. Examples of use:
 * matchChoices(data, 1, 1, 1) - Matches all first choice students it can
 * matchChoices(data, 2, 2, 1) - Matches all second choice students it can
 * matchChoices(data, 3, 20, 3) - Matches 3,4,5 then 6,7,8 then 9,10,11... etc until 20.
 *
 * @param allProjectData - The project data, handed to placeStudentsOfChoicesBalanced which updates it in place
 * @param start - Starting number, inclusive
 * @param end - The number to process until, inclusive
 * @param batch - The size of the batches of choices to work on at once. Must be greater than zero.
 * @throws RangeError if `batch` is not greater than zero (the loop could otherwise never advance)
 */
function matchChoices(allProjectData: ProjectData, start: number, end: number, batch: number): void {
  if (!(batch > 0)) {
    throw new RangeError(`batch must be greater than zero, got ${batch}`);
  }
  for (let startChoice = start; startChoice <= end; startChoice += batch) {
    // Clamp the exclusive upper bound so the final batch stops at `end` and no batch ever holds more than
    // `batch` ranks
    const choices = range(startChoice, Math.min(startChoice + batch, end + 1));
    Object.values(allProjectData)
      .forEach((project) => {
        placeStudentsOfChoicesBalanced(allProjectData, project.projectId, choices, project.projSizeRemaining);
      });
  }
}

/**
 * Generates a single match of all students to projects. May have missing students.
 *
 * Runs one matching pass per choice rank, in order: rank 1, then rank 2, then ranks 3 through 20 one at a time.
 *
 * @param data - The project data to match; it is updated in place and returned as `match`
 * @returns The matched data together with the statistics computed by matchingStats
 */
function generateMatch(data: ProjectData): MatchingInternal {
  // Each rank gets its own pass so that no choice rank (in particular rank 2) is skipped
  matchChoices(data, 1, 1, 1);
  matchChoices(data, 2, 2, 1);
  matchChoices(data, 3, 20, 1);
  return {
    match: data,
    stats: matchingStats(data),
  };
}

/**
 * Runs the matcher repeatedly and keeps the best result, which makes a match with no unassigned students very
 * likely (but not guaranteed - call again if it fails).
 *
 * One initial attempt is followed by 50 further attempts. A candidate replaces the current best when it leaves
 * fewer students unassigned, or leaves the same number unassigned with a lower matching score.
 *
 * @param data - The project information to create a match for. Will not be mutated; every attempt works on a
 * deep copy made with a JSON round trip.
 * @returns The best match found, with the elapsed wall-clock time added to its stats as `runtimeMs`
 */
export function generateReliableMatch(data: ProjectData): MatchingExternal {
  const startTime = process.hrtime();

  // One attempt = deep copy of the input + a full matching run on that copy
  const runAttempt = (): MatchingInternal => generateMatch(JSON.parse(JSON.stringify(data)));

  let bestMatch: MatchingInternal = runAttempt();
  let attemptsLeft = 50;
  while (attemptsLeft > 0) {
    const candidate = runAttempt();
    const candidateStats = candidate.stats;
    const bestStats = bestMatch.stats;
    const fewerUnassigned = candidateStats.unassignedStudents < bestStats.unassignedStudents;
    const sameUnassignedLowerScore = candidateStats.unassignedStudents === bestStats.unassignedStudents
      && candidateStats.matchingScore < bestStats.matchingScore;
    if (fewerUnassigned || sameUnassignedLowerScore) {
      bestMatch = candidate;
    }
    attemptsLeft -= 1;
  }

  // process.hrtime(start) yields [seconds, nanoseconds] elapsed since `start`
  const [elapsedSeconds, elapsedNanoseconds] = process.hrtime(startTime);
  const runtimeMs = elapsedSeconds * 1000 + elapsedNanoseconds / 1000000;
  return {
    match: bestMatch.match,
    stats: {
      ...bestMatch.stats,
      runtimeMs,
    },
  };
}
288 changes: 288 additions & 0 deletions src/match/matchingHelpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
import assert from 'assert';
import { Project, ProjectPreference } from '@prisma/client';
import {
MatchingExternal,
MatchingStatsInternal,
ProjectData,
StudentChoice,
StudentChoices,
} from './matchingTypes';
import { MatchTuple, MatchingResult } from '../types/Matching';

/**
 * Shuffles an array in place using the Durstenfeld shuffle algorithm (https://stackoverflow.com/a/12646864).
 * Walks from the last index down to index 1, swapping each element with one at a Math.random-chosen index at or
 * below it.
 * @param array - The array to reorder; mutated in place
 */
function shuffleArray<T>(array: Array<T>): void {
  let last = array.length - 1;
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = array[last];
    /* eslint-disable no-param-reassign */
    array[last] = array[pick];
    array[pick] = held;
    /* eslint-enable no-param-reassign */
    last -= 1;
  }
}

/**
 * A minimal counterpart to python's range function: builds the array start, start + 1, ... for every value
 * strictly below stop.
 * @param start - The start of the range, inclusive
 * @param stop - The end of the range, exclusive
 * @returns The generated numbers; empty when start is not below stop
 */
export function range(start: number, stop: number): number[] {
  const values: number[] = [];
  let next = start;
  while (next < stop) {
    values.push(next);
    next += 1;
  }
  return values;
}

/**
 * Tells whether a student's choice rank equals the given rank, or is contained in the given list of ranks.
 * @param choice - A single choice number or an array of choice numbers
 * @param studentChoice - The student entry whose `choice` rank is tested
 * @returns true when the student's rank matches
 */
function compareChoice(choice: number[] | number, studentChoice: StudentChoice): boolean {
  if (Array.isArray(choice)) {
    return choice.includes(studentChoice.choice);
  }
  return studentChoice.choice === choice;
}

/**
 * Flags a student as matched in the `studentsSelected` of every project that lists them, so that no other
 * project treats them as available.
 * @param projectData - The project data, mutated in place
 * @param studentId - Student to mark
 */
function markStudent(projectData: ProjectData, studentId: string): void {
  Object.keys(projectData)
    .forEach((projectId) => {
      const entry = projectData[projectId].studentsSelected[studentId];
      // Projects the student never voted for have no entry to update
      if (entry === undefined) return;
      entry.matched = true;
    });
}

/**
 * Puts a student onto a project: records them in the project's `studentsMatched`, marks them as matched in every
 * project, lowers the project's remaining size by one and, when the project was the student's first choice,
 * lowers its `numFirstChoice` by one.
 * @param projectData - The project data, mutated in place
 * @param projectId - The project receiving the student
 * @param studentId - The student to place; must have an entry in that project's `studentsSelected`
 * @throws AssertionError when the student is already marked as matched
 */
function placeStudent(projectData: ProjectData, projectId: string, studentId: string): void {
  const project = projectData[projectId];
  const student = project.studentsSelected[studentId];

  // Placing the same student twice would be a logic error somewhere upstream
  const notYetMatched = student.matched !== true;
  assert(
    notYetMatched,
    `Tried to place a student that's already been matched ${JSON.stringify(student)}`,
  );

  const wasFirstChoice = student.choice === 1;
  project.studentsMatched[student.studentId] = student;
  markStudent(projectData, studentId);
  project.projSizeRemaining -= 1;
  if (wasFirstChoice) {
    project.numFirstChoice -= 1;
  }
}

/**
 * Counts the number of votes a certain student has remaining in non-filled projects.
 *
 * A project counts when it still has open slots (`projSizeRemaining > 0`) and its `studentsSelected` has an entry
 * keyed by the student's ID. Whether that entry is marked as matched is not considered.
 *
 * @param projectData - All projects to inspect
 * @param studentId - The student whose votes are counted
 * @returns The number of non-filled projects that list the student
 */
function countStudentVotes(projectData: ProjectData, studentId: string): number {
  return Object.values(projectData)
    .filter((project) => project.projSizeRemaining > 0
      // Own-property lookup instead of scanning every key of studentsSelected
      && Object.prototype.hasOwnProperty.call(project.studentsSelected, studentId))
    .length;
}

/**
 * Counts the number of unmarked students on a project who voted for it with a given choice.
 * @param studentsSelected - The project's student entries
 * @param choice - A single choice number or an array of choice numbers to count
 * @returns How many entries are not marked as matched and have a matching choice rank
 */
export function countStudentsOfChoices(studentsSelected: StudentChoices, choice: number[] | number): number {
  return Object.values(studentsSelected)
    .filter((student) => student.matched !== true && compareChoice(choice, student))
    .length;
}

/**
 * Places up to `count` students onto a project, breaking ties in favour of the students who appear least
 * frequently in the remaining (non-filled) projects. This is based on the assumption that these students are the
 * most likely to accidentally have all their voted projects filled up on them.
 *
 * Placement happens in two stages:
 * 1. Unmatched students for whom only one non-filled project remains are placed first, in random order and
 *    regardless of `choice`.
 * 2. Any slots left over go to unmatched students whose rank matches `choice`, fewest remaining votes first;
 *    students with the same number of votes are ordered randomly.
 *
 * @param projectData - The project data, mutated in place
 * @param projectId - The project to place students onto
 * @param choice - The choice rank, or ranks, considered in the second stage
 * @param count - The maximum number of students to place; values below zero are treated as zero
 */
export function placeStudentsOfChoicesBalanced(
  projectData: ProjectData,
  projectId: string,
  choice: number[] | number,
  count: number,
): void {
  // slice() interprets a negative end as an offset from the end of the array, so never pass one through
  const slots = Math.max(0, count);
  const { studentsSelected } = projectData[projectId];

  // Students on this project who are still unmatched and have only one remaining non-filled project
  const singleStudents: [string, number][] = Object.values(studentsSelected)
    .filter((student) => student.matched !== true)
    .map(
      (student): [string, number] => [student.studentId, countStudentVotes(projectData, student.studentId)],
    )
    .filter(([, votes]) => votes === 1);

  // Randomize the array and then match as many single students as possible.
  shuffleArray(singleStudents);
  const placedSingles = singleStudents.slice(0, slots);
  placedSingles.forEach(([studentId]) => placeStudent(projectData, projectId, studentId));

  // Vote counts are computed only now because the placements above may have filled projects
  const studentFrequency: [string, number][] = Object.values(studentsSelected)
    .filter((student) => compareChoice(choice, student) && student.matched !== true)
    .map(
      (student): [string, number] => [student.studentId, countStudentVotes(projectData, student.studentId)],
    );

  // Shuffle first, then sort by vote count. Array.prototype.sort is stable, so students with equal counts keep
  // their shuffled (random) relative order while the comparator itself stays consistent.
  shuffleArray(studentFrequency);
  studentFrequency.sort((a, b) => a[1] - b[1]);

  // Fill whatever slots the single students left open; the difference can never be negative here
  studentFrequency.slice(0, slots - placedSingles.length)
    .forEach(([studentId]) => placeStudent(projectData, projectId, studentId));
}

/**
 * Adds up the `projSizeRemaining` of every project, i.e. the number of open spots left overall.
 * @param projectData - All projects to total
 * @returns The summed remaining size; 0 when there are no projects
 */
function countUnfilled(projectData: ProjectData) {
  let openSpots = 0;
  Object.values(projectData)
    .forEach((project) => {
      openSpots += project.projSizeRemaining;
    });
  return openSpots;
}

/**
 * Counts the number of unmarked students (students that didn't get applied to anything). A student who appears
 * on several projects is counted once.
 * @param projectData - All projects to inspect
 * @returns The number of distinct student IDs having at least one entry that is not marked as matched
 */
function unassignedStudentsCount(projectData: ProjectData): number {
  // A Set de-duplicates students listed on several projects without rescanning an array for every entry
  const unassignedIds = new Set<string>();
  Object.values(projectData)
    .forEach((project) => {
      Object.values(project.studentsSelected)
        .forEach((student) => {
          if (student.matched !== true) {
            unassignedIds.add(student.studentId);
          }
        });
    });
  return unassignedIds.size;
}

/**
 * Measures the effectiveness of a match: the sum of the choice ranks of all matched students divided by the
 * total number of students.
 * @param projectData - All projects; only their `studentsMatched` entries contribute to the sum
 * @param totalStudents - The divisor, the total number of students
 * @returns The score, or 0 when `totalStudents` is not positive (avoids a NaN score for empty data)
 */
function measureMatchEffectiveness(projectData: ProjectData, totalStudents: number): number {
  if (!(totalStudents > 0)) {
    return 0;
  }
  const rawScore = Object.values(projectData)
    .reduce((sumScoreOverall, currentProject) => sumScoreOverall
      + Object.values(currentProject.studentsMatched)
        .reduce((sumScore, currentStudent) => sumScore + currentStudent.choice, 0), 0);
  return rawScore / totalStudents;
}

/**
 * Gathers the headline statistics of a matching: number of projects, number of distinct students, students left
 * unassigned, slots left unfilled and the matching score.
 * @param projectData - The matching to summarise
 * @returns The combined statistics
 */
export function matchingStats(projectData: ProjectData): MatchingStatsInternal {
  // A student who voted for several projects appears in each of them, so collect the IDs into a Set
  const studentIds = new Set<string>(
    Object.values(projectData)
      .flatMap((project) => Object.values(project.studentsSelected)
        .map((student) => student.studentId)),
  );
  const totalStudents = studentIds.size;
  const totalProjects = Object.keys(projectData).length;
  const unassignedStudents = unassignedStudentsCount(projectData);
  const unfilledSlots = countUnfilled(projectData);
  const matchingScore = measureMatchEffectiveness(projectData, totalStudents);
  return {
    totalProjects,
    totalStudents,
    unassignedStudents,
    unfilledSlots,
    matchingScore,
  };
}

/**
 * Converts projects loaded through Prisma (each with its preferences) into the internal ProjectData structure,
 * keyed by project ID.
 *
 * For every project the preferences become `studentsSelected` (keyed by student ID, with the preference ranking
 * as `choice`), `numFirstChoice` is the count of students who ranked it 1, and `studentsMatched` starts empty.
 *
 * NOTE(review): `projSizeRemaining` is initialised to the number of students who ranked the project, not to a
 * capacity taken from the project record - confirm this is intended.
 *
 * @param prismaData - The projects with their `projectPreferences` included
 * @returns The internal project data
 */
export function parsePrismaData(prismaData: (Project & { projectPreferences: ProjectPreference[] })[]): ProjectData {
  return prismaData.reduce<ProjectData>((projectData, { id, projectPreferences }) => {
    // Generate the student choices
    const studentsSelected = projectPreferences.reduce<StudentChoices>((selected, { studentId, ranking }) => {
      // eslint-disable-next-line no-param-reassign
      selected[studentId] = { studentId, choice: ranking };
      return selected;
    }, {});
    // Fill in the rest of the data
    // eslint-disable-next-line no-param-reassign
    projectData[id] = {
      studentsSelected,
      projectId: id,
      numFirstChoice: countStudentsOfChoices(studentsSelected, 1),
      projSizeRemaining: Object.keys(studentsSelected).length,
      studentsMatched: {},
    };
    return projectData;
  }, {});
}

/**
 * Marshals a matching from the internal structure into the datatype required for export: the per-project
 * `studentsMatched` maps are flattened into one list of { studentId, projectId } tuples, and the stats are passed
 * through unchanged.
 * @param matchingData - The matching to export
 * @returns The stats together with the flat list of student/project pairs
 */
export function prepareDataForExport(matchingData: MatchingExternal): MatchingResult {
  const { match, stats } = matchingData;
  const matchingProjectData: MatchTuple[] = [];
  Object.values(match)
    .forEach(({ projectId, studentsMatched }) => {
      Object.keys(studentsMatched)
        .forEach((studentId) => {
          matchingProjectData.push({ studentId, projectId });
        });
    });
  return {
    stats,
    match: matchingProjectData,
  };
}
Loading