diff --git a/bulkimport_scripts/add_users_for_bulk_import/.gitignore b/bulkimport_scripts/add_users_for_bulk_import/.gitignore new file mode 100644 index 000000000..4a79b3fbe --- /dev/null +++ b/bulkimport_scripts/add_users_for_bulk_import/.gitignore @@ -0,0 +1,3 @@ +node_modules +./usersHavingInvalidSchema.json +./remainingUsers.json diff --git a/bulkimport_scripts/add_users_for_bulk_import/README.md b/bulkimport_scripts/add_users_for_bulk_import/README.md new file mode 100644 index 000000000..b443c742d --- /dev/null +++ b/bulkimport_scripts/add_users_for_bulk_import/README.md @@ -0,0 +1,34 @@ +# Add Users For Bulk Import Script + +The `/bulk-import/users` POST API endpoint in SuperTokens Core adds users to the database so that they can be bulk imported. However, the API only allows importing 10,000 users at once. This script takes a JSON file containing a large number of users and calls the API in batches of 10,000. + +## How to Run + +1. Ensure you have Node.js (v18 or higher) installed on your system; the script relies on the built-in `fetch` API. +2. Open a terminal window and navigate to the directory where the script is located. +3. Run `npm install` to install the necessary dependencies. +4. Run the script using the following command: + + ``` + node index.js --core-endpoint <core-api-endpoint> --input-file <input-file> [--invalid-schema-file <invalid-schema-file>] [--remaining-users-file <remaining-users-file>] + ``` + + - Replace `<core-api-endpoint>` with the URL of the core API endpoint. + - Replace `<input-file>` with the path to the input JSON file containing user data. + - Optionally, you can specify the paths for the output files: + - `--invalid-schema-file <invalid-schema-file>` specifies the path to the file storing users with invalid schema (default is `./usersHavingInvalidSchema.json`). + - `--remaining-users-file <remaining-users-file>` specifies the path to the file storing remaining users (default is `./remainingUsers.json`). + +## Format of Input File + +The input file should be a JSON file with the same format as requested by the `/bulk-import/users` POST API endpoint. 
An example file named `example_input_file.json` is provided in the same directory. + +## Expected Outputs + +- Upon successful execution, the script will output a summary message indicating the number of users processed, any remaining users, and any users with invalid schema. +- If there are remaining users to be processed, the file specified by `--remaining-users-file` (default `remainingUsers.json`) will be generated, containing the details of remaining users. +- If there are users with invalid schema, the file specified by `--invalid-schema-file` (default `usersHavingInvalidSchema.json`) will be generated, containing the details of users with invalid schema. + +## Note + +The script overwrites the files specified by the `--remaining-users-file` and `--invalid-schema-file` options on each run. Back up these files before running the script if you need to keep them. \ No newline at end of file diff --git a/bulkimport_scripts/add_users_for_bulk_import/example_input_file.json b/bulkimport_scripts/add_users_for_bulk_import/example_input_file.json new file mode 100644 index 000000000..4db904400 --- /dev/null +++ b/bulkimport_scripts/add_users_for_bulk_import/example_input_file.json @@ -0,0 +1,145 @@ +{ + "users": [ + { + "id": "5e1ea023-8787-4c38-8409-ea0b0230662e", + "externalUserId": "b263cfe8-f27f-4264-a405-de1527906fb7", + "userMetadata": { + "key1": "value1", + "key2": { + "key3": "value3" + } + }, + "userRoles": [], + "loginMethods": [ + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": true, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "emailpassword", + "email": "user0@example.com", + "passwordHash": "$2a", + "hashingAlgorithm": "BCRYPT" + }, + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": false, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "thirdparty", + "email": "user0@example.com", + "thirdPartyId": "thirdPartyId0", + "thirdPartyUserId": "thirdPartyUserId0" + }, + { + "tenantIds": [ + "public" + ], 
"isVerified": true, + "isPrimary": false, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "passwordless", + "email": "user0@example.com" + } + ] + }, + { + "id": "2e66fcec-09ea-4cd3-ad4f-817099872b5c", + "externalUserId": "0afd9119-d613-43e6-82a0-556d93d61421", + "userMetadata": { + "key1": "value1", + "key2": { + "key3": "value3" + } + }, + "userRoles": [], + "loginMethods": [ + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": true, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "emailpassword", + "email": "user1@example.com", + "passwordHash": "$2a", + "hashingAlgorithm": "BCRYPT" + }, + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": false, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "thirdparty", + "email": "user1@example.com", + "thirdPartyId": "thirdPartyId1", + "thirdPartyUserId": "thirdPartyUserId1" + }, + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": false, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "passwordless", + "email": "user1@example.com" + } + ] + }, + { + "id": "a9c828d1-a8db-4eb3-8e0a-1c985dba9fc9", + "externalUserId": "5a6fccfb-5778-40b1-be1c-2d8c25421253", + "userMetadata": { + "key1": "value1", + "key2": { + "key3": "value3" + } + }, + "userRoles": [], + "loginMethods": [ + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": true, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "emailpassword", + "email": "user2@example.com", + "passwordHash": "$2a", + "hashingAlgorithm": "BCRYPT" + }, + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": false, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": "thirdparty", + "email": "user2@example.com", + "thirdPartyId": "thirdPartyId2", + "thirdPartyUserId": "thirdPartyUserId2" + }, + { + "tenantIds": [ + "public" + ], + "isVerified": true, + "isPrimary": false, + "timeJoinedInMSSinceEpoch": 1712067215922, + "recipeId": 
"passwordless", + "email": "user2@example.com" + } + ] + } + ] +} \ No newline at end of file diff --git a/bulkimport_scripts/add_users_for_bulk_import/index.js b/bulkimport_scripts/add_users_for_bulk_import/index.js new file mode 100644 index 000000000..e229015d6 --- /dev/null +++ b/bulkimport_scripts/add_users_for_bulk_import/index.js @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2024, VRAI Labs and/or its affiliates. All rights reserved. + * + * This software is licensed under the Apache License, Version 2.0 (the + * "License") as published by the Apache Software Foundation. + * + * You may not use this file except in compliance with the License. You may + * obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
/*
 * Bulk-import helper script.
 *
 * Reads users from a JSON file and POSTs them to the `/bulk-import/users`
 * endpoint of the given core URL in batches of at most BATCH_SIZE users.
 * Users rejected with a 400 response are appended to the "invalid schema"
 * file; users that could not be submitted are written to the "remaining
 * users" file so that a later run can pick them up.
 */

const fs = require('fs/promises');
const yargs = require('yargs');
const process = require('process');

// Maximum number of users sent to the API in a single request.
const BATCH_SIZE = 10000;

/**
 * Parses the command line arguments.
 *
 * @returns {Promise<{coreAPIUrl: string, inputFileName: string, usersHavingInvalidSchemaFileName: string, remainingUsersFileName: string}>}
 */
async function parseInputArgs() {
  const argv = await yargs
    .option('core-endpoint', {
      alias: 'c',
      type: 'string',
      describe: 'Core API URL endpoint',
      demandOption: true,
    })
    .option('input-file', {
      alias: 'i',
      type: 'string',
      describe: 'Path to the input file',
      demandOption: true,
    })
    .option('invalid-schema-file', {
      alias: 's',
      type: 'string',
      describe: 'Path to the file storing users with invalid schema',
      default: './usersHavingInvalidSchema.json',
    })
    .option('remaining-users-file', {
      alias: 'r',
      type: 'string',
      describe: 'Path to the file storing remaining users',
      default: './remainingUsers.json',
    })
    .argv;

  return {
    coreAPIUrl: argv['core-endpoint'],
    inputFileName: argv['input-file'],
    usersHavingInvalidSchemaFileName: argv['invalid-schema-file'],
    remainingUsersFileName: argv['remaining-users-file'],
  };
}

/**
 * Loads the non-empty `users` array from the input JSON file.
 * Prints an error and exits the process with code 1 when the file cannot be
 * read, is not valid JSON, or does not contain a non-empty `users` array.
 *
 * @param {{inputFileName: string}} params
 * @returns {Promise<object[]>}
 */
async function getUsersFromInputFile({ inputFileName }) {
  try {
    const inputFileDataString = await fs.readFile(inputFileName, 'utf8');
    const inputFileData = JSON.parse(inputFileDataString);

    if (!inputFileData.users || !Array.isArray(inputFileData.users) || inputFileData.users.length === 0) {
      throw new Error('Expected users array in the input file.');
    }

    return inputFileData.users;
  } catch (error) {
    console.error('Error reading or parsing input file:', error.message);
    process.exit(1);
  }
}

/**
 * Deletes a file, treating "file does not exist" as success.
 * Any other failure is logged but not rethrown (best-effort cleanup).
 *
 * @param {string} fileName
 */
async function removeFileIfExists(fileName) {
  try {
    await fs.rm(fileName);
  } catch (error) {
    if (error.code !== 'ENOENT') {
      console.error(`Failed to delete ${fileName}:`, error.message);
    }
  }
}

/**
 * Removes the invalid-schema output of a previous run, if any.
 *
 * @param {{usersHavingInvalidSchemaFileName: string}} params
 */
async function deleteUsersHavingInvalidSchemaFileIfExists({ usersHavingInvalidSchemaFileName }) {
  await removeFileIfExists(usersHavingInvalidSchemaFileName);
}

/**
 * Appends the users reported as invalid to the invalid-schema file and
 * returns the users of the batch that were NOT reported.
 *
 * @param {object} params
 * @param {{index: number, errors: any}[]} params.errors - entries from the 400 response; `index` refers to a position in `users`.
 * @param {object[]} params.users - the batch that was sent.
 * @param {string} params.usersHavingInvalidSchemaFileName
 * @returns {Promise<object[]>} the batch without the invalid users.
 * @throws when the existing output file cannot be read/parsed or the file cannot be written.
 */
async function addInvalidSchemaUsersToFile({ errors, users, usersHavingInvalidSchemaFileName }) {
  let parsedData;
  try {
    const existingData = await fs.readFile(usersHavingInvalidSchemaFileName, 'utf8');
    parsedData = JSON.parse(existingData);
  } catch (error) {
    if (error.code !== 'ENOENT') {
      console.error(`Failed to read output file. Error: ${error.message}`);
      throw error;
    }
    parsedData = { users: [] };
  }

  // Collect the reported indexes once so the filter below is a constant-time
  // lookup per user instead of a scan over all errors.
  const invalidIndexes = new Set();
  for (const err of errors) {
    const isKnownIndex = Number.isInteger(err.index) && err.index >= 0 && err.index < users.length;
    if (isKnownIndex && !invalidIndexes.has(err.index)) {
      invalidIndexes.add(err.index);
      parsedData.users.push({ user: users[err.index], errors: err.errors });
    }
  }

  await fs.writeFile(usersHavingInvalidSchemaFileName, JSON.stringify(parsedData, null, 2));

  return users.filter((_, index) => !invalidIndexes.has(index));
}

/**
 * Rewrites the remaining-users file.
 *
 * @param {object} params
 * @param {object[]} params.users - all users from the input file.
 * @param {number} params.index - index of the first user that has not been handed to the API yet.
 * @param {string} params.remainingUsersFileName
 * @param {object[]} [params.pendingUsers=[]] - users already taken from `users` but not accepted by the API.
 */
async function updateRemainingUsersFile({ users, index, remainingUsersFileName, pendingUsers = [] }) {
  const remainingUsers = [...pendingUsers, ...users.slice(index)];
  await fs.writeFile(remainingUsersFileName, JSON.stringify({ users: remainingUsers }, null, 2));
}

/**
 * Removes the remaining-users file once nothing is left to process.
 *
 * @param {{remainingUsersFileName: string}} params
 */
async function removeRemainingUsersFile({ remainingUsersFileName }) {
  await removeFileIfExists(remainingUsersFileName);
}

/**
 * Entry point: submits all users in batches and prints a summary object.
 * Sets a non-zero exit code when some users could not be submitted.
 */
async function main() {
  const { coreAPIUrl, inputFileName, usersHavingInvalidSchemaFileName, remainingUsersFileName } = await parseInputArgs();

  // The script relies on the global fetch; fail early with a clear message on runtimes without it.
  if (typeof fetch !== 'function') {
    console.error('Global fetch is not available in this Node.js runtime. Please use Node.js v18 or higher.');
    process.exit(1);
  }

  const users = await getUsersFromInputFile({ inputFileName });

  // Avoid a double slash when the endpoint is passed with a trailing "/".
  const bulkImportUrl = `${coreAPIUrl.replace(/\/+$/, '')}/bulk-import/users`;

  await deleteUsersHavingInvalidSchemaFileIfExists({ usersHavingInvalidSchemaFileName });
  // Nothing has been accepted yet, so every user is still remaining.
  await updateRemainingUsersFile({ users, index: 0, remainingUsersFileName });

  let usersToProcessInBatch = [];
  let usersHavingInvalidSchemaCount = 0;
  // Index of the next user that has not been pulled into a batch yet.
  let i = 0;

  try {
    while (i < users.length || usersToProcessInBatch.length > 0) {
      // Top the batch up: it may already hold valid users carried over from a 400 response.
      const freeSlots = Math.max(BATCH_SIZE - usersToProcessInBatch.length, 0);
      const pullCount = Math.min(freeSlots, users.length - i);
      usersToProcessInBatch = usersToProcessInBatch.concat(users.slice(i, i + pullCount));
      // Advance right away so that "batch + users.slice(i)" is always the exact set of unprocessed users.
      i += pullCount;

      const res = await fetch(bulkImportUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ users: usersToProcessInBatch }),
      });

      if (res.status === 400) {
        const body = await res.json();
        const errors = body && body.users;
        if (!Array.isArray(errors) || errors.length === 0) {
          // A 400 without per-user errors cannot be recovered by dropping users.
          console.error(`Failed to add users. API response - status: ${res.status} body: ${JSON.stringify(body)}`);
          break;
        }

        const validUsers = await addInvalidSchemaUsersToFile({ errors, users: usersToProcessInBatch, usersHavingInvalidSchemaFileName });
        const removedCount = usersToProcessInBatch.length - validUsers.length;
        if (removedCount === 0) {
          // No user was removed, so re-sending the same batch would loop forever.
          console.error(`Failed to add users. API response - status: ${res.status} body: ${JSON.stringify(body)}`);
          break;
        }

        usersHavingInvalidSchemaCount += removedCount;
        usersToProcessInBatch = validUsers;
        continue;
      }

      if (!res.ok) {
        const text = await res.text();
        console.error(`Failed to add users. API response - status: ${res.status} body: ${text}`);
        break;
      }

      // The whole batch was accepted.
      usersToProcessInBatch = [];
      await updateRemainingUsersFile({ users, index: i, remainingUsersFileName });
    }
  } catch (error) {
    console.error(`Got an unexpected Error: `, error);
  }

  // Users still sitting in the batch were never accepted, so they count as remaining.
  const remainingCount = usersToProcessInBatch.length + (users.length - i);
  const processedCount = users.length - remainingCount;

  const result = {
    totalUsers: users.length,
    processedUsers: processedCount,
    remainingUsers: remainingCount,
    usersHavingInvalidSchema: usersHavingInvalidSchemaCount,
    ...(remainingCount > 0 && { remainingUsersFileName }),
    ...(usersHavingInvalidSchemaCount > 0 && { usersHavingInvalidSchemaFileName }),
  };

  if (remainingCount > 0) {
    await updateRemainingUsersFile({ users, index: i, remainingUsersFileName, pendingUsers: usersToProcessInBatch });
    const message = usersHavingInvalidSchemaCount > 0 ?
      `We processed ${processedCount} users and ${usersHavingInvalidSchemaCount} users have invalid schema! Remaining users can be processed again by processing the ${remainingUsersFileName} file and users having invalid schema needs to be fixed and processed again by processing the ${usersHavingInvalidSchemaFileName} file.`
      : `We processed ${processedCount} users and ${remainingCount} users are remaining to be processed! Remaining users can be processed again by processing the ${remainingUsersFileName} file.`;
    console.log({ message, ...result });
    // Signal the incomplete import to calling scripts.
    process.exitCode = 1;
  } else {
    await removeRemainingUsersFile({ remainingUsersFileName });
    const message = usersHavingInvalidSchemaCount > 0 ?
      `All users processed but ${usersHavingInvalidSchemaCount} users have invalid schema! Users having invalid schema needs to be fixed and processed again by processing the ${usersHavingInvalidSchemaFileName} file.` : `All users processed successfully!`;
    console.log({ message, ...result });
  }
}

main().catch((error) => {
  console.error(`Got an unexpected Error: `, error);
  process.exitCode = 1;
});
"integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/escalade": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "engines": 
{ + "node": ">=8" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": 
"sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "engines": { + "node": ">=12" + } + } + } +} diff --git a/bulkimport_scripts/add_users_for_bulk_import/package.json b/bulkimport_scripts/add_users_for_bulk_import/package.json new file mode 100644 index 000000000..94e519e30 --- /dev/null +++ b/bulkimport_scripts/add_users_for_bulk_import/package.json @@ -0,0 +1,18 @@ +{ + "name": "add_users_for_bulk_import", + "version": "1.0.0", + "engines": { + "node": ">=16.0.0" + }, + "description": "A script that takes a JSON file containing user data and calls the bulk import API to add users to the database for processing.", + "main": "index.js", + "scripts": { + "start": "node index.js" + }, + "keywords": [], + "author": "", + "license": "Apache-2.0", + "dependencies": { + "yargs": "^17.7.2" + } +}