Skip to content

Commit

Permalink
Merge pull request #71 from Makespace/reduce_memory_usage
Browse files Browse the repository at this point in the history
Reduce memory usage when pulling training events
  • Loading branch information
Lan2u authored Sep 28, 2024
2 parents 1c7f1e4 + 7788d8a commit 9cb72a4
Show file tree
Hide file tree
Showing 20 changed files with 809 additions and 369 deletions.
10 changes: 1 addition & 9 deletions src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,7 @@ import * as tt from 'io-ts-types';
import * as E from 'fp-ts/Either';
import {pipe} from 'fp-ts/lib/function';
import {formatValidationErrors} from 'io-ts-reporters';

/**
 * Wraps `codec` so that input which is not a non-empty string is replaced by
 * the stringified `ifEmpty` value before being handed to `codec` itself.
 *
 * @param codec - Codec that performs the real validation.
 * @param ifEmpty - Value substituted (via `String(...)`) when the raw input
 *   fails the `NonEmptyString` check.
 * @returns A codec with the same type as `codec` and the substituting validator.
 */
const withDefaultIfEmpty = <C extends t.Any>(codec: C, ifEmpty: t.TypeOf<C>) =>
  tt.withValidate(codec, (input, context) => {
    const nonEmptyAttempt = tt.NonEmptyString.validate(input, context);
    // Only stringify the default when it is actually needed.
    const candidate = E.isRight(nonEmptyAttempt)
      ? nonEmptyAttempt.right
      : String(ifEmpty);
    return codec.validate(candidate, context);
  });
import {withDefaultIfEmpty} from './util';

const LogLevel = t.keyof({
trace: null,
Expand Down
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ const periodicExternalReadModelRefresh = setInterval(() => {
'Unexpected error when refreshing read model with external sources'
)
);
}, 60_000);
}, 30_000);
server.on('close', () => {
clearInterval(periodicReadModelRefresh);
clearInterval(periodicExternalReadModelRefresh);
Expand Down
160 changes: 149 additions & 11 deletions src/init-dependencies/google/pull_sheet_data.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,83 @@
import {Logger} from 'pino';
import * as TE from 'fp-ts/TaskEither';
import {Failure} from '../../types';
import * as t from 'io-ts';
import * as tt from 'io-ts-types';
import * as E from 'fp-ts/Either';

import {pipe} from 'fp-ts/lib/function';
import {sheets, sheets_v4} from '@googleapis/sheets';
import {sheets} from '@googleapis/sheets';
import {GoogleAuth} from 'google-auth-library';
import {columnIndexToLetter} from '../../training-sheets/extract-metadata';
import {formatValidationErrors} from 'io-ts-reporters';
import {DateTime} from 'luxon';

export const pullGoogleSheetData =
// Zone used whenever the spreadsheet does not report a usable one.
const DEFAULT_TIMEZONE = 'Europe/London';

/**
 * Codec for a spreadsheet's time zone.
 *
 * Not every google form sheet is actually in Europe/London; this was first
 * noticed because CI runs in a different zone (UTC) than the local test
 * machine (BST).
 *
 * Decoding never fails: input that is not a string, or a string that luxon
 * does not accept as a valid zone, decodes to `DEFAULT_TIMEZONE`.
 */
export const GoogleTimezone = tt.withValidate(t.string, (input, context) => {
  const asString = t.string.validate(input, context);
  const isUsableZone =
    E.isRight(asString) &&
    DateTime.local().setZone(asString.right).isValid;
  return isUsableZone ? t.success(asString.right) : t.success(DEFAULT_TIMEZONE);
});

// Spreadsheet-wide properties: only the time zone is decoded.
const InitialMetadataSpreadsheetProperties = t.strict({
  timeZone: GoogleTimezone,
});

// Properties of one sheet (tab): its title and the row count of its grid.
const InitialMetadataSheetProperties = t.strict({
  title: t.string,
  gridProperties: t.strict({
    rowCount: t.number,
  }),
});

/**
 * Shape of the metadata-only spreadsheet response: the spreadsheet time zone
 * plus, for every sheet, its title and grid row count. Built from strict
 * codecs, so properties not listed here are dropped on decode.
 */
export const GoogleSpreadsheetInitialMetadata = t.strict({
  properties: InitialMetadataSpreadsheetProperties,
  sheets: t.array(
    t.strict({
      properties: InitialMetadataSheetProperties,
    })
  ),
});
export type GoogleSpreadsheetInitialMetadata = t.TypeOf<
  typeof GoogleSpreadsheetInitialMetadata
>;

// One cell. `formattedValue` falls back to '' when it does not decode as a string.
const SheetDataCell = t.strict({
  formattedValue: tt.withFallback(t.string, ''),
});

// One row: a non-empty list of cells.
const SheetDataRow = t.strict({
  values: tt.nonEmptyArray(SheetDataCell),
});

// One block of grid data: a non-empty list of rows.
const SheetDataGrid = t.strict({
  rowData: tt.nonEmptyArray(SheetDataRow),
});

// One sheet: a non-empty list of grid data blocks.
const SheetDataSheet = t.strict({
  data: tt.nonEmptyArray(SheetDataGrid),
});

/**
 * Cell data for a single sheet. The nesting is verbose because it mirrors the
 * part of the google api response it is taken from.
 */
export const GoogleSpreadsheetDataForSheet = t.strict({
  // Array always has length = 1 because this is data for a single sheet.
  sheets: tt.nonEmptyArray(SheetDataSheet),
});
export type GoogleSpreadsheetDataForSheet = t.TypeOf<
  typeof GoogleSpreadsheetDataForSheet
>;

export const pullGoogleSheetDataMetadata =
(auth: GoogleAuth) =>
(
logger: Logger,
trainingSheetId: string
): TE.TaskEither<Failure, sheets_v4.Schema$Spreadsheet> =>
): TE.TaskEither<string, GoogleSpreadsheetInitialMetadata> =>
pipe(
TE.tryCatch(
() =>
Expand All @@ -20,15 +86,87 @@ export const pullGoogleSheetData =
auth,
}).spreadsheets.get({
spreadsheetId: trainingSheetId,
includeGridData: true,
includeGridData: false, // Only the metadata.
fields: 'sheets(properties),properties(timeZone)', // Only the metadata about the sheets.
}),
reason => {
logger.error(reason, 'Failed to get spreadsheet');
return {
// Expand failure reasons.
message: `Failed to get training spreadsheet ${trainingSheetId}`,
};
logger.error(reason, 'Failed to get spreadsheet metadata');
return `Failed to get training spreadsheet metadata ${trainingSheetId}`;
}
),
TE.map(resp => resp.data),
TE.chain(data =>
TE.fromEither(
pipe(
data,
GoogleSpreadsheetInitialMetadata.decode,
E.mapLeft(
e =>
`Failed to get google spreadsheet metadata from API response: ${formatValidationErrors(e).join(',')}`
)
)
)
)
);

/**
 * Builds a function that fetches the formatted cell values for one rectangular
 * range of one sheet and decodes them as `GoogleSpreadsheetDataForSheet`.
 *
 * Only `formattedValue` is requested for each cell (see `fields` below), which
 * keeps the response limited to the text of the cells in the requested range.
 *
 * @param auth - Google auth client passed to the sheets API client.
 * @returns A function taking:
 *   - `logger` - used to log the query and any request failure,
 *   - `trainingSheetId` - id of the spreadsheet to query,
 *   - `sheetName` - title of the sheet (tab) inside the spreadsheet,
 *   - `rowStart` / `rowEnd` - first and last row of the range (1 indexed),
 *   - `columnStartIndex` / `columnEndIndex` - first and last column of the
 *     range (0 indexed, converted to column letters).
 *   It resolves to `Right` with the decoded data, or `Left` with a
 *   human-readable message when the request fails or the response does not
 *   match the expected shape.
 */
export const pullGoogleSheetData =
  (auth: GoogleAuth) =>
  (
    logger: Logger,
    trainingSheetId: string,
    sheetName: string,
    rowStart: number, // 1 indexed.
    rowEnd: number,
    columnStartIndex: number, // 0 indexed, converted to a letter.
    columnEndIndex: number
  ): TE.TaskEither<string, GoogleSpreadsheetDataForSheet> =>
    pipe(
      TE.tryCatch(
        () => {
          // A1 notation requires single quotes around sheet names containing
          // spaces or special characters; an apostrophe inside the name is
          // escaped by doubling it. Quoting is also valid for plain names.
          const quotedSheetName = `'${sheetName.replace(/'/g, "''")}'`;
          const ranges = [
            `${quotedSheetName}!${columnIndexToLetter(columnStartIndex)}${rowStart}:${columnIndexToLetter(columnEndIndex)}${rowEnd}`,
          ];
          const fields = 'sheets(data(rowData(values(formattedValue))))';
          logger.info(
            'Querying sheet %s for fields %s range %s',
            trainingSheetId,
            fields,
            ranges
          );
          return sheets({
            version: 'v4',
            auth,
          }).spreadsheets.get({
            spreadsheetId: trainingSheetId,
            fields,
            ranges,
          });
        },
        reason => {
          logger.error(
            reason,
            'Failed to get training spreadsheet %s',
            trainingSheetId
          );
          return `Failed to get training spreadsheet ${trainingSheetId}`;
        }
      ),
      TE.map(resp => resp.data),
      // Validate the response shape so callers never see partially-formed data.
      TE.chain(data =>
        TE.fromEither(
          pipe(
            data,
            GoogleSpreadsheetDataForSheet.decode,
            E.mapLeft(
              e =>
                `Failed to get all required google spreadsheet data from API response: ${formatValidationErrors(e).join(',')}`
            )
          )
        )
      )
    );

/**
 * The google sheet helpers after they have been bound to a `GoogleAuth`
 * instance, bundled so they can be passed around as a single dependency.
 */
export interface GoogleHelpers {
  /** Auth-bound `pullGoogleSheetDataMetadata`. */
  pullGoogleSheetDataMetadata: ReturnType<typeof pullGoogleSheetDataMetadata>;
  /** Auth-bound `pullGoogleSheetData`. */
  pullGoogleSheetData: ReturnType<typeof pullGoogleSheetData>;
}
36 changes: 21 additions & 15 deletions src/init-dependencies/init-dependencies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import {commitEvent} from './event-store/commit-event';
import {getAllEvents, getAllEventsByType} from './event-store/get-all-events';
import {getResourceEvents} from './event-store/get-resource-events';
import {Client} from '@libsql/client';
import {pullGoogleSheetData} from './google/pull_sheet_data';
import {
GoogleHelpers,
pullGoogleSheetData,
pullGoogleSheetDataMetadata,
} from './google/pull_sheet_data';
import {initSharedReadModel} from '../read-models/shared-state';
import {GoogleAuth} from 'google-auth-library';

Expand Down Expand Up @@ -57,24 +61,26 @@ export const initDependencies = (
})
);

const googleAuth =
conf.GOOGLE_SERVICE_ACCOUNT_KEY_JSON.toLowerCase().trim() === 'disabled'
? O.none
: O.some(
pullGoogleSheetData(
new GoogleAuth({
// Google issues the credentials file and validates it.
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
credentials: JSON.parse(conf.GOOGLE_SERVICE_ACCOUNT_KEY_JSON),
scopes: ['https://www.googleapis.com/auth/spreadsheets.readonly'],
})
)
);
let googleHelpers: O.Option<GoogleHelpers> = O.none;
if (
conf.GOOGLE_SERVICE_ACCOUNT_KEY_JSON.toLowerCase().trim() !== 'disabled'
) {
const googleAuth = new GoogleAuth({
// Google issues the credentials file and validates it.
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
credentials: JSON.parse(conf.GOOGLE_SERVICE_ACCOUNT_KEY_JSON),
scopes: ['https://www.googleapis.com/auth/spreadsheets.readonly'],
});
googleHelpers = O.some({
pullGoogleSheetData: pullGoogleSheetData(googleAuth),
pullGoogleSheetDataMetadata: pullGoogleSheetDataMetadata(googleAuth),
});
}

const sharedReadModel = initSharedReadModel(
dbClient,
logger,
googleAuth,
googleHelpers,
conf.GOOGLE_RATELIMIT_MS
);

Expand Down
Loading

0 comments on commit 9cb72a4

Please sign in to comment.