// multi_cluster_extraction_v0.test.ts
// Forked from AIObjectives/talk-to-the-city-reports.
import { vi, describe, it, expect, beforeEach } from 'vitest';
import MultiClusterExtractionNode, {
multi_cluster_extraction_node_data
} from '$lib/compute/multi_cluster_extraction_v0';
import deepCopy from 'deep-copy';
import csv_data from '$lib/mock_data/csv/csv.json';
import _ from 'lodash';
import { getEncoding } from 'js-tiktoken';
// Tokenizer used by the tests to compute the expected token count on their own,
// so the value reported by the node can be cross-checked.
const encoding = getEncoding('cl100k_base');

// Vitest hoists `vi.mock` calls to the top of the module regardless of where
// they are written, so this mock is active for every test in this file.
// Declaring it at top level makes the source order match the execution order
// (nested `vi.mock` calls are misleading and newer Vitest versions warn on them).
vi.mock('$lib/utils', () => ({
  readFileFromGCS: vi.fn(() => Promise.resolve()),
  uploadJSONToGCS: vi.fn(() => Promise.resolve())
}));

/**
 * Test suite for `MultiClusterExtractionNode`.
 *
 * Each test gets a fresh node built from a deep copy of the default node data
 * and a fresh input object, so mutations made by one test (changing
 * `context_limit`, replacing `csv`, deleting `open_ai_key`) cannot leak into
 * the next one.
 */
describe('MultiClusterExtractionNode class', () => {
  let node;
  let inputData;
  // Shared timeout (ms) applied to the setup hook and to every test.
  const timeout = 60000;

  /**
   * Calls `node.compute` with the argument list shared by all tests.
   * `node` and `inputData` are read at call time, so per-test mutations made
   * before calling this helper are honoured.
   * NOTE(review): the meaning of each positional argument is defined by the
   * node's `compute` signature, which is not visible in this file.
   */
  const runCompute = () =>
    node.compute(inputData, 'run', console.log, console.error, console.log, 'test_slug', null);

  beforeEach(() => {
    node = new MultiClusterExtractionNode(deepCopy(multi_cluster_extraction_node_data));
    inputData = {
      // Placeholder key; it only needs to be present for the node to run.
      open_ai_key: 'sk-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
      csv: csv_data
    };
  }, timeout);

  it(
    'should split CSV into chunks and process each chunk',
    async () => {
      node.data.context_limit = 6;

      // Total number of tokens across every comment body in the fixture.
      const expectedNumTokens = _.sumBy(
        csv_data,
        (row) => encoding.encode(row['comment-body']).length
      );
      // Guards the fixture itself: the chunk maths below relies on 12 tokens.
      expect(expectedNumTokens).toEqual(12);

      const expectedNumChunks = Math.ceil(expectedNumTokens / node.data.context_limit);
      expect(expectedNumChunks).toEqual(2);

      const output = await runCompute();

      expect(node.data.num_tokens).toEqual(expectedNumTokens);
      expect(node.data.num_chunks).toEqual(expectedNumChunks);
      // One result object per chunk.
      // NOTE(review): these topics presumably come from a canned/mock model
      // response used under test - confirm against the node implementation.
      expect(output).toEqual([
        {
          topics: [
            {
              topicName: 'Weather',
              subtopics: ['Current Conditions']
            }
          ]
        },
        {
          topics: [
            {
              topicName: 'Weather',
              subtopics: ['Precipitation Duration']
            }
          ]
        }
      ]);
    },
    timeout
  );

  it(
    'should handle empty CSV input',
    async () => {
      inputData.csv = [];

      const output = await runCompute();

      expect(node.data.num_tokens).toEqual(0);
      expect(node.data.num_chunks).toEqual(0);
      expect(output).toEqual([]);
    },
    timeout
  );

  it(
    'should not process if no open_ai_key is provided',
    async () => {
      delete inputData.open_ai_key;

      const output = await runCompute();

      // Pins the current behaviour: without a key the result is a single
      // `undefined` entry rather than an empty array.
      expect(output).toEqual([undefined]);
    },
    timeout
  );
});