-
Notifications
You must be signed in to change notification settings - Fork 0
/
load-csv.js
80 lines (77 loc) · 2.05 KB
/
load-csv.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
const fs = require("fs");
const _ = require("lodash");
const shuffleSeed = require("shuffle-seed");
/**
 * Pick the named columns out of a table whose first row is the header row.
 *
 * The header row is processed like every other row, so the first row of the
 * result holds the selected header names. The input rows are only read,
 * never modified, so the same table can safely be passed in again (even with
 * overlapping column names).
 *
 * @param {Array<Array<*>>} data - Rows of cells; `data[0]` is the header row.
 * @param {string[]} columnNames - Header names to keep, in output order.
 * @returns {Array<Array<*>>} One new row per input row holding only the
 *   requested cells. A name that is not present in the header row yields
 *   `undefined` at that position in every output row. An empty or missing
 *   table yields `[]`.
 */
function extractColumns(data, columnNames) {
  if (!data || data.length === 0) {
    return [];
  }

  const headers = data[0];
  const indexes = (columnNames || []).map(column => headers.indexOf(column));

  // Read the cells by index rather than pulling them out of the row: removing
  // them would shrink the caller's rows (and the header row) as a side effect.
  return data.map(row => indexes.map(index => row[index]));
}
/**
 * Read a comma-separated file synchronously and turn it into feature and
 * label rows.
 *
 * The first line of the file is treated as the header row. Every other cell
 * is passed through the converter registered for its column, or through
 * `parseFloat` when there is none; if the result is NaN the original string
 * is kept. Fields are split on every comma (quoted fields are not supported).
 *
 * @param {string} filename - Path of the CSV file to read (UTF-8).
 * @param {Object} [options]
 * @param {Object<string, function(string): *>} [options.converters={}] -
 *   Per-column parsers, keyed by header name.
 * @param {string[]} [options.dataColumns=[]] - Header names of feature columns.
 * @param {string[]} [options.labelColumns=[]] - Header names of label columns.
 * @param {boolean} [options.shuffle=true] - Shuffle the rows with a fixed seed.
 * @param {boolean|number} [options.splitTest=false] - Falsy: no split. A
 *   number: that many leading rows become the test set. Any other truthy
 *   value: the first half of the rows (rounded down) becomes the test set.
 * @returns {{features: Array, labels: Array, testFeatures?: Array, testLabels?: Array}}
 *   `testFeatures` / `testLabels` are only present when a split was requested.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function loadCSV(
  filename,
  {
    converters = {},
    dataColumns = [],
    labelColumns = [],
    shuffle = true,
    splitTest = false
  } = {}
) {
  const text = fs.readFileSync(filename, { encoding: "utf-8" });

  const rows = text
    // Accept both LF and CRLF line endings so no "\r" sticks to the last cell.
    .split(/\r?\n/)
    // Trailing empty cells (e.g. from "a,b,,") are dropped from each row.
    .map(line => _.dropRightWhile(line.split(","), val => val === ""))
    // Blank lines (typically the trailing newline) would otherwise turn into
    // rows of undefined values; the header row at index 0 is always kept.
    .filter((row, rowIndex) => rowIndex === 0 || row.length > 0);

  const headers = _.first(rows);

  const parsedRows = rows.map((row, rowIndex) => {
    if (rowIndex === 0) {
      return row;
    }
    return row.map((element, columnIndex) => {
      const convert = converters[headers[columnIndex]];
      const value = convert ? convert(element) : parseFloat(element);
      // Keep the raw string when the cell could not be parsed.
      return _.isNaN(value) ? element : value;
    });
  });

  let labels = extractColumns(parsedRows, labelColumns);
  let features = extractColumns(parsedRows, dataColumns);

  // Drop the header row from both results.
  features.shift();
  labels.shift();

  if (shuffle) {
    // Same seed for both arrays; presumably this yields the same permutation
    // for equal-length inputs so rows and labels stay paired - verify against
    // the shuffle-seed documentation.
    features = shuffleSeed.shuffle(features, "phrase");
    labels = shuffleSeed.shuffle(labels, "phrase");
  }

  if (!splitTest) {
    return { features: features, labels: labels };
  }

  // Number of leading rows that go to the test set.
  const testSize = _.isNumber(splitTest)
    ? splitTest
    : Math.floor(features.length / 2);

  return {
    features: features.slice(testSize),
    labels: labels.slice(testSize),
    testFeatures: features.slice(0, testSize),
    testLabels: labels.slice(0, testSize)
  };
}
// Demo run: load "data.csv", use "height" and "value" as features and
// "passed" as the label, hold out one row as the test set, and print
// everything that came back.
const demoOptions = {
  converters: {
    // The text "TRUE" becomes boolean true; any other text becomes false.
    passed: val => val === "TRUE"
  },
  dataColumns: ["height", "value"],
  labelColumns: ["passed"],
  shuffle: true,
  splitTest: 1
};

const { features, labels, testFeatures, testLabels } = loadCSV(
  "data.csv",
  demoOptions
);

console.log(
  "features", features,
  "labels", labels,
  "testFeatures", testFeatures,
  "testLabels", testLabels
);