-
Notifications
You must be signed in to change notification settings - Fork 1
/
getTrainData.js
133 lines (120 loc) · 6.29 KB
/
getTrainData.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"use strict";
const path = require("path");
const fs = require("fs");
const axios = require("axios");
const gunzip = require('gunzip-file');
/**
 * Streams the resource at `link` into the local file `file`.
 *
 * The returned promise settles on the state of the *write* side: it resolves
 * once the write stream has emitted "finish" (every chunk handed to the file
 * system), not merely when the HTTP body has ended. Errors from either the
 * response stream or the write stream reject the promise instead of surfacing
 * as unhandled `error` events.
 *
 * @param {string} link - URL fetched with a GET request.
 * @param {string} file - Destination path; opened with `fs.createWriteStream`.
 * @returns {Promise<void>} Resolves after the whole body has been written.
 * @throws Rejects with the request, stream or file-system error. On a stream
 *   failure the partially written file is removed before rejecting, so an
 *   existence check never mistakes a truncated file for a finished download.
 */
const downloadFile = async (link, file) => {
  const response = await axios({
    method: "GET",
    url: link,
    responseType: "stream",
  });
  const source = response.data;

  return new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(file);
    let settled = false;

    const fail = (error) => {
      if (settled) {
        return;
      }
      settled = true;
      source.unpipe(writer);
      source.destroy();
      // Remove the partial file only after the writer has closed, so the
      // unlink cannot race with the (asynchronous) open of the destination.
      // Unlink errors are ignored on purpose: the file may never have existed.
      writer.once("close", () => {
        fs.unlink(file, () => reject(error));
      });
      writer.destroy();
    };

    source.on("error", fail);
    writer.on("error", fail);
    writer.on("finish", () => {
      if (settled) {
        return;
      }
      settled = true;
      resolve();
    });

    source.pipe(writer);
  });
};
// Language packs that are downloaded and kept as `.gz` archives (this module
// never extracts them). `label` is only used in log and timer messages.
const LANGUAGE_PACKS = [
  { code: "eng", label: "english" },
  { code: "hin", label: "hindi" },
  { code: "tel", label: "telugu" },
  { code: "urd", label: "urdu" },
];

/** Builds the download URL of the gzipped trained-data file for `code`. */
const trainedDataUrl = (code) =>
  `https://github.com/naptha/tessdata/blob/gh-pages/4.0.0/${code}.traineddata.gz?raw=true`;

/** Best-effort removal of `file`; a file that does not exist is not an error. */
const discardFile = (file) => {
  try {
    fs.unlinkSync(file);
  } catch (error) {
    if (error.code !== "ENOENT") {
      console.log(error);
    }
  }
};

/** Promise wrapper around the callback-style `gunzip(source, target, done)`. */
const extractArchive = (archive, target) =>
  new Promise((resolve) => {
    gunzip(archive, target, () => resolve());
  });

/** Downloads one language archive into `dataDir` unless it is already there. */
const fetchLanguagePack = async (dataDir, { code, label }) => {
  const archive = path.join(dataDir, `${code}.traineddata.gz`);
  if (fs.existsSync(archive)) {
    return;
  }

  const timerLabel = `total time taken for ${label} file to download is: `;
  console.time(timerLabel);
  console.log(`File download started for file ${label} trained data`);
  try {
    await downloadFile(trainedDataUrl(code), archive);
  } catch (error) {
    console.log(error);
    // A truncated archive would satisfy the existence check above on the
    // next run and permanently block a retry, so drop it.
    discardFile(archive);
  }
  console.timeEnd(timerLabel);
};

/** Downloads and extracts `osd.traineddata` into `dataDir` if it is missing. */
const fetchOsdData = async (dataDir) => {
  const target = path.join(dataDir, "osd.traineddata");
  if (fs.existsSync(target)) {
    return;
  }

  const archive = `${target}.gz`;
  console.time("Time taken for osd.traineddata is: ");
  try {
    await downloadFile(trainedDataUrl("osd"), archive);
    console.log("File download completed for osd trained data");
    console.log("Extracting osd.trainedData file");
    // Wait for the extraction callback before deleting the archive;
    // unlinking right after starting gunzip removes its input too early.
    await extractArchive(archive, target);
    console.log('Extraction of osd.traineddata done!');
    console.log("Deleting the osd.traineddata.gz file");
    fs.unlinkSync(archive);
  } catch (error) {
    console.log(error);
  }
  console.timeEnd("Time taken for osd.traineddata is: ");
};

/**
 * Ensures the trained-data files exist under `<__dirname>/routes/trainedData`.
 *
 * Creates the directory when needed, downloads each missing language archive
 * (eng, hin, tel, urd) one after another, then downloads and extracts the OSD
 * data if `osd.traineddata` is missing. Failures of individual downloads are
 * logged and do not stop the remaining ones.
 *
 * @returns {Promise<void>} Resolves when every step has been attempted.
 * @throws Rejects only if the data directory itself cannot be created.
 */
const checkTrainData = async () => {
  console.time("Total time taken to setup project is: ");
  const dataDir = path.join(__dirname, "routes", "trainedData");
  // `recursive` also creates a missing `routes` parent and is a no-op when
  // the directory already exists.
  fs.mkdirSync(dataDir, { recursive: true });
  try {
    for (const pack of LANGUAGE_PACKS) {
      await fetchLanguagePack(dataDir, pack);
    }
    await fetchOsdData(dataDir);
  } catch (error) {
    console.log(error);
  } finally {
    console.log("getTrainData completed");
    console.timeEnd("Total time taken to setup project is: ");
  }
};
module.exports = checkTrainData;