-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
144 lines (122 loc) · 3.24 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
const fs = require('fs')
const {exit} = require('process')
const xml = require('xml2js')
// Command line: exactly two user-supplied arguments are expected after the
// node binary and the script path.
const cliArgs = process.argv.slice(2)
if (cliArgs.length !== 2) {
  console.error('Usage: [biosamples directory path] [TSV reference file path]')
  exit(1)
}
// First argument: directory holding the biosample XML files.
// Second argument: path of the TSV reference file.
const [biosamples_dir, tsv_path] = cliArgs
// Create the app
/**
 * Merges every biosample XML file of a directory into a single BioSampleSet
 * document that is printed on stdout.
 *
 * The pipeline is callback driven and starts from the constructor:
 * readTSV() -> readBiosamples() -> generateXML().
 */
class app {
  /**
   * Initialise the state and immediately start the pipeline.
   */
  constructor() {
    // Rows of the TSV reference file, each row being an array of cells.
    this.tsv = null
    // Parsed XML documents, one entry per biosample file.
    this.biosamples = null
    // Options handed to both the xml2js parser and the xml2js builder.
    this.xmlOptions = {
      attrkey: 'attributes',
      charkey: 'text',
    }
    this.readTSV()
  }

  /**
   * Read the giant tsv reference into `this.tsv`, then move on to the
   * biosample XML files.
   *
   * Both LF and CRLF line endings are accepted and empty lines (typically the
   * one following the final newline) are dropped so that no bogus `['']` row
   * ends up in the table.
   */
  readTSV() {
    fs.readFile(tsv_path, {}, (err, content) => {
      if (err) {
        console.error('Unable to open tsv file', tsv_path, err)
        exit(1)
        return
      }

      this.tsv = content
        .toString()
        .split(/\r?\n/)
        .filter((line) => line.length > 0)
        .map((line) => line.split('\t'))

      // Ok we parsed the TSV, we can move on to reading the XMLs
      this.readBiosamples()
    })
  }

  /**
   * Read and parse all the XML files of the biosamples directory, then call
   * generateXML() exactly once when every directory entry has been handled.
   *
   * Only entries whose name ends with `.xml` are read. Parsed documents are
   * stored in directory-listing order, whatever the order in which the
   * asynchronous reads complete.
   */
  readBiosamples() {
    this.biosamples = []

    fs.readdir(biosamples_dir, {}, (err, files) => {
      if (err) {
        console.error(err)
        exit(1)
        return
      }

      // An empty directory has nothing to wait for: emit the (empty) result
      // right away instead of never reaching the next step.
      if (files.length === 0) {
        this.generateXML()
        return
      }

      // The reads are asynchronous, so count the entries still being handled.
      // Once the counter reaches 0 every entry was processed and the final
      // document can be generated.
      let pending = files.length
      // Indexed by position in `files` to keep the output order deterministic.
      const parsed = []
      const settle = () => {
        pending--
        if (pending === 0) {
          // Drop the slots of skipped entries and of empty documents.
          this.biosamples = parsed.filter((doc) => doc != null)
          this.generateXML()
        }
      }

      files.forEach((file, index) => {
        // Allow only XML files. Ignore anything else.
        if (!file.endsWith('.xml')) {
          settle()
          return
        }

        // Construct the path to the file
        const path = biosamples_dir + '/' + file
        fs.readFile(path, {}, (err, content) => {
          if (err) {
            console.error(err)
            exit(1)
            return
          }

          // Parse the XML
          xml.parseString(content.toString(), this.xmlOptions, (err, data) => {
            if (err) {
              console.error(err)
              exit(1)
              return
            }

            parsed[index] = data
            settle()
          })
        })
      })
    })
  }

  /**
   * Generate the final XML and print it on stdout.
   *
   * Every sample of every parsed document goes through processSample() and
   * is collected in a single BioSampleSet. Documents that do not expose a
   * `BioSampleSet.BioSample` array (for example an empty set) contribute no
   * sample instead of aborting the whole run.
   */
  generateXML() {
    const results = []

    for (const doc of this.biosamples) {
      const set = doc ? doc.BioSampleSet : undefined
      const samples = set ? set.BioSample : undefined
      if (!Array.isArray(samples)) {
        continue
      }
      for (const sample of samples) {
        results.push(this.processSample(sample))
      }
    }

    const builder = new xml.Builder(this.xmlOptions)
    const built = builder.buildObject({
      BioSampleSet: {
        BioSample: results,
      },
    })

    // stdout carries the generated document and nothing else.
    console.log(built)
  }

  /**
   * Correct the biosample XML structure and return it.
   *
   * NOTE(review): the correction itself is not implemented yet; the sample is
   * currently returned untouched.
   *
   * @param biosample A parsed BioSample element.
   * @returns {Object} The sample with the right attributes in the right place.
   */
  processSample(biosample) {
    // TODO: Check if the Id with db_label is in 2nd position (i = 1)
    // Sometimes the name is in the attributes section
    const firstIds = biosample && biosample.Ids ? biosample.Ids[0] : undefined
    if (firstIds) {
      // Diagnostics go to stderr so they never end up inside the XML output.
      console.error(firstIds.Id)
    }
    return biosample
  }
}
// Run the app: the constructor itself kicks off the processing by calling
// readTSV(), so creating the instance is all that is needed here.
new app()