-
Notifications
You must be signed in to change notification settings - Fork 0
/
process-csv.py
32 lines (26 loc) · 1.13 KB
/
process-csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# main file for processing the initial data-set
import csv
from process_post import feature
import control
class post:
    """One row of post data together with the features extracted from it.

    Feature extraction is delegated to ``process_post.feature`` and happens
    once, at construction time.
    """

    # Class-level default: every column named in control.post_columns mapped
    # to 0. Each instance replaces it with its own row in __init__.
    item = dict.fromkeys(control.post_columns, 0)

    def __init__(self, post_data):
        """Keep *post_data* and compute its features immediately.

        post_data is handed unchanged to ``feature``; the result of that
        object's ``get_features()`` is cached on ``self.exFeatures``.
        """
        self.item = post_data
        self.exFeatures = feature(post_data).get_features()

    def get_features(self):
        """Return the features computed when this object was created."""
        return self.exFeatures
if __name__ == '__main__':
    # Stream every row of the input CSV through feature extraction and write
    # the resulting feature dicts to the processed-data CSV.
    #
    # Both files are opened with newline='' as the csv module requires for
    # file objects it reads or writes: without it the writer emits "\r\r\n"
    # row endings on Windows, and the reader can mis-handle newlines that
    # appear inside quoted fields.
    with open(control.processed_data_path, mode='w',
              encoding=control.ENCODING, newline='') as write_file:
        writer = csv.DictWriter(write_file, fieldnames=control.feature_keys)
        writer.writeheader()
        with open(control.csv_reading_path, mode='r',
                  encoding=control.ENCODING, newline='') as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=',')
            line_count = 0
            # each question is processed and then written to the processed data file
            for row in csv_reader:
                postObj = post(row)
                exFeatures = postObj.get_features()  # feature extraction
                writer.writerow(exFeatures)
                line_count += 1
            # Report the total once all rows have been written.
            print(line_count, 'sample processed')