-
Notifications
You must be signed in to change notification settings - Fork 0
/
combine_and_count_demo_info.py
58 lines (52 loc) · 2.57 KB
/
combine_and_count_demo_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# # Count (disabled step, kept for reference)
# with open('example/sub_id_hits.csv', 'w') as wf:
#     with open('example/demographics.csv', 'r') as rf:
#         header = rf.readline()
#         wf.write('{},{}\n'.format(header.split(',')[0],'NumberOfHits'))
#         demo_questions = header.split(',')
#         demo_questions[-1] = demo_questions[-1].strip()
#         for line in rf:
#             line_list = line.split(',')
#             num_hits = len(line_list) // 10
#             wf.write('{},{}\n'.format(line_list[0],num_hits))
# Combine
# Cell values that mean "no answer was given".
_NO_ANSWER = ('', '{}')


def _merge_answer_sets(answers, question_names, num_questions=10):
    """Fold repeated answer sets for one subject into the first set, in place.

    ``answers`` holds back-to-back sets of ``num_questions`` answers.  Each
    set is compared, position by position, with the first set:

    * a blank answer in the first set is filled in from a later set;
    * two differing non-blank answers record that question's name;
    * a later set in which every answer is non-blank and equal (ignoring
      surrounding whitespace) to the first set's answer is overwritten with
      ``None`` entries so the caller can drop it.

    Args:
        answers: list of answer strings for one subject; modified in place.
        question_names: header names, indexed by position within a set.
        num_questions: number of answers that make up one set.

    Returns:
        The set of stripped question names whose answers conflict.

    Raises:
        IndexError: if a conflicting answer sits at a position for which
            ``question_names`` has no entry.
    """
    mismatched = set()
    for start in range(0, len(answers), num_questions):
        question_set = answers[start:start + num_questions]
        matches = 0
        for offset, answer in enumerate(question_set):
            if answer in _NO_ANSWER:
                # Nothing to merge or compare; this also keeps the set from
                # ever counting as a full duplicate.
                continue
            first_answer = answers[offset]
            if first_answer in _NO_ANSWER:
                # Back-fill the first set from the later submission.
                answers[offset] = answer
            elif first_answer.strip() == answer.strip():
                # The first set trivially equals itself; only later sets
                # can be duplicates.
                if start != 0:
                    matches += 1
            else:
                mismatched.add(question_names[offset].strip())
        if matches == len(question_set):
            # Blank out exactly the entries of this set.  The final set may
            # be shorter than num_questions, so do not index past the end.
            answers[start:start + len(question_set)] = (
                [None] * len(question_set))
    return mismatched


def consolidate_demographics(input_path='example/demographics.csv',
                             output_path='consolidated_demographics.csv',
                             num_questions=10):
    """Write one consolidated row per subject from a demographics CSV.

    The first input column is treated as the subject ID and the remaining
    columns as repeated sets of ``num_questions`` answers.  Each output row
    is ``subject ID, mismatch field, remaining answers`` where the mismatch
    field is ``N/A`` or a ``;``-separated, sorted list of question names
    whose answers conflicted between sets.

    The header is copied through with its first column replaced by
    ``SubjectID,AnswerMismatch``; it is not shortened, so a row from which
    duplicate sets were dropped has fewer fields than the header.

    Rows are split on plain commas; quoted fields containing commas are not
    supported.

    Args:
        input_path: CSV file to read.
        output_path: CSV file to create or overwrite.
        num_questions: number of answer columns per set.
    """
    # Open the input first so a missing input file does not truncate an
    # existing output file.
    with open(input_path, 'r') as rf, open(output_path, 'w') as wf:
        header_list = rf.readline().rstrip('\r\n').split(',')
        header_list[0] = 'AnswerMismatch'
        header_list = ['SubjectID'] + header_list
        # Always terminate the header, even when it had a single column.
        wf.write(','.join(header_list) + '\n')
        question_names = header_list[2:]

        for line in rf:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = line.split(',')
            subject_id = fields[0]
            answers = fields[1:]
            if answers:
                # Only the last field carries the line terminator.
                answers[-1] = answers[-1].strip()
            else:
                subject_id = subject_id.strip()

            mismatched = _merge_answer_sets(
                answers, question_names, num_questions)
            # Sort so the output is reproducible; set order is arbitrary.
            mismatch_field = ';'.join(sorted(mismatched)) if mismatched else 'N/A'
            kept = [answer for answer in answers if answer is not None]
            wf.write(','.join([subject_id, mismatch_field] + kept) + '\n')


if __name__ == '__main__':
    consolidate_demographics()