-
Notifications
You must be signed in to change notification settings - Fork 3
/
merge_files.py
52 lines (46 loc) · 1.74 KB
/
merge_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import array
import io
import json
def merge_json_files(files, output_name, output_folder):
    """Merge several JSON files into one de-duplicated JSON file.

    Each input file is loaded as a sequence of mappings that carry at least
    the keys ``'verse'`` and ``'reference'``. An entry is kept only if neither
    its verse nor its reference has already been kept from an earlier entry
    (in this file or a previous one); otherwise it is counted as a duplicate
    and dropped. The first occurrence always wins, and input order is
    preserved.

    Args:
        files: Iterable of paths to the JSON files to merge, read as UTF-8.
        output_name: Prefix of the output file; ``_data.json`` is appended.
        output_folder: Directory the merged file is written into.

    Returns:
        A ``(path, count)`` tuple: ``path`` is the name of the written file
        and ``count`` is the number of merged entries plus one.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
        json.JSONDecodeError: If an input file is not valid JSON.
        KeyError: If an entry lacks the ``'verse'`` or ``'reference'`` key.
    """
    duplicates = 0
    all_data = []
    # Values already kept, for O(1) duplicate checks instead of rescanning
    # all_data for every entry.
    # NOTE(review): assumes 'verse' and 'reference' values are hashable
    # (e.g. strings) -- confirm against the data files.
    seen_verses = set()
    seen_references = set()

    for file in files:
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for entry in data:
            verse = entry['verse']
            reference = entry['reference']

            # A match on either field marks the entry as a duplicate.
            if reference in seen_references or verse in seen_verses:
                duplicates += 1
                continue

            # Only kept entries feed the seen-sets, so a dropped duplicate
            # never causes later entries to be dropped.
            seen_verses.add(verse)
            seen_references.add(reference)
            all_data.append(entry)

    print(f"There were {duplicates} duplicated verses.")

    # Create combined file; ensure_ascii=False keeps non-ASCII text readable.
    with open(f'{output_folder}/{output_name}_data.json', 'w', encoding='utf8') as outfile:
        json.dump(all_data, outfile, ensure_ascii=False, indent=4)

    # NOTE(review): the "+ 1" is kept so existing callers see the same value;
    # it looks like it may be an off-by-one -- confirm with the callers.
    return outfile.name, (len(all_data) + 1)