-
Notifications
You must be signed in to change notification settings - Fork 26
/
reparseprops.py
95 lines (86 loc) · 2.53 KB
/
reparseprops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
"""
Usage: reparseprops.py DATAPATH
input=yodaqa csv outputs (properties)
output=jacana formatted files for use in save.py
"""
import os
import sys
import glob
# Output locations for the training set; they are passed to reparseProps()
# at the bottom of this file. All paths are relative to the working directory.
QPATH="data/Qtrain.txt"  # receives the <Q n> ... </Q> question records
PPATH="data/Ptrain.txt"  # receives property texts labelled '1', grouped in <A n> ... </A>
NPATH="data/Ntrain.txt"  # receives the remaining property texts, grouped the same way
CPATH1="data/Clues1train.txt"  # receives the second field of each line written to PPATH
CPATH0="data/Clues0train.txt"  # receives the second field of each line written to NPATH
# Test-set counterparts; disabled together with the matching (commented-out)
# reparseProps() call at the end of the file.
#TPATH="data/curated-test"
#TQPATH="data/Qtest.txt"
#TPPATH="data/Ptest.txt"
#TNPATH="data/Ntest.txt"
#TCPATH1="data/Clues1test.txt"
#TCPATH0="data/Clues0test.txt"
def notNumber(s):
    """Return True when *s* cannot be converted by ``float()``, else False.

    Anything ``float()`` accepts counts as a number, including values with
    surrounding whitespace (e.g. ``"3\\n"``) and the spellings ``"nan"`` and
    ``"inf"``.

    Args:
        s: the value to test, normally one whitespace-delimited field of an
            input line.

    Returns:
        bool: True if ``float(s)`` fails, False if it succeeds.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that does not parse as a float.
        # TypeError: a non-string, non-numeric value such as None -- also
        # "not a number", so report it instead of letting it propagate.
        return True
    return False
def reparseProps(PATH,QPATH,PPATH,NPATH,CPATH1,CPATH0):
    """Convert every file under PATH into question/property/clue output files.

    Each file matched by ``PATH + '/*'`` is read as space-separated lines:

    * ``<Q> question words...`` -- the question; only the first such line
      of a file is used, later ones are skipped.
    * ``label clue text words...`` -- a property line. Lines whose first two
      fields are not both numeric (see ``notNumber``) produce no output.

    Within one file a property text (fields 3 and up, lower-cased) is written
    once, at its first occurrence. It is treated as positive when any
    non-``<Q>`` line of that file carrying the same text has the label
    ``'1'``; otherwise it is treated as negative.

    Args:
        PATH: directory whose entries are the input files.
        QPATH: output file for ``<Q n>`` / question text / ``</Q>`` records.
        PPATH: output file for positive texts, wrapped per input file in
            ``<A n>`` ... ``</A>``.
        NPATH: output file for negative texts, wrapped the same way.
        CPATH1: output file receiving the second field of each positive line.
        CPATH0: output file receiving the second field of each negative line.

    Notes:
        The question counter ``n`` only advances when a file contains a
        ``<Q>`` line. One ``". "`` is written to stdout per processed file
        as a progress indicator.
    """
    qnum = 0
    # A single `with` closes all five outputs even if parsing fails midway.
    with open(QPATH, 'w') as q, open(PPATH, 'w') as p, \
            open(NPATH, 'w') as n, open(CPATH1, 'w') as cp, \
            open(CPATH0, 'w') as cn:
        # glob returns matches in arbitrary order; sort so that the question
        # numbering is reproducible between runs and machines.
        for path in sorted(glob.glob(PATH + '/*')):
            p.write("<A " + str(qnum) + ">\n")
            n.write("<A " + str(qnum) + ">\n")

            # Read once and drop the line terminator up front, so that a last
            # line without a trailing newline is handled like any other line.
            with open(path, 'r') as f:
                rows = [line.rstrip("\n").split(" ") for line in f]

            # Pass 1: decide the label of every distinct text. A '1' anywhere
            # wins; otherwise the label of the first occurrence is kept.
            labels = {}
            for fields in rows:
                if fields[0] == "<Q>":
                    continue
                text = " ".join(fields[2:]).lower()
                if fields[0] == '1' or text not in labels:
                    labels[text] = fields[0]

            # Pass 2: emit the question and each distinct text exactly once.
            seen = set()
            question_written = False
            for fields in rows:
                if fields[0] == "<Q>":
                    if not question_written:
                        q.write("<Q " + str(qnum) + ">\n")
                        q.write(" ".join(fields[1:]) + "\n")
                        q.write("</Q>\n")
                        question_written = True
                        qnum += 1
                    continue
                # The length check keeps a line holding a single numeric
                # token from raising IndexError on fields[1].
                if (len(fields) < 2 or notNumber(fields[0])
                        or notNumber(fields[1])):
                    continue
                text = " ".join(fields[2:]).lower()
                if text in seen:
                    continue
                seen.add(text)
                if labels[text] == '1':
                    p.write(text + "\n")
                    cp.write(fields[1] + "\n")
                else:
                    n.write(text + "\n")
                    cn.write(fields[1] + "\n")

            p.write("</A>\n")
            n.write("</A>\n")
            # Progress marker; works on both Python 2 and Python 3, unlike
            # the `print ".",` statement it replaces.
            sys.stdout.write(". ")
if __name__ == "__main__":
    # Guarded so that importing this module (e.g. to reuse reparseProps)
    # does not start a conversion as a side effect.
    if len(sys.argv) < 2:
        # A string passed to sys.exit is printed to stderr and the process
        # exits with status 1 -- clearer than a bare IndexError.
        sys.exit("Usage: reparseprops.py DATAPATH")
    PATH = sys.argv[1]
    reparseProps(PATH,QPATH,PPATH,NPATH,CPATH1,CPATH0)
    #reparseProps(TPATH,TQPATH,TPPATH,TNPATH,TCPATH1,TCPATH0)