forked from lysistic/fw
-
Notifications
You must be signed in to change notification settings - Fork 0
/
jioaben.py
151 lines (136 loc) · 4.57 KB
/
jioaben.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import requests
import time
import csv
import re
isTest = 0
num = 0
def has_test_case(line):
assert isinstance(line, str)
pattern = "^diff --git.*Test.*$"
return bool(re.match(pattern, line))
def is_func(str):
# 方法的修饰符用来判断一个方法
funcs = ['public','protected','private','static','final','synchronized','native','abstract']
for s in funcs:
index = str.find(s)
if (index != -1):
return index
return -1
def get_repo_contents(prj, path=""):
url = f"https://api.github.com/repos/{prj}/contents/{path}"
time.sleep(6)
print(123)
print("[prj]:",prj)
print("[path]:",path)
response = requests.get(url)
if response.status_code == 200:
return response.json()
else:
print(f"Failed to retrieve repository contents for {path}.")
return None
def get_test_files(prj, path="", goal = ""):
def match_test_file(filename):
assert isinstance(filename, str)
pattern = "^Test" + re.escape(str) + "$"
return bool(re.match(pattern, filename))
# return True
contents = get_repo_contents(prj, path)
# file = open(output_file, "a")
if contents is not None:
for item in contents:
if item['type'] == 'file' and match_test_file(item['name']):
return True
elif item['type'] == 'dir':
return get_test_files(prj, item['path'])
# file.write(item['name'])
# file.close()
return False
def deal_with_url( url ):
res = requests.get(url)
time.sleep(6)
# 初始化部分
s = res.text.splitlines(keepends = False)
file = 0
file_java = 0
func = 0
hunk = 0
ischange = 0
isTestcase = 0
iscomment = 0
# 使用set去重
funcset = set()
# 计算file,func,hunk
for st in s:
# 删除前导空格,方便后面的操作
st = re.sub(r' {2,}', '', st)
# file
if (st.startswith("diff") == True):
isTestcase = 0
match = re.search(r'/([^/]*)$', st)
if match:
filename = match.group(1)
match = re.search(r'/([^/]*)$', url)
if match:
html_url = match.group(1)
print(html_url)
print(filename)
if has_test_case(st) == False:
file = file + 1
# 单独计数java文件
if (st.endswith(".java") == True):
file_java = file_java + 1
# 如果不是test文件,就扫描仓库,找一下test文件
if get_test_files(prj, html_url, filename) ==True:
isTest = 1
else :
isTestcase = 1
isTest = 1
# 跳过test文件不计算func和hunk
if isTestcase == 1:
continue
# func
if st.startswith('-') == False and is_func(st) != -1 and st.find('(') != -1 and st.find(')') != -1 :
index1 = is_func(st)
index2 = st.find(')')
stg = st[index1 : index2 + 1]
funcset.add(stg)
# hunk
# 不是修改行
if (st.startswith('+') == 0) and (st.startswith('-') == 0):
ischange = 0
# 是修改行
if ((st.startswith("+") == True and st.startswith("+++") == False)
or (st.startswith("-") == True and st.startswith("---") == False)) and (ischange == 0):
# 是import
if (st.find("import") != -1) :
pass
# 跳过注释
# 一行注释
elif (st.find("//") == 1 ):
pass
# 是一段注释的开始
elif (st.find('/**') == 1) or (st.find('/*') == 1):
iscomment = 1
pass
# 正在一段注释中
elif (iscomment == 1):
pass
# 一段注释结束
elif (st.find('*/') != -1) and iscomment == 1:
iscomment = 0
pass
# 跳过空语句
elif (len(st) == 1):
pass
# 跳过无效修改
# 阿巴阿巴现在还没有很好的想法
# 考虑多种无效修改
# 是有效注释
else :
ischange = 1
hunk = hunk + 1
func = len(funcset)
url='https://github.com/alibaba/fastjson/commit/f5903fa56497c00ed0703ac875b511f9bd5f1d8e'
prj='alibaba/fastjson'
# 输出file,func,hunk
deal_with_url(url + ".diff")