# automatic_generate_code_fragment.py
import os
import re
import csv
def split_function(filepath):
    """Split a source file into per-function lists of stripped lines.

    The file is scanned line by line. Blank lines are dropped and every
    remaining line is stripped of surrounding whitespace. A line whose first
    whitespace-separated token is exactly ``function`` or ``constructor``
    starts a new group; every following non-blank line is appended to the
    most recent group until the next header line. Lines that appear before
    the first header are discarded.

    Note that the header test compares the whole first token, so a header
    written without a space before the parenthesis (``function() ...``) is
    not recognised as a header and is appended to the previous group.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list of groups; each group is a list of strings whose first element
        is the header line.
    """
    header_keywords = ("function", "constructor")
    function_list = []
    with open(filepath, 'r') as f:
        for line in f:
            text = line.strip()
            if not text:
                continue
            if text.split()[0] in header_keywords:
                function_list.append([text])
            elif function_list:
                # Body line (or trailing text) of the most recent function.
                function_list[-1].append(text)
    return function_list
def find_location(filepath):
    """Collect the code fragments related to ``.call.value`` in a file.

    The file is split with ``split_function``. Two kinds of functions are
    gathered:

    * W functions: functions with at least one line containing
      ``.call.value``.
    * C functions: functions without ``.call.value`` that have more than two
      lines and contain, after the header, a line that mentions
      ``<W name>(`` and whose first parenthesised group has a non-empty first
      argument and as many comma-separated pieces as the W header's
      parameter list.

    A W function whose second extracted identifier is ``payable`` (the
    ``function () payable`` form) has no callable name, so no C function is
    searched for it. A W function whose header has no ``(...)`` group or
    fewer than two identifiers on its first line is still returned but is
    not used to search for callers.

    Each function appears at most once in the result, and the argument count
    is checked against the specific W function being matched.

    Args:
        filepath: Path of the file to analyse.

    Returns:
        A list of functions (each a list of line strings): all W functions in
        file order, followed by the C functions found.
    """
    all_functions = split_function(filepath)

    # First parenthesised group on a line (non-greedy).
    paren_group = re.compile(r'[(](.*?)[)]', re.S)
    # Identifiers of a header line; index 1 is used as the function name.
    identifier = re.compile(r'\b([_A-Za-z]\w*)\b(?:(?=\s*\w+\()|(?!\s*\w+))')

    # Partition the functions into W functions and everything else.
    w_functions = []
    other_functions = []
    for func in all_functions:
        if any('.call.value' in text for text in func):
            w_functions.append(func)
        else:
            other_functions.append(func)

    # For every W function derive (call token, parameter count). The call
    # token is None for the nameless ``function () payable`` form.
    w_signatures = []
    for func in w_functions:
        print("Call Value Location: ", func)
        header = func[0]
        groups = paren_group.findall(header)
        names = identifier.findall(header)
        if not groups or len(names) < 2:
            # Header is not in the expected single-line form; the function
            # is kept as a fragment but cannot be matched against callers.
            continue
        # An empty parameter list still counts as one piece here, which
        # mirrors how call sites are counted below.
        arity = len(groups[0].split(","))
        if names[1] == "payable":
            w_signatures.append((None, arity))
        else:
            w_signatures.append((names[1] + "(", arity))

    # Find the C functions that call a W function with a matching arity.
    c_functions = []
    seen = set()  # ids of functions already recorded as C functions
    for call_token, arity in w_signatures:
        if call_token is None:
            print("There is no C function")
            continue
        for func in other_functions:
            if len(func) <= 2 or id(func) in seen:
                continue
            for text in func[1:]:
                if call_token not in text:
                    continue
                groups = paren_group.findall(text)
                if not groups:
                    # Call is split over several lines; nothing to compare.
                    continue
                call_args = groups[0].split(",")
                if call_args[0] != "" and len(call_args) == arity:
                    c_functions.append(func)
                    seen.add(id(func))
                    break

    print("==============================================================>")

    code_fragments = w_functions + c_functions
    print("Code Fragments: ", code_fragments)
    return code_fragments
def printResult(filepath, code_fragments):
    """Append the code fragments of one contract to ``filepath``.

    The first line written is the base name of ``filepath``; it is followed
    by every fragment line on its own line. Lines that consist solely of
    ``{`` or ``}`` are skipped. The file is opened in append mode, so
    calling this twice with the same path adds a second copy.

    Args:
        filepath: Path of the output file; its base name is used as the
            leading identifier line.
        code_fragments: Iterable of fragments, each an iterable of lines.
    """
    base = os.path.basename(filepath)
    with open(filepath, 'a') as f_code:
        f_code.write(base + '\n')
        for fragment in code_fragments:
            for text in fragment:
                if text == '{' or text == '}':
                    continue
                f_code.write(str(text) + '\n')
            # NOTE(review): the original indentation was lost, so it is
            # unclear whether this blank console line was emitted per
            # fragment or per written line; per fragment is assumed here.
            print()
def write2csv(contract_csv, filepath):
    """Append one fragment file to the CSV as an ``[id, text]`` row.

    The first line of ``filepath`` (without its line terminator) becomes the
    first cell and the remaining lines, joined unchanged, become the second
    cell. An empty file produces no row.

    Args:
        contract_csv: Path of the CSV file; rows are appended to it.
        filepath: Path of the fragment file to read.
    """
    with open(filepath, 'r') as f_code:
        lines = f_code.readlines()
    if not lines:
        # Nothing to record; avoids indexing into an empty list.
        return

    # Drop the newline so the id cell does not contain an embedded line break.
    contract_id = lines[0].rstrip('\n')
    contracts = "".join(lines[1:])

    # newline='' lets the csv module control line endings itself.
    with open(contract_csv, 'a', newline='') as out:
        csv.writer(out, dialect='excel').writerow([contract_id, contracts])
def main():
    """Run the extraction pipeline over the training contracts.

    Steps:
      1. Append the CSV header row.
      2. For every file in the formatted-fragment directory, extract its
         code fragments and append them to a file of the same name in the
         code-fragment directory.
      3. Append every file of the code-fragment directory to the CSV.
    """
    source_dir = "../SmartContractDataSet/train_data_formatted_fragment/"
    result = "../SmartContractDataSet/code_fragment/"
    contract_csv = "../SmartContractDataSet/code_fragment_csv/contract_csv.csv"

    # Write the header through a handle that is closed right away. The rows
    # are appended later through separate handles opened by write2csv, so an
    # unflushed header would otherwise end up after the data rows.
    with open(contract_csv, 'a', newline='') as out:
        csv_write = csv.writer(out, dialect='excel')
        csv_write.writerow(['id', 'sequence_text', 'reentrancy', 'noreentrancy'])

    dirs = os.listdir(source_dir)
    print(len(dirs))

    for file in dirs:
        contract_path = source_dir + file
        print(contract_path)
        code_fragments = find_location(contract_path)
        printResult(result + file, code_fragments)

    for file in os.listdir(result):
        write2csv(contract_csv, result + file)


if __name__ == "__main__":
    main()