-
Notifications
You must be signed in to change notification settings - Fork 282
/
Decoder.py
executable file
·242 lines (181 loc) · 8.56 KB
/
Decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Decoder module
Dectect Binary and decode it
"""
import time
import os
import redis
import base64
from hashlib import sha1
import magic
import json
import datetime
from pubsublogger import publisher
from Helper import Process
from packages import Paste
from packages import Item
import re
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def hex_decoder(hexStr):
#hexStr = ''.join( hex_string.split(" ") )
return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
def binary_decoder(binary_string):
return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
def base64_decoder(base64_string):
return base64.b64decode(base64_string)
def decode_string(content, message, date, encoded_list, decoder_name, encoded_min_size):
find = False
for encoded in encoded_list:
if len(encoded) >= encoded_min_size:
decode = decoder_function[decoder_name](encoded)
find = True
save_hash(decoder_name, message, date, decode)
#remove encoded from paste content
content = content.replace(encoded, '', 1)
if(find):
set_out_paste(decoder_name, message)
return content
# # TODO: FIXME check db
def save_hash(decoder_name, message, date, decoded):
print(decoder_name)
type = magic.from_buffer(decoded, mime=True)
hash = sha1(decoded).hexdigest()
print(hash)
data = {}
data['name'] = hash
data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
data['origin'] = message
data['estimated type'] = type
json_data = json.dumps(data)
date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
date_key = date[0:4] + date[4:6] + date[6:8]
serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1)
serv_metadata.zincrby('hash_date:'+date_key, hash, 1)
serv_metadata.zincrby(decoder_name+'_date:'+date_key, hash, 1)
# first time we see this hash
if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
else:
serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
# first time we see this hash (all encoding) on this paste
if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None:
serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map
# create hash metadata
serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
serv_metadata.sadd('hash_all_type', type)
# first time we see this hash encoding on this paste
if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
print('first '+decoder_name)
serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map
# create hash metadata
serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
# first time we see this hash today
#if serv_metadata.zscore('hash_date:'+date_key, hash) is None:
# serv_metadata.zincrby('hash_type:'+type, date_key, 1)
# first time we see this hash encoding today
if serv_metadata.zscore(decoder_name+'_date:'+date_key, hash) is None:
serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
save_hash_on_disk(decoded, type, hash, json_data)
print('found {} '.format(type))
serv_metadata.hincrby('metadata_hash:'+hash, decoder_name+'_decoder', 1)
serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
# Domain Object
if Item.is_crawled(message):
domain = Item.get_item_domain(message)
serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map
serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map
def save_hash_on_disk(decode, type, hash, json_data):
local_filename_hash = os.path.join(p.config.get("Directories", "hash"), type, hash[:2], hash)
filename_hash = os.path.join(os.environ['AIL_HOME'], local_filename_hash)
filename_json = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "hash"), type, hash[:2], hash + '.json')
dirname = os.path.dirname(filename_hash)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filename_hash, 'wb') as f:
f.write(decode)
# create hash metadata
serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_hash)
serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash))
with open(filename_json, 'w') as f:
f.write(json_data)
def set_out_paste(decoder_name, message):
publisher.warning(decoder_name+' decoded')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
msg = 'infoleak:automatic-detection="'+decoder_name+'";{}'.format(message)
p.populate_set_out(msg, 'Tags')
if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
publisher.port = 6380
# Script is the default channel used for the modules.
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg
config_section = 'Decoder'
# Setup the I/O queues
p = Process(config_section)
serv_metadata = redis.StrictRedis(
host=p.config.get("ARDB_Metadata", "host"),
port=p.config.getint("ARDB_Metadata", "port"),
db=p.config.getint("ARDB_Metadata", "db"),
decode_responses=True)
# Sent to the logging a description of the module
publisher.info("Decoder started")
regex_binary = '[0-1]{40,}'
#regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
regex_hex = '[A-Fa-f0-9]{40,}'
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
re.compile(regex_binary)
re.compile(regex_hex)
re.compile(regex_base64)
# map decoder function
decoder_function = {'binary':binary_decoder,'hexadecimal':hex_decoder, 'base64':base64_decoder}
hex_max_execution_time = p.config.getint("Hex", "max_execution_time")
binary_max_execution_time = p.config.getint("Binary", "max_execution_time")
base64_max_execution_time = p.config.getint("Base64", "max_execution_time")
# list all decoder yith regex,
decoder_binary = {'name': 'binary', 'regex': regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
decoder_hexadecimal = {'name': 'hexadecimal', 'regex': regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
decoder_base64 = {'name': 'base64', 'regex': regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}
decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
for decoder in decoder_order:
serv_metadata.sadd('all_decoder', decoder['name'])
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
filename = message
paste = Paste.Paste(filename)
# Do something with the message from the queue
content = paste.get_p_content()
date = str(paste._get_p_date())
for decoder in decoder_order: # add threshold and size limit
# max execution time on regex
signal.alarm(decoder['max_execution_time'])
try:
encoded_list = re.findall(decoder['regex'], content)
except TimeoutException:
encoded_list = []
p.incr_module_timeout_statistic() # add encoder type
print ("{0} processing timeout".format(paste.p_rel_path))
continue
else:
signal.alarm(0)
if(len(encoded_list) > 0):
content = decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size'])