Skip to content

Commit

Permalink
chg: [update + show decoded items] add background update
Browse files Browse the repository at this point in the history
  • Loading branch information
Terrtia committed Nov 8, 2019
1 parent 6b9ba9d commit 880c351
Show file tree
Hide file tree
Showing 11 changed files with 288 additions and 8 deletions.
26 changes: 25 additions & 1 deletion bin/lib/Domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import Correlation
from Cryptocurrency import cryptocurrency
from Pgp import pgp
import Decoded
import Item
import Tag

Expand Down Expand Up @@ -197,6 +198,14 @@ def get_domain_pgp(domain, currencies_type=None, get_nb=False):
'''
return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb)

def get_domain_decoded(domain):
    '''
    Return all decoded items of a given domain.

    :param domain: crawled domain
    :return: whatever Decoded.get_domain_decoded_item returns for this domain
    '''
    decoded_items = Decoded.get_domain_decoded_item(domain)
    return decoded_items

def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
'''
Retun all correlation of a given domain.
Expand All @@ -214,6 +223,9 @@ def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
res = get_domain_pgp(domain, get_nb=get_nb)
if res:
domain_correl['pgp'] = res
res = get_domain_decoded(domain)
if res:
domain_correl['decoded'] = res
return domain_correl

# TODO: handle port
Expand Down Expand Up @@ -271,6 +283,12 @@ def __init__(self, domain, port=None):
if self.is_domain_up():
self.current_port = sanathyse_port(port, self.domain, self.type)

def get_domain_name(self):
    '''
    Return the domain name stored on this object.
    '''
    domain_name = self.domain
    return domain_name

def get_domain_type(self):
    '''
    Return the type stored on this object.
    '''
    domain_type = self.type
    return domain_type

def get_current_port(self):
    '''
    Return the port currently selected for this domain.
    '''
    port = self.current_port
    return port

Expand Down Expand Up @@ -361,10 +379,16 @@ def get_domain_correlation(self):
'''
return get_domain_all_correlation(self.domain, get_nb=True)

def get_domain_history_with_status(self):
def get_domain_history(self):
    '''
    Return the full history of this domain.

    The lookup is delegated to the module-level get_domain_history helper
    and is always made for port 80.
    '''
    port = 80
    return get_domain_history(self.domain, self.type, port)

def get_domain_history_with_status(self):
    '''
    Return the full history (with status) of this domain.

    The lookup is delegated to the module-level
    get_domain_history_with_status helper and is always made for port 80.
    '''
    port = 80
    return get_domain_history_with_status(self.domain, self.type, port)

def get_domain_items_crawled(self, port=None, epoch=None, items_link=False, item_screenshot=False, item_tag=False):
Expand Down
44 changes: 44 additions & 0 deletions bin/packages/Correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,50 @@ def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=Fals
dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
return dict_correlation

def _get_item_correlation_obj(self, item_id, correlation_type):
    '''
    Return the correlation values stored for an item id.

    The values are the members of the set
    'item_<correlation_name>_<correlation_type>:<item_id>'.

    :param item_id: item id
    :type item_id: str
    :param correlation_type: correlation type
    :type correlation_type: str
    :return: a list of correlation values, empty when nothing is stored
    :rtype: list
    '''
    redis_key = 'item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id)
    members = r_serv_metadata.smembers(redis_key)
    return list(members) if members else []

def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False):
    '''
    Return every requested correlation of an item id.

    Types without any stored value are left out of the result.

    :param item_id: item id
    :param correlation_type: list of correlation types
    :type correlation_type: list, optional
    :param get_nb: when true, add an 'nb' entry holding the total number of values
    :type get_nb: bool, optional
    :return: a dictionary mapping each correlation type to its values
    :rtype: dict
    '''
    requested_types = self.sanythise_correlation_types(correlation_type)
    correlations = {}
    for type_name in requested_types:
        values = self._get_item_correlation_obj(item_id, type_name)
        if not values:
            continue
        correlations[type_name] = values
        if get_nb:
            # running total across all non-empty types
            correlations['nb'] = correlations.get('nb', 0) + len(correlations[type_name])
    return correlations



def save_domain_correlation(self, domain, correlation_type, correlation_value):
    '''
    Record a correlation value for a domain, in both directions.

    Adds the value to 'domain_<correlation_name>_<correlation_type>:<domain>'
    and the domain to
    'set_domain_<correlation_name>_<correlation_type>:<correlation_value>'.

    :param domain: crawled domain
    :param correlation_type: correlation type
    :param correlation_value: value to link with the domain
    '''
    name = self.correlation_name
    domain_key = 'domain_{}_{}:{}'.format(name, correlation_type, domain)
    value_key = 'set_domain_{}_{}:{}'.format(name, correlation_type, correlation_value)
    r_serv_metadata.sadd(domain_key, correlation_value)
    r_serv_metadata.sadd(value_key, domain)

######## API EXPOSED ########

Expand Down
3 changes: 2 additions & 1 deletion bin/packages/Cryptocurrency.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def get_cryptocurrency(request_dict, cryptocurrency_type):

return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name)

# # TODO: refractor/move me in Correlation
def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address):
# create basic medata
if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)):
Expand Down Expand Up @@ -89,7 +90,7 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc
r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address)

# domain
if Item.is_crawled(item_path):
if Item.is_crawled(item_path): # # TODO: use save_domain_correlation
domain = Item.get_item_domain(item_path)
r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address)
r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain)
62 changes: 62 additions & 0 deletions bin/packages/Item.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Decoded

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
import Tag
from Cryptocurrency import cryptocurrency
from Pgp import pgp

config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
Expand Down Expand Up @@ -126,26 +129,85 @@ def get_item(request_dict):
###
### correlation
###
def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False):
    '''
    Return all cryptocurrencies of a given item.

    :param item_id: item id
    :param currencies_type: list of cryptocurrencies type
    :type currencies_type: list, optional
    :param get_nb: forwarded to cryptocurrency.get_item_correlation_dict
    :type get_nb: bool, optional
    '''
    lookup = cryptocurrency.get_item_correlation_dict
    return lookup(item_id, correlation_type=currencies_type, get_nb=get_nb)

def get_item_pgp(item_id, currencies_type=None, get_nb=False):
    '''
    Return all pgp correlations of a given item.

    :param item_id: item id
    :param currencies_type: list of pgp correlation types (the name is kept for compatibility)
    :type currencies_type: list, optional
    :param get_nb: forwarded to pgp.get_item_correlation_dict
    :type get_nb: bool, optional
    '''
    lookup = pgp.get_item_correlation_dict
    return lookup(item_id, correlation_type=currencies_type, get_nb=get_nb)

def get_item_decoded(item_id):
    '''
    Return all decoded items of a given item.

    :param item_id: item id
    :return: whatever Decoded.get_item_decoded returns for this item
    '''
    decoded_items = Decoded.get_item_decoded(item_id)
    return decoded_items

def get_item_all_correlation(item_id, correlation_type=None, get_nb=False):
    '''
    Return all correlations of a given item id.

    Cryptocurrency, pgp and decoded correlations are looked up in that
    order; a family is only present in the result when it is non-empty.

    :param item_id: item id
    :type item_id: str
    :param correlation_type: accepted but not used by this function
    :param get_nb: forwarded to the cryptocurrency and pgp lookups
    :type get_nb: bool, optional
    :return: a dict of all correlations for an item id
    :rtype: dict
    '''
    lookups = (
        ('cryptocurrency', get_item_cryptocurrency(item_id, get_nb=get_nb)),
        ('pgp', get_item_pgp(item_id, get_nb=get_nb)),
        ('decoded', get_item_decoded(item_id)),
    )
    return {family: found for family, found in lookups if found}



## TODO: REFRACTOR
def _get_item_correlation(correlation_name, correlation_type, item_id):
    '''
    Return the members of 'item_<correlation_name>_<correlation_type>:<item_id>'
    as a list, or an empty list when the set has no members.
    '''
    redis_key = 'item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)
    members = r_serv_metadata.smembers(redis_key)
    return list(members) if members else []

## TODO: REFRACTOR
def get_item_bitcoin(item_id):
    '''
    Return the bitcoin addresses stored for an item id.
    '''
    return _get_item_correlation(
        correlation_name='cryptocurrency',
        correlation_type='bitcoin',
        item_id=item_id,
    )

## TODO: REFRACTOR
def get_item_pgp_key(item_id):
    '''
    Return the pgpdump 'key' values stored for an item id.
    '''
    return _get_item_correlation(
        correlation_name='pgpdump',
        correlation_type='key',
        item_id=item_id,
    )

## TODO: REFRACTOR
def get_item_pgp_name(item_id):
    '''
    Return the pgpdump 'name' values stored for an item id.
    '''
    return _get_item_correlation(
        correlation_name='pgpdump',
        correlation_type='name',
        item_id=item_id,
    )

## TODO: REFRACTOR
def get_item_pgp_mail(item_id):
    '''
    Return the pgpdump 'mail' values stored for an item id.
    '''
    return _get_item_correlation(
        correlation_name='pgpdump',
        correlation_type='mail',
        item_id=item_id,
    )

## TODO: REFRACTOR
def get_item_pgp_correlation(item_id):
    '''
    Placeholder: not implemented yet, always returns None.
    '''
    return None

Expand Down
8 changes: 5 additions & 3 deletions bin/packages/Tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,11 @@ def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"):
res_dict['type'] = item_type
return (res_dict, 200)

def add_domain_tag(tag, domain, item_date):
    '''
    Attach a tag to a domain.

    Adds the tag to the 'tag:<domain>' set and the domain to the
    'domain:<tag>:<item_date>' set.

    :param tag: tag to add
    :param domain: domain receiving the tag
    :param item_date: date used in the per-tag domain set key
    '''
    domain_tags_key = 'tag:{}'.format(domain)
    tagged_domains_key = 'domain:{}:{}'.format(tag, item_date)
    r_serv_metadata.sadd(domain_tags_key, tag)
    r_serv_tags.sadd(tagged_domains_key, domain)

def add_item_tag(tag, item_path, item_type="paste"):
def add_item_tag(tag, item_path, item_type="paste", tag_date=None):

if item_type=="paste":
item_date = int(Item.get_item_date(item_path))
Expand All @@ -189,8 +192,7 @@ def add_item_tag(tag, item_path, item_type="paste"):
# domain item
else:
item_date = int(Domain.get_domain_last_check(item_path, r_format="int"))
r_serv_metadata.sadd('tag:{}'.format(item_path), tag)
r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), item_path)
add_domain_tag(tag, item_path, item_date)

r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1)

Expand Down
21 changes: 19 additions & 2 deletions bin/update-background.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,22 @@
r_serv.delete('ail:current_background_script_stat')
r_serv.delete('ail:current_background_update')

if r_serv.scard('ail:update_v2.4') != 1:
pass
# Background update v2.4: propagate item tags/correlations to their domains.
if r_serv.get('ail:current_background_update') == 'v2.4':
    r_serv.delete('ail:update_error')
    r_serv.set('ail:update_in_progress', 'v2.4')
    r_serv.set('ail:current_background_update', 'v2.4')
    r_serv.set('ail:current_background_script', 'domain update')

    update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v2.4', 'Update_domain.py')
    process = subprocess.run(['python' ,update_file])

    script_stat = r_serv.get('ail:current_background_script_stat')
    if script_stat is None:
        # The script never reported any progress. Calling int() on the
        # missing value would raise, so fall back on the exit status:
        # a clean exit means there was nothing left to convert.
        update_done = process.returncode == 0
    else:
        update_done = int(script_stat) == 100

    if not update_done:
        r_serv.set('ail:update_error', 'Update v2.4 Failed, please relaunch the bin/update-background.py script')
    else:
        # Update finished: clear every progress/bookkeeping key.
        r_serv.delete('ail:update_in_progress')
        r_serv.delete('ail:current_background_script')
        r_serv.delete('ail:current_background_script_stat')
        r_serv.delete('ail:current_background_update')
        r_serv.delete('update:nb_elem_to_convert')
        r_serv.delete('update:nb_elem_converted')
5 changes: 5 additions & 0 deletions update/v2.4/Update.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@

config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None

#Set current update_in_progress
r_serv.set('ail:update_in_progress', new_version)
r_serv.set('ail:current_background_update', new_version)

r_serv_onion.sunionstore('domain_update_v2.4', 'full_onion_up', 'full_regular_up')
r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v2.4'))
r_serv.set('update:nb_elem_converted',0)

#Set current ail version
r_serv.set('ail:version', new_version)

Expand Down
77 changes: 77 additions & 0 deletions update/v2.4/Update_domain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import re
import sys
import time
import redis
import datetime

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Item
import Tag
from Cryptocurrency import cryptocurrency
from Pgp import pgp

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Decoded
import Domain

def update_update_stats():
    '''
    Compute the update progress and publish it.

    Reads the 'update:nb_elem_converted' counter, converts it to a
    percentage of the module-level nb_elem_to_update total, prints it and
    stores it in 'ail:current_background_script_stat'.
    '''
    nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
    if nb_elem_to_update:
        progress = int((nb_updated * 100) / nb_elem_to_update)
    else:
        # A total of 0 means there is nothing to convert: report the work
        # as complete instead of dividing by zero.
        progress = 100
    print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
    r_serv_db.set('ail:current_background_script_stat', progress)

def update_domain_by_item(domain_obj, item_id):
    '''
    Propagate the tags and correlations of one crawled item to its domain.

    :param domain_obj: object exposing get_domain_name() for the target domain
    :param item_id: id of the crawled item to read tags and correlations from
    '''
    domain_name = domain_obj.get_domain_name()

    # update domain tags (the two submission tags are not copied to the domain)
    skipped_tags = ('infoleak:submission="crawler"', 'infoleak:submission="manual"')
    for tag in Tag.get_item_tags(item_id):
        if tag not in skipped_tags:
            Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id))

    # update domain correlation
    item_correlation = Item.get_item_all_correlation(item_id)

    # Pick the saver once per correlation family instead of re-testing the
    # family name for every type and every value.
    typed_savers = {
        'pgp': pgp.save_domain_correlation,
        'cryptocurrency': cryptocurrency.save_domain_correlation,
    }
    for correlation_name in item_correlation:
        correlation = item_correlation[correlation_name]
        if correlation_name == 'decoded':
            # decoded correlations are a flat collection: save each one once
            for decoded_item in correlation:
                Decoded.save_domain_decoded(domain_name, decoded_item)
        elif correlation_name in typed_savers:
            save_correlation = typed_savers[correlation_name]
            for correlation_type in correlation:
                for correl_value in correlation[correlation_type]:
                    save_correlation(domain_name, correlation_type, correl_value)

if __name__ == '__main__':

    start_deb = time.time()

    config_loader = ConfigLoader.ConfigLoader()
    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    # Total number of queued domains; update_update_stats() reads this
    # module-level name to compute the progress percentage.
    nb_elem_to_update = int( r_serv_db.get('update:nb_elem_to_convert') )

    # Drain the queue of domains to update, one domain per iteration.
    while True:
        domain_name = r_serv_onion.spop('domain_update_v2.4')
        if domain_name is None:
            # Queue drained: always publish completion. Otherwise an empty
            # queue would leave 'ail:current_background_script_stat' unset
            # and bin/update-background.py would call int() on a missing value.
            r_serv_db.set('ail:current_background_script_stat', 100)
            sys.exit(0)

        print(domain_name)
        domain = Domain.Domain(domain_name)
        for domain_history in domain.get_domain_history():
            # domain_history[1] is passed as the epoch of this history entry
            domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
            if "items" in domain_item:
                for item_dict in domain_item['items']:
                    update_domain_by_item(domain, item_dict['id'])

        r_serv_db.incr('update:nb_elem_converted')
        update_update_stats()
2 changes: 2 additions & 0 deletions var/www/blueprints/crawler_splash.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,7 @@ def showDomain():
dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port
dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items'])

print(dict_domain)

return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
modal_add_tags=get_modal_add_tags(dict_domain['domain'], tag_type="domain"))
4 changes: 3 additions & 1 deletion var/www/modules/Flask_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']

dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_message': 'An Update is running on the background. Some informations like Tags, screenshot can be',
'update_warning_message_notice_me': 'missing from the UI.'}
'update_warning_message_notice_me': 'missing from the UI.'},
'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be',
'update_warning_message_notice_me': 'missing from the UI.'}
}

UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')
Expand Down

0 comments on commit 880c351

Please sign in to comment.