From 48abb89d2870a74a1d9ba565ff620c66d235fcd4 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 17 Oct 2019 16:39:43 +0200 Subject: [PATCH 01/14] chg: [Domain] add domain object: tag + correlation (decoded items + tags + pgp + cryptocurrency) --- OVERVIEW.md | 37 ++++++++++++++- bin/Decoder.py | 7 +++ bin/PgpDump.py | 33 ++----------- bin/Tags.py | 8 ++++ bin/packages/Correlation.py | 54 ++++++++++++++++++--- bin/packages/Cryptocurrency.py | 31 +++++++++++-- bin/packages/Domain.py | 85 ++++++++++++++++++++++++++++++++++ bin/packages/Item.py | 29 +++++++++++- bin/packages/Pgp.py | 39 +++++++++++++++- bin/packages/Tag.py | 5 ++ 10 files changed, 286 insertions(+), 42 deletions(-) create mode 100755 bin/packages/Domain.py diff --git a/OVERVIEW.md b/OVERVIEW.md index 68efa81f..14aac71b 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -261,6 +261,9 @@ Redis and ARDB overview | set_pgpdump_name:*name* | *item_path* | | | | | set_pgpdump_mail:*mail* | *item_path* | +| | | +| | | +| set_domain_pgpdump_**pgp_type**:**key** | **domain** | ##### Hset date: | Key | Field | Value | @@ -288,11 +291,20 @@ Redis and ARDB overview | item_pgpdump_name:*item_path* | *name* | | | | | item_pgpdump_mail:*item_path* | *mail* | +| | | +| | | +| domain_pgpdump_**pgp_type**:**domain** | **key** | #### Cryptocurrency Supported cryptocurrency: - bitcoin +- bitcoin-cash +- dash +- etherum +- litecoin +- monero +- zcash ##### Hset: | Key | Field | Value | @@ -303,7 +315,8 @@ Supported cryptocurrency: ##### set: | Key | Value | | ------ | ------ | -| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | +| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | PASTE +| domain_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **domain** | DOMAIN ##### Hset date: | Key | Field | Value | @@ -318,8 +331,14 @@ Supported cryptocurrency: ##### set: | Key | Value | | ------ | ------ | -| 
item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | +| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | PASTE +| domain_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | DOMAIN +#### HASH +| Key | Value | +| ------ | ------ | +| hash_domain:**domain** | **hash** | +| domain_hash:**hash** | **domain** | ## DB9 - Crawler: @@ -362,6 +381,20 @@ Supported cryptocurrency: } ``` +##### CRAWLER QUEUES: +| SET - Key | Value | +| ------ | ------ | +| onion_crawler_queue | **url**;**item_id** | RE-CRAWL +| regular_crawler_queue | - | +| | | +| onion_crawler_priority_queue | **url**;**item_id** | USER +| regular_crawler_priority_queue | - | +| | | +| onion_crawler_discovery_queue | **url**;**item_id** | DISCOVER +| regular_crawler_discovery_queue | - | + +##### TO CHANGE: + ARDB overview ----------------------------------------- SENTIMENT ------------------------------------ diff --git a/bin/Decoder.py b/bin/Decoder.py index 76228dfb..82133de7 100755 --- a/bin/Decoder.py +++ b/bin/Decoder.py @@ -18,6 +18,7 @@ from Helper import Process from packages import Paste +from packages import Item import re import signal @@ -120,6 +121,12 @@ def save_hash(decoder_name, message, date, decoded): serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste + # Domain Object + if Item.is_crawled(message): + domain = Item.get_item_domain(message) + serv_metadata.sadd('hash_domain:{}'.format(domain), hash) # domain - hash map + serv_metadata.sadd('domain_hash:{}'.format(hash), domain) # hash - domain map + def save_hash_on_disk(decode, type, hash, json_data): diff --git a/bin/PgpDump.py b/bin/PgpDump.py index 4b7ec629..a269734f 100755 --- a/bin/PgpDump.py +++ b/bin/PgpDump.py @@ -21,6 +21,8 @@ from Helper import Process from packages import Paste +from packages import Pgp + class 
TimeoutException(Exception): pass @@ -117,31 +119,6 @@ def extract_id_from_output(pgp_dump_outpout): key_id = key_id.replace(key_id_str, '', 1) set_key.add(key_id) -def save_pgp_data(type_pgp, date, item_path, data): - # create basic medata - if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)): - serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date) - serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) - else: - last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen') - if not last_seen: - serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) - else: - if int(last_seen) < int(date): - serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) - - # global set - serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path) - - # daily - serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1) - - # all type - serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1) - - # item_metadata - serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data) - if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) @@ -236,12 +213,12 @@ def save_pgp_data(type_pgp, date, item_path, data): for key_id in set_key: print(key_id) - save_pgp_data('key', date, message, key_id) + Pgp.save_pgp_data('key', date, message, key_id) for name_id in set_name: print(name_id) - save_pgp_data('name', date, message, name_id) + Pgp.save_pgp_data('name', date, message, name_id) for mail_id in set_mail: print(mail_id) - save_pgp_data('mail', date, message, mail_id) + Pgp.save_pgp_data('mail', date, message, mail_id) diff --git a/bin/Tags.py b/bin/Tags.py index 2bf30d87..46c63d46 100755 --- a/bin/Tags.py +++ b/bin/Tags.py @@ -16,6 +16,8 @@ from pubsublogger import publisher from 
Helper import Process from packages import Paste +from packages import Item + def get_item_date(item_filename): l_directory = item_filename.split('/') @@ -84,6 +86,12 @@ def set_tag_metadata(tag, date): set_tag_metadata(tag, item_date) server_metadata.sadd('tag:{}'.format(path), tag) + # Domain Object + if Item.is_crawled(path): + domain = Item.get_item_domain(path) + server_metadata.sadd('tag:{}'.format(domain), tag) + server.sadd('domain:{}:{}'.format(tag, item_date), domain) + curr_date = datetime.date.today().strftime("%Y%m%d") server.hincrby('daily_tags:{}'.format(item_date), tag, 1) p.populate_set_out(message, 'MISP_The_Hive_feeder') diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index b769600b..cf923049 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -2,8 +2,10 @@ # -*-coding:UTF-8 -* import os +import sys import redis +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/')) import Flask_config r_serv_metadata = Flask_config.r_serv_metadata @@ -14,9 +16,11 @@ class Correlation(object): def __init__(self, correlation_name): self.correlation_name = correlation_name - def _exist_corelation_field(self, correlation_type, field_name): - return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) - + def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'): + if type=='paste': + return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) + else: + return r_serv_metadata.exists('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) def _get_items(self, correlation_type, field_name): res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) @@ -25,6 +29,12 @@ def _get_items(self, correlation_type, field_name): else: return [] + def _get_domains(self, correlation_type, field_name): + res = 
r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) + if res: + return list(res) + else: + return [] def _get_metadata(self, correlation_type, field_name): meta_dict = {} @@ -35,14 +45,14 @@ def _get_metadata(self, correlation_type, field_name): def _get_correlation_by_date(self, correlation_type, date): return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date)) - def verify_correlation_field_request(self, request_dict, correlation_type): + def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'): if not request_dict: - return Response({'status': 'error', 'reason': 'Malformed JSON'}, 400) + return ({'status': 'error', 'reason': 'Malformed JSON'}, 400) field_name = request_dict.get(correlation_type, None) if not field_name: return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 ) - if not self._exist_corelation_field(correlation_type, field_name): + if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type): return ( {'status': 'error', 'reason': 'Item not found'}, 404 ) def get_correlation(self, request_dict, correlation_type, field_name): @@ -58,7 +68,37 @@ def get_correlation(self, request_dict, correlation_type, field_name): return (dict_resp, 200) + def get_correlation_domain(self, request_dict, correlation_type, field_name): + dict_resp = {} + + dict_resp['domain'] = self._get_domains(correlation_type, field_name) + + #if request_dict.get('metadata'): + # dict_resp['metadata'] = self._get_metadata(correlation_type, field_name) + + dict_resp[correlation_type] = field_name + + return (dict_resp, 200) + +######## INTERNAL ######## + +def _get_domain_correlation_obj(correlation_name, correlation_type, domain): + print('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)) + res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(correlation_name, correlation_type, 
domain)) + if res: + return list(res) + else: + return [] + +######## ######## + +######## API EXPOSED ######## +def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain): + dict_resp = {} + dict_resp[correlation_type] = _get_domain_correlation_obj(correlation_name, correlation_type, domain) + dict_resp['domain'] = domain + return (dict_resp, 200) -#cryptocurrency_all:cryptocurrency name cryptocurrency address nb seen +######## ######## diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py index 995ada9e..eb5c00e6 100755 --- a/bin/packages/Cryptocurrency.py +++ b/bin/packages/Cryptocurrency.py @@ -10,11 +10,13 @@ sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config from Correlation import Correlation +import Item r_serv_metadata = Flask_config.r_serv_metadata +all_cryptocurrency = ['bitcoin', 'etherum'] + digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' -#address_validation = {'bitcoin': 'base58', 'dash': 'base58'} cryptocurrency = Correlation('cryptocurrency') @@ -52,6 +54,21 @@ def get_cryptocurrency(request_dict, cryptocurrency_type): return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) +# # TODO: add get all cryptocurrency option +def get_cryptocurrency_domain(request_dict, cryptocurrency_type): + res = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain') + if res: + return res + field_name = request_dict.get(cryptocurrency_type) + if not verify_cryptocurrency_address(cryptocurrency_type, field_name): + return ( {'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400 ) + + return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, field_name) + +def get_domain_cryptocurrency(request_dict, cryptocurrency_type): + return cryptocurrency.get_domain_correlation_obj(self, request_dict, cryptocurrency_type, domain) + + def 
save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): # create basic medata if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)): @@ -65,7 +82,8 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc if int(last_seen) < int(date): r_serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) - # global set + ## global set + # item r_serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path) # daily @@ -74,5 +92,12 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc # all type r_serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1) - # item_metadata + ## object_metadata + # item r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address) + + # domain + if Item.is_crawled(item_path): + domain = Item.get_item_domain(item_path) + r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address) + r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain) diff --git a/bin/packages/Domain.py b/bin/packages/Domain.py new file mode 100755 index 00000000..76f97735 --- /dev/null +++ b/bin/packages/Domain.py @@ -0,0 +1,85 @@ +#!/usr/bin/python3 + +""" +The ``Domain`` +=================== + + +""" + +import os +import sys +import time +import redis + +import Item + +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/')) +import Flask_config + +r_serv_onion = Flask_config.r_serv_onion + +def get_domain_type(domain): + if str(domain).endswith('.onion'): + return 'onion' + else: + return 'regular' + +def get_all_domain_up_by_type(domain_type): + if domain_type in domains: + list_domain = 
list(r_serv_onion.smembers('full_{}_up'.format(domain_type))) + return ({'type': domain_type, 'domains': list_domain}, 200) + else: + return ({"status": "error", "reason": "Invalid domain type"}, 400) + +def get_domain_items(domain, root_item_id): + dom_item = get_domain_item_children(domain, root_item_id) + dom_item.append(root_item_id) + return dom_item + +def get_domain_item_children(domain, root_item_id): + all_items = [] + for item_id in Item.get_item_children(root_item_id): + if Item.is_item_in_domain(domain, item_id): + all_items.append(item_id) + all_items.extend(get_domain_item_children(domain, item_id)) + return all_items + +def get_link_tree(): + pass + + +### +### correlation +### + +def _get_domain_correlation(domain, correlation_name=None, correlation_type=None): + res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) + if res: + return list(res) + else: + return [] + +def get_item_bitcoin(item_id): + return _get_item_correlation('cryptocurrency', 'bitcoin', item_id) + +def get_item_pgp_key(item_id): + return _get_item_correlation('pgpdump', 'key', item_id) + +def get_item_pgp_name(item_id): + return _get_item_correlation('pgpdump', 'name', item_id) + +def get_item_pgp_mail(item_id): + return _get_item_correlation('pgpdump', 'mail', item_id) + +def get_item_pgp_correlation(item_id): + pass + + +class Domain(object): + """docstring for Domain.""" + + def __init__(self, domain, port=80): + self.domain = str(domain) + ## TODO: handle none port + self.type = get_domain_type(domain) diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 4f7aa851..680b8f97 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -125,7 +125,6 @@ def get_item(request_dict): ### def _get_item_correlation(correlation_name, correlation_type, item_id): - print('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, 
item_id)) if res: return list(res) @@ -144,6 +143,8 @@ def get_item_pgp_name(item_id): def get_item_pgp_mail(item_id): return _get_item_correlation('pgpdump', 'mail', item_id) +def get_item_pgp_correlation(item_id): + pass ### ### GET Internal Module DESC @@ -153,3 +154,29 @@ def get_item_list_desc(list_item_id): for item_id in list_item_id: desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} ) return desc_list + +# # TODO: add an option to check the tag +def is_crawled(item_id): + return item_id.startswith('crawled') + +def is_onion(item_id): + is_onion = False + if len(is_onion) > 62: + if is_crawled(item_id) and item_id[-42:-36] == '.onion': + is_onion = True + return is_onion + +def is_item_in_domain(domain, item_id): + is_in_domain = False + domain_lenght = len(domain) + if len(item_id) > (domain_lenght+48): + if item_id[-36-domain_lenght:-36] == domain: + is_in_domain = True + return is_in_domain + +def get_item_domain(item_id): + return item_id[19:-36] + + +def get_item_children(item_id): + return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id))) diff --git a/bin/packages/Pgp.py b/bin/packages/Pgp.py index 9c7b0ec4..12ff34fa 100755 --- a/bin/packages/Pgp.py +++ b/bin/packages/Pgp.py @@ -2,14 +2,18 @@ # -*-coding:UTF-8 -* import os +import sys import redis from hashlib import sha256 +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) import Flask_config + from Correlation import Correlation +import Item -r_serv_metadata = Flask_config.r_serv_metadata +serv_metadata = Flask_config.r_serv_metadata pgpdump = Correlation('pgpdump') @@ -23,3 +27,36 @@ def get_pgp(request_dict, pgp_type): field_name = request_dict.get(pgp_type) return pgpdump.get_correlation(request_dict, pgp_type, field_name) + +def save_pgp_data(type_pgp, date, item_path, data): + # create basic medata + if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)): + 
serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date) + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) + else: + last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen') + if not last_seen: + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) + else: + if int(last_seen) < int(date): + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) + + # global set + serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path) + + # daily + serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1) + + # all type + serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1) + + ## object_metadata + # paste + serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data) + + + # domain object + if Item.is_crawled(item_path): + domain = Item.get_item_domain(item_path) + serv_metadata.sadd('domain_pgpdump_{}:{}'.format(type_pgp, domain), data) + serv_metadata.sadd('set_domain_pgpdump_{}:{}'.format(type_pgp, data), domain) diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index 70d7e72e..f1147715 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -121,6 +121,11 @@ def add_item_tag(tag, item_path): r_serv_metadata.sadd('tag:{}'.format(item_path), tag) r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) + if Item.is_crawled(item_path): + domain = Item.get_item_domain(item_path) + r_serv_metadata.sadd('tag:{}'.format(domain), tag) + r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) + r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') From f1def65c8994a38cc7f564d4e969432c2648ce76 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 28 Oct 2019 13:48:43 +0100 Subject: [PATCH 02/14] chg: [lib ConfigLoader] add configLoader class --- 
bin/Helper.py | 1 - bin/lib/ConfigLoader.py | 48 +++++++ var/www/Flask_server.py | 31 ++--- var/www/modules/Flask_config.py | 129 ++++-------------- .../modules/PasteSubmit/Flask_PasteSubmit.py | 1 - var/www/modules/Tags/Flask_Tags.py | 1 - var/www/modules/dashboard/Flask_dashboard.py | 6 +- .../modules/hashDecoded/Flask_hashDecoded.py | 1 - .../hiddenServices/Flask_hiddenServices.py | 1 - var/www/modules/hunter/Flask_hunter.py | 1 - .../modules/rawSkeleton/Flask_rawSkeleton.py | 1 - var/www/modules/restApi/Flask_restApi.py | 1 - var/www/modules/search/Flask_search.py | 7 +- var/www/modules/sentiment/Flask_sentiment.py | 1 - var/www/modules/settings/Flask_settings.py | 1 - var/www/modules/showpaste/Flask_showpaste.py | 1 - var/www/modules/terms/Flask_terms.py | 1 - .../trendingcharts/Flask_trendingcharts.py | 8 +- .../trendingmodules/Flask_trendingmodules.py | 1 - 19 files changed, 97 insertions(+), 145 deletions(-) create mode 100755 bin/lib/ConfigLoader.py diff --git a/bin/Helper.py b/bin/Helper.py index 52097ef6..2942b415 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -58,7 +58,6 @@ def setup_subscribe(self, conn_name): for address in addresses.split(','): new_sub = context.socket(zmq.SUB) new_sub.connect(address) - # bytes64 encode bytes to ascii only bytes new_sub.setsockopt_string(zmq.SUBSCRIBE, channel) self.subscribers.append(new_sub) diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py new file mode 100755 index 00000000..51508daa --- /dev/null +++ b/bin/lib/ConfigLoader.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 + +""" +The ``Domain`` +=================== + + +""" + +import os +import sys +import time +import redis +import configparser + +# Get Config file +config_dir = os.path.join(os.environ['AIL_HOME'], 'configs') +config_file = os.path.join(config_dir, 'core.cfg') +if not os.path.exists(config_file): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? 
\ + Or activate the virtualenv.') + + # # TODO: create sphinx doc + + # # TODO: add config_field to reload + +class ConfigLoader(object): + """docstring for Config_Loader.""" + + def __init__(self): + self.cfg = configparser.ConfigParser() + self.cfg.read(config_file) + + def get_redis_conn(self, redis_name, decode_responses=True): ## TODO: verify redis name + return redis.StrictRedis( host=self.cfg.get(redis_name, "host"), + port=self.cfg.getint(redis_name, "port"), + db=self.cfg.getint(redis_name, "db"), + decode_responses=decode_responses ) + + def get_config_str(self, section, key_name): + return self.cfg.get(section, key_name) + + def get_config_int(self, section, key_name): + return self.cfg.getint(section, key_name) + + def get_config_boolean(self, section, key_name): + return self.cfg.getboolean(section, key_name) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 0b6b7fa6..a4518a00 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -24,6 +24,10 @@ from User import User +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + + from pytaxonomies import Taxonomies # Import config @@ -31,33 +35,21 @@ # Import Blueprint from blueprints.root import root +from blueprints.crawler_splash import crawler_splash Flask_dir = os.environ['AIL_FLASK'] # CONFIG # -cfg = Flask_config.cfg -baseUrl = cfg.get("Flask", "baseurl") +config_loader = ConfigLoader.ConfigLoader() +baseUrl = config_loader.get_config_str("Flask", "baseurl") baseUrl = baseUrl.replace('/', '') if baseUrl != '': baseUrl = '/'+baseUrl # ========= REDIS =========# -r_serv_db = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) -r_serv_tags = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) - -r_cache = redis.StrictRedis( - 
host=cfg.get("Redis_Cache", "host"), - port=cfg.getint("Redis_Cache", "port"), - db=cfg.getint("Redis_Cache", "db"), - decode_responses=True) +r_serv_db = config_loader.get_redis_conn("ARDB_DB") +r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") +r_cache = config_loader.get_redis_conn("Redis_Cache") # logs log_dir = os.path.join(os.environ['AIL_HOME'], 'logs') @@ -88,6 +80,7 @@ # ========= BLUEPRINT =========# app.register_blueprint(root, url_prefix=baseUrl) +app.register_blueprint(crawler_splash, url_prefix=baseUrl) # ========= =========# # ========= session ======== @@ -199,7 +192,7 @@ def add_header(response): @app.errorhandler(405) def _handle_client_error(e): - if request.path.startswith('/api/'): ## # TODO: add baseUrl + if request.path.startswith('/api/'): ## # TODO: add baseUrl res_dict = {"status": "error", "reason": "Method Not Allowed: The method is not allowed for the requested URL"} anchor_id = request.path[8:] anchor_id = anchor_id.replace('/', '_') diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 49f2919c..6525cb5e 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -4,109 +4,34 @@ ''' Flask global variables shared accross modules ''' -import configparser -import redis import os import re import sys +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + # FLASK # app = None # CONFIG # -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - -cfg = configparser.ConfigParser() -cfg.read(configfile) +config_loader = ConfigLoader.ConfigLoader() # REDIS # -r_serv = redis.StrictRedis( - host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - -r_cache = redis.StrictRedis( - host=cfg.get("Redis_Cache", "host"), - port=cfg.getint("Redis_Cache", "port"), - db=cfg.getint("Redis_Cache", "db"), - decode_responses=True) - -r_serv_log = redis.StrictRedis( - host=cfg.get("Redis_Log", "host"), - port=cfg.getint("Redis_Log", "port"), - db=cfg.getint("Redis_Log", "db"), - decode_responses=True) - -r_serv_log_submit = redis.StrictRedis( - host=cfg.get("Redis_Log_submit", "host"), - port=cfg.getint("Redis_Log_submit", "port"), - db=cfg.getint("Redis_Log_submit", "db"), - decode_responses=True) - -r_serv_charts = redis.StrictRedis( - host=cfg.get("ARDB_Trending", "host"), - port=cfg.getint("ARDB_Trending", "port"), - db=cfg.getint("ARDB_Trending", "db"), - decode_responses=True) - -r_serv_sentiment = redis.StrictRedis( - host=cfg.get("ARDB_Sentiment", "host"), - port=cfg.getint("ARDB_Sentiment", "port"), - db=cfg.getint("ARDB_Sentiment", "db"), - decode_responses=True) - -r_serv_term = redis.StrictRedis( - host=cfg.get("ARDB_Tracker", "host"), - port=cfg.getint("ARDB_Tracker", "port"), - db=cfg.getint("ARDB_Tracker", "db"), - decode_responses=True) - -r_serv_cred = redis.StrictRedis( - host=cfg.get("ARDB_TermCred", "host"), - port=cfg.getint("ARDB_TermCred", "port"), - db=cfg.getint("ARDB_TermCred", "db"), - decode_responses=True) - -r_serv_pasteName = redis.StrictRedis( - host=cfg.get("Redis_Paste_Name", "host"), - port=cfg.getint("Redis_Paste_Name", "port"), - db=cfg.getint("Redis_Paste_Name", "db"), - decode_responses=True) - -r_serv_tags = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) 
- -r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - -r_serv_db = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - -r_serv_statistics = redis.StrictRedis( - host=cfg.get("ARDB_Statistics", "host"), - port=cfg.getint("ARDB_Statistics", "port"), - db=cfg.getint("ARDB_Statistics", "db"), - decode_responses=True) - -r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) +r_serv = config_loader.get_redis_conn("Redis_Queues") +r_cache = config_loader.get_redis_conn("Redis_Cache") +r_serv_log = config_loader.get_redis_conn("Redis_Log") +r_serv_log_submit = config_loader.get_redis_conn("Redis_Log_submit") +r_serv_charts = config_loader.get_redis_conn("ARDB_Trending") +r_serv_sentiment = config_loader.get_redis_conn("ARDB_Sentiment") +r_serv_term = config_loader.get_redis_conn("ARDB_Tracker") +r_serv_cred = config_loader.get_redis_conn("ARDB_TermCred") +r_serv_pasteName = config_loader.get_redis_conn("Redis_Paste_Name") +r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +r_serv_db = config_loader.get_redis_conn("ARDB_DB") +r_serv_statistics = config_loader.get_redis_conn("ARDB_Statistics") +r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") sys.path.append('../../configs/keys') # MISP # @@ -146,17 +71,17 @@ print('The Hive not connected') #### VARIABLES #### -baseUrl = cfg.get("Flask", "baseurl") +baseUrl = config_loader.get_config_str("Flask", "baseurl") baseUrl = baseUrl.replace('/', '') if baseUrl != '': baseUrl = '/'+baseUrl -max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the 
tooltip -max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal +max_preview_char = int(config_loader.get_config_str("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip +max_preview_modal = int(config_loader.get_config_str("Flask", "max_preview_modal")) # Maximum number of character to display in the modal max_tags_result = 50 -DiffMaxLineLength = int(cfg.get("Flask", "DiffMaxLineLength"))#Use to display the estimated percentage instead of a raw value +DiffMaxLineLength = int(config_loader.get_config_str("Flask", "DiffMaxLineLength"))#Use to display the estimated percentage instead of a raw value bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] @@ -166,14 +91,14 @@ UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') -PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' -SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"), 'screenshot') +PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' +SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') REPO_ORIGIN = 'https://github.com/CIRCL/AIL-framework.git' -max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs")) +max_dashboard_logs = int(config_loader.get_config_str("Flask", "max_dashboard_logs")) -crawler_enabled = cfg.getboolean("Crawler", "activate_crawler") +crawler_enabled = config_loader.get_config_boolean("Crawler", "activate_crawler") email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' email_regex = re.compile(email_regex) @@ -191,6 +116,6 @@ vt_enabled = False print('VT submission is disabled') except: - vt_auth = {'apikey': cfg.get("Flask", "max_preview_char")} + vt_auth = {'apikey': config_loader.get_config_str("Flask", 
"max_preview_char")} vt_enabled = False print('VT submission is disabled') diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py index 76bae898..4af8fa0b 100644 --- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py +++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py @@ -44,7 +44,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_tags = Flask_config.r_serv_tags r_serv_metadata = Flask_config.r_serv_metadata diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index d15b78a8..4772e82e 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -23,7 +23,6 @@ import Tag app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_tags = Flask_config.r_serv_tags r_serv_metadata = Flask_config.r_serv_metadata diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py index 160d9edb..29def6cf 100644 --- a/var/www/modules/dashboard/Flask_dashboard.py +++ b/var/www/modules/dashboard/Flask_dashboard.py @@ -21,7 +21,7 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg +config_loader = Flask_config.config_loader baseUrl = Flask_config.baseUrl r_serv = Flask_config.r_serv r_serv_log = Flask_config.r_serv_log @@ -171,8 +171,8 @@ def stuff(): @login_required @login_analyst def index(): - default_minute = cfg.get("Flask", "minute_processed_paste") - threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module") + default_minute = config_loader.get_config_str("Flask", "minute_processed_paste") + threshold_stucked_module = config_loader.get_config_int("Module_ModuleInformation", "threshold_stucked_module") log_select = {10, 25, 50, 100} log_select.add(max_dashboard_logs) log_select = list(log_select) diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py 
b/var/www/modules/hashDecoded/Flask_hashDecoded.py index f6073afb..ae3ee48b 100644 --- a/var/www/modules/hashDecoded/Flask_hashDecoded.py +++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py @@ -24,7 +24,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_metadata = Flask_config.r_serv_metadata vt_enabled = Flask_config.vt_enabled diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index e3ee2bcb..f64df4e1 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -23,7 +23,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_cache = Flask_config.r_cache r_serv_onion = Flask_config.r_serv_onion diff --git a/var/www/modules/hunter/Flask_hunter.py b/var/www/modules/hunter/Flask_hunter.py index f86d3403..e9028b36 100644 --- a/var/www/modules/hunter/Flask_hunter.py +++ b/var/www/modules/hunter/Flask_hunter.py @@ -27,7 +27,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_term = Flask_config.r_serv_term r_serv_cred = Flask_config.r_serv_cred diff --git a/var/www/modules/rawSkeleton/Flask_rawSkeleton.py b/var/www/modules/rawSkeleton/Flask_rawSkeleton.py index d767a83c..dca8f331 100644 --- a/var/www/modules/rawSkeleton/Flask_rawSkeleton.py +++ b/var/www/modules/rawSkeleton/Flask_rawSkeleton.py @@ -14,7 +14,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg rawSkeleton = Blueprint('rawSkeleton', __name__, template_folder='templates') diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index cc32afd6..cbd93dd6 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -31,7 +31,6 @@ app = Flask_config.app -cfg = Flask_config.cfg baseUrl = 
Flask_config.baseUrl r_cache = Flask_config.r_cache r_serv_db = Flask_config.r_serv_db diff --git a/var/www/modules/search/Flask_search.py b/var/www/modules/search/Flask_search.py index 67a518fb..ff5395e6 100644 --- a/var/www/modules/search/Flask_search.py +++ b/var/www/modules/search/Flask_search.py @@ -25,7 +25,7 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg +config_loader = Flask_config.config_loader baseUrl = Flask_config.baseUrl r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_metadata = Flask_config.r_serv_metadata @@ -34,9 +34,8 @@ bootstrap_label = Flask_config.bootstrap_label PASTES_FOLDER = Flask_config.PASTES_FOLDER -baseindexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) -indexRegister_path = os.path.join(os.environ['AIL_HOME'], - cfg.get("Indexer", "register")) +baseindexpath = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Indexer", "path")) +indexRegister_path = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Indexer", "register")) searches = Blueprint('searches', __name__, template_folder='templates') diff --git a/var/www/modules/sentiment/Flask_sentiment.py b/var/www/modules/sentiment/Flask_sentiment.py index af6c220c..895bd0ee 100644 --- a/var/www/modules/sentiment/Flask_sentiment.py +++ b/var/www/modules/sentiment/Flask_sentiment.py @@ -20,7 +20,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_charts = Flask_config.r_serv_charts r_serv_sentiment = Flask_config.r_serv_sentiment diff --git a/var/www/modules/settings/Flask_settings.py b/var/www/modules/settings/Flask_settings.py index a569cbbb..0ad1f43c 100644 --- a/var/www/modules/settings/Flask_settings.py +++ b/var/www/modules/settings/Flask_settings.py @@ -19,7 +19,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_db = Flask_config.r_serv_db max_preview_char = 
Flask_config.max_preview_char diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 10519d53..a972a346 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -23,7 +23,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_metadata = Flask_config.r_serv_metadata diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py index c594839a..3e166063 100644 --- a/var/www/modules/terms/Flask_terms.py +++ b/var/www/modules/terms/Flask_terms.py @@ -29,7 +29,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_term = Flask_config.r_serv_term r_serv_cred = Flask_config.r_serv_cred diff --git a/var/www/modules/trendingcharts/Flask_trendingcharts.py b/var/www/modules/trendingcharts/Flask_trendingcharts.py index a037e171..b2dfa68a 100644 --- a/var/www/modules/trendingcharts/Flask_trendingcharts.py +++ b/var/www/modules/trendingcharts/Flask_trendingcharts.py @@ -17,7 +17,7 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg +config_loader = Flask_config.config_loader baseUrl = Flask_config.baseUrl r_serv_charts = Flask_config.r_serv_charts @@ -69,7 +69,7 @@ def progressionCharts(): @login_required @login_analyst def wordstrending(): - default_display = cfg.get("Flask", "default_display") + default_display = config_loader.get_config_str("Flask", "default_display") return render_template("Wordstrending.html", default_display = default_display) @@ -77,7 +77,7 @@ def wordstrending(): @login_required @login_analyst def protocolstrending(): - default_display = cfg.get("Flask", "default_display") + default_display = config_loader.get_config_str("Flask", "default_display") return render_template("Protocolstrending.html", default_display = default_display) @@ -85,7 +85,7 
@@ def protocolstrending(): @login_required @login_analyst def trending(): - default_display = cfg.get("Flask", "default_display") + default_display = config_loader.get_config_str("Flask", "default_display") return render_template("Trending.html", default_display = default_display) diff --git a/var/www/modules/trendingmodules/Flask_trendingmodules.py b/var/www/modules/trendingmodules/Flask_trendingmodules.py index 80646ecb..816d8055 100644 --- a/var/www/modules/trendingmodules/Flask_trendingmodules.py +++ b/var/www/modules/trendingmodules/Flask_trendingmodules.py @@ -17,7 +17,6 @@ import Flask_config app = Flask_config.app -cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_serv_charts = Flask_config.r_serv_charts From 4b389559ab8bde8324b18236b5917d3c9b44e792 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 29 Oct 2019 09:13:44 +0100 Subject: [PATCH 03/14] chg: [Domain] move Domain to lib/ --- bin/{packages => lib}/Domain.py | 16 ++++++++++++---- bin/packages/Correlation.py | 1 + bin/packages/Cryptocurrency.py | 21 +++++++++++++++++---- 3 files changed, 30 insertions(+), 8 deletions(-) rename bin/{packages => lib}/Domain.py (82%) diff --git a/bin/packages/Domain.py b/bin/lib/Domain.py similarity index 82% rename from bin/packages/Domain.py rename to bin/lib/Domain.py index 76f97735..233fc574 100755 --- a/bin/packages/Domain.py +++ b/bin/lib/Domain.py @@ -12,12 +12,17 @@ import time import redis +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Correlation +import Cryptocurrency import Item -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/')) -import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader -r_serv_onion = Flask_config.r_serv_onion +config_loader = ConfigLoader.ConfigLoader() +r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") +config_loader = None def get_domain_type(domain): if str(domain).endswith('.onion'): @@ -52,7 +57,7 @@ def get_link_tree(): ### ### 
correlation ### - +""" def _get_domain_correlation(domain, correlation_name=None, correlation_type=None): res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) if res: @@ -74,7 +79,10 @@ def get_item_pgp_mail(item_id): def get_item_pgp_correlation(item_id): pass +""" +def _get_domain_correlation(domain, correlation_list): + return Cryptocurrency.get_cryptocurrency_domain(domain) class Domain(object): """docstring for Domain.""" diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index cf923049..dbef4d8d 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -49,6 +49,7 @@ def verify_correlation_field_request(self, request_dict, correlation_type, item_ if not request_dict: return ({'status': 'error', 'reason': 'Malformed JSON'}, 400) + print(correlation_type) field_name = request_dict.get(correlation_type, None) if not field_name: return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 ) diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py index eb5c00e6..d9f657b9 100755 --- a/bin/packages/Cryptocurrency.py +++ b/bin/packages/Cryptocurrency.py @@ -14,7 +14,7 @@ r_serv_metadata = Flask_config.r_serv_metadata -all_cryptocurrency = ['bitcoin', 'etherum'] +all_cryptocurrency = ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash'] digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' @@ -41,6 +41,18 @@ def verify_cryptocurrency_address(cryptocurrency_type, cryptocurrency_address): else: return True +def get_all_all_cryptocurrency(): + return all_cryptocurrency + +# check if all crypto type in the list are valid +# if a type is invalid, return the full list of currency types +def sanythise_cryptocurrency_types(cryptocurrency_types): + if cryptocurrency_types is None: + return get_all_all_cryptocurrency() + for currency in cryptocurrency_types: # # TODO: # OPTIMIZE: + if currency not in 
all_cryptocurrency: + return get_all_all_cryptocurrency() + return cryptocurrency_types def get_cryptocurrency(request_dict, cryptocurrency_type): # basic verification @@ -54,9 +66,10 @@ def get_cryptocurrency(request_dict, cryptocurrency_type): return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) -# # TODO: add get all cryptocurrency option -def get_cryptocurrency_domain(request_dict, cryptocurrency_type): - res = cryptocurrency.verify_correlation_field_request(request_dict, cryptocurrency_type, item_type='domain') +def get_cryptocurrency_domain(request_dict, cryptocurrency_type=None): + currency_types = sanythise_cryptocurrency_types(cryptocurrency_type) + + res = cryptocurrency.verify_correlation_field_request(request_dict, currency_types, item_type='domain') if res: return res field_name = request_dict.get(cryptocurrency_type) From 44bb18a8be7aac01a529f08f6a57d600337a8f4a Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 29 Oct 2019 16:52:33 +0100 Subject: [PATCH 04/14] chg: [Correlation] get correlation (crypto + pgp) by domain --- bin/lib/Domain.py | 68 +++++++++++++-------- bin/packages/Correlation.py | 105 +++++++++++++++++++++------------ bin/packages/Cryptocurrency.py | 45 ++++---------- bin/packages/Pgp.py | 19 +++--- bin/packages/Tag.py | 17 ++++-- 5 files changed, 148 insertions(+), 106 deletions(-) diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 233fc574..8da5d960 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -14,8 +14,10 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Correlation -import Cryptocurrency +from Cryptocurrency import cryptocurrency +from Pgp import pgp import Item +import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader @@ -54,35 +56,53 @@ def get_link_tree(): pass -### -### correlation -### -""" -def _get_domain_correlation(domain, correlation_name=None, correlation_type=None): - res = 
r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) - if res: - return list(res) - else: - return [] +def get_domain_tags(domain): + ''' + Return all tags of a given domain. -def get_item_bitcoin(item_id): - return _get_item_correlation('cryptocurrency', 'bitcoin', item_id) + :param domain: crawled domain + ''' + return Tag.get_item_tags(domain) -def get_item_pgp_key(item_id): - return _get_item_correlation('pgpdump', 'key', item_id) +def get_domain_cryptocurrency(domain, currencies_type=None): + ''' + Return all cryptocurrencies of a given domain. -def get_item_pgp_name(item_id): - return _get_item_correlation('pgpdump', 'name', item_id) + :param domain: crawled domain + :param currencies_type: list of cryptocurrencies type + :type currencies_type: list, optional + ''' + return cryptocurrency.get_domain_correlation_dict(domain, correlation_type=currencies_type) -def get_item_pgp_mail(item_id): - return _get_item_correlation('pgpdump', 'mail', item_id) +def get_domain_pgp(domain, currencies_type=None): + ''' + Return all pgp of a given domain. -def get_item_pgp_correlation(item_id): - pass -""" + :param domain: crawled domain + :param currencies_type: list of pgp type + :type currencies_type: list, optional + ''' + return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type) + +def get_domain_all_correlation(domain, correlation_type=None): + ''' + Return all correlation of a given domain. 
+ + :param domain: crawled domain + :type domain: str + + :return: a dict of all correlation for a given domain + :rtype: dict + ''' + domain_correl = {} + res = get_domain_cryptocurrency(domain) + if res: + domain_correl['cryptocurrency'] = res + res = get_domain_pgp(domain) + if res: + domain_correl['pgp'] = res + return domain_correl -def _get_domain_correlation(domain, correlation_list): - return Cryptocurrency.get_cryptocurrency_domain(domain) class Domain(object): """docstring for Domain.""" diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index dbef4d8d..27b6dc24 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -5,16 +5,18 @@ import sys import redis -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules/')) -import Flask_config - -r_serv_metadata = Flask_config.r_serv_metadata +sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'lib/')) +import ConfigLoader +config_loader = ConfigLoader.ConfigLoader() +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +config_loader = None class Correlation(object): - def __init__(self, correlation_name): + def __init__(self, correlation_name, all_correlation_types): self.correlation_name = correlation_name + self.all_correlation_types = all_correlation_types def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'): if type=='paste': @@ -29,13 +31,6 @@ def _get_items(self, correlation_type, field_name): else: return [] - def _get_domains(self, correlation_type, field_name): - res = r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) - if res: - return list(res) - else: - return [] - def _get_metadata(self, correlation_type, field_name): meta_dict = {} meta_dict['first_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(self.correlation_name, correlation_type, field_name), 'first_seen') @@ -49,7 +44,6 @@ def verify_correlation_field_request(self, request_dict, 
correlation_type, item_ if not request_dict: return ({'status': 'error', 'reason': 'Malformed JSON'}, 400) - print(correlation_type) field_name = request_dict.get(correlation_type, None) if not field_name: return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 ) @@ -69,37 +63,72 @@ def get_correlation(self, request_dict, correlation_type, field_name): return (dict_resp, 200) - def get_correlation_domain(self, request_dict, correlation_type, field_name): - dict_resp = {} - - dict_resp['domain'] = self._get_domains(correlation_type, field_name) - - #if request_dict.get('metadata'): - # dict_resp['metadata'] = self._get_metadata(correlation_type, field_name) - - dict_resp[correlation_type] = field_name + def get_all_correlation_types(self): + ''' + Get all correlation types + + :return: A list of all the correlation types + :rtype: list + ''' + return self.all_correlation_types + + def sanythise_correlation_types(self, correlation_types): + ''' + Check if all correlation types in the list are valid. + + :param correlation_types: list of correlation type + :type correlation_types: list + + :return: If a type is invalid, return the full list of correlation types else return the provided list + :rtype: list + ''' + if correlation_types is None: + return self.get_all_correlation_types() + for correl in correlation_types: # # TODO: # OPTIMIZE: + if correl not in self.get_all_correlation_types(): + return self.get_all_correlation_types() + return correlation_types + + + def _get_domain_correlation_obj(self, domain, correlation_type): + ''' + Return correlation of a given domain. 
+ + :param domain: crawled domain + :type domain: str + :param correlation_type: correlation type + :type correlation_type: str + + :return: a list of correlation + :rtype: list + ''' + res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain)) + if res: + return list(res) + else: + return [] - return (dict_resp, 200) + def get_domain_correlation_dict(self, domain, correlation_type=None): + ''' + Return all correlation of a given domain. -######## INTERNAL ######## + :param domain: crawled domain + :param correlation_type: list of correlation types + :type correlation_type: list, optional -def _get_domain_correlation_obj(correlation_name, correlation_type, domain): - print('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)) - res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(correlation_name, correlation_type, domain)) - if res: - return list(res) - else: - return [] + :return: a dictionary of all the requested correlations + :rtype: dict + ''' + correlation_type = self.sanythise_correlation_types(correlation_type) + dict_correlation = {} + for correl in correlation_type: + res = self._get_domain_correlation_obj(domain, correl) + if res: + dict_correlation[correl] = res + return dict_correlation -######## ######## ######## API EXPOSED ######## -def get_domain_correlation_obj(request_dict, correlation_name, correlation_type, domain): - dict_resp = {} - dict_resp[correlation_type] = _get_domain_correlation_obj(correlation_name, correlation_type, domain) - dict_resp['domain'] = domain - - return (dict_resp, 200) ######## ######## diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py index d9f657b9..d83f91dd 100755 --- a/bin/packages/Cryptocurrency.py +++ b/bin/packages/Cryptocurrency.py @@ -7,18 +7,25 @@ from hashlib import sha256 -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) -import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 
'packages')) from Correlation import Correlation import Item -r_serv_metadata = Flask_config.r_serv_metadata +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader -all_cryptocurrency = ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash'] +config_loader = ConfigLoader.ConfigLoader() +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +config_loader = None digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' -cryptocurrency = Correlation('cryptocurrency') + +class Cryptocurrency(Correlation): + def __init__(self): + super().__init__('cryptocurrency', ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash']) + +cryptocurrency = Cryptocurrency() # http://rosettacode.org/wiki/Bitcoin/address_validation#Python def decode_base58(bc, length): @@ -41,18 +48,6 @@ def verify_cryptocurrency_address(cryptocurrency_type, cryptocurrency_address): else: return True -def get_all_all_cryptocurrency(): - return all_cryptocurrency - -# check if all crypto type in the list are valid -# if a type is invalid, return the full list of currency types -def sanythise_cryptocurrency_types(cryptocurrency_types): - if cryptocurrency_types is None: - return get_all_all_cryptocurrency() - for currency in cryptocurrency_types: # # TODO: # OPTIMIZE: - if currency not in all_cryptocurrency: - return get_all_all_cryptocurrency() - return cryptocurrency_types def get_cryptocurrency(request_dict, cryptocurrency_type): # basic verification @@ -66,22 +61,6 @@ def get_cryptocurrency(request_dict, cryptocurrency_type): return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) -def get_cryptocurrency_domain(request_dict, cryptocurrency_type=None): - currency_types = sanythise_cryptocurrency_types(cryptocurrency_type) - - res = cryptocurrency.verify_correlation_field_request(request_dict, currency_types, item_type='domain') - if res: - return res - field_name = 
request_dict.get(cryptocurrency_type) - if not verify_cryptocurrency_address(cryptocurrency_type, field_name): - return ( {'status': 'error', 'reason': 'Invalid Cryptocurrency address'}, 400 ) - - return cryptocurrency.get_correlation_domain(request_dict, cryptocurrency_type, field_name) - -def get_domain_cryptocurrency(request_dict, cryptocurrency_type): - return cryptocurrency.get_domain_correlation_obj(self, request_dict, cryptocurrency_type, domain) - - def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): # create basic medata if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)): diff --git a/bin/packages/Pgp.py b/bin/packages/Pgp.py index 12ff34fa..986c78d3 100755 --- a/bin/packages/Pgp.py +++ b/bin/packages/Pgp.py @@ -5,18 +5,23 @@ import sys import redis -from hashlib import sha256 - -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) -import Flask_config - +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) from Correlation import Correlation import Item -serv_metadata = Flask_config.r_serv_metadata +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +config_loader = ConfigLoader.ConfigLoader() +serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +config_loader = None + -pgpdump = Correlation('pgpdump') +class Pgp(Correlation): + def __init__(self): + super().__init__('pgpdump', ['key', 'mail', 'name']) +pgp = Pgp() def get_pgp(request_dict, pgp_type): # basic verification diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index f1147715..00c59cfa 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -2,17 +2,22 @@ # -*-coding:UTF-8 -* import os +import sys import redis -import Flask_config import Date import Item +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + from pytaxonomies import Taxonomies from pymispgalaxies import Galaxies, 
Clusters -r_serv_tags = Flask_config.r_serv_tags -r_serv_metadata = Flask_config.r_serv_metadata +config_loader = ConfigLoader.ConfigLoader() +r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +config_loader = None def get_taxonomie_from_tag(tag): return tag.split(':')[0] @@ -77,8 +82,12 @@ def is_tag_in_all_tag(tag): def get_all_tags(): return list(r_serv_tags.smembers('list_tags')) +''' +Retun all the tags of a given item. +:param item_id: (Paste or domain) +''' def get_item_tags(item_id): - tags = r_serv_metadata.smembers('tag:'+item_id) + tags = r_serv_metadata.smembers('tag:{}'.format(item_id)) if tags: return list(tags) else: From a2d6874417f6867edfd471c2de5dad5f037d698e Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 30 Oct 2019 17:12:04 +0100 Subject: [PATCH 05/14] chg: [Domain + UI Crawler] refractor show domain --- bin/lib/Domain.py | 111 +++++++++++- bin/packages/Correlation.py | 18 ++ var/www/blueprints/crawler_splash.py | 59 ++++++ .../crawler/crawler_splash}/showDomain.html | 168 +++++++++++++++--- 4 files changed, 327 insertions(+), 29 deletions(-) create mode 100644 var/www/blueprints/crawler_splash.py rename var/www/{modules/hiddenServices/templates => templates/crawler/crawler_splash}/showDomain.html (52%) diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 8da5d960..fd8ac372 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -56,6 +56,7 @@ def get_link_tree(): pass + def get_domain_tags(domain): ''' Retun all tags of a given domain. @@ -103,11 +104,119 @@ def get_domain_all_correlation(domain, correlation_type=None): domain_correl['pgp'] = res return domain_correl + # TODO: handle port +def get_domain_history(domain, domain_type, port): # TODO: add date_range: from to + nb_elem + ''' + Retun . 
+ + :param domain: crawled domain + :type domain: str + + :return: + :rtype: list of tuple (item_core, epoch) + ''' + return r_serv_onion.zrange('crawler_history_{}:{}:{}'.format(domain_type, domain, port), 0, -1, withscores=True) + +def get_domain_history_with_status(domain, domain_type, port): # TODO: add date_range: from to + nb_elem + ''' + Return the history of a given domain, with an up/down status per crawl. + + :param domain: crawled domain + :type domain: str + + :return: + :rtype: list of dict (epoch, date: %Y/%m/%d - %H:%M.%S, boolean status) + ''' + l_history = [] + history = get_domain_history(domain, domain_type, port) + for root_item, epoch_val in history: + epoch_val = int(epoch_val) # force int + # domain down, root_item==epoch_val + try: + int(root_item) + status = False + # domain up, root_item=str + except ValueError: + status = True + l_history.append({"epoch": epoch_val, "date": time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val)), "status": status}) + return l_history + class Domain(object): """docstring for Domain.""" def __init__(self, domain, port=80): self.domain = str(domain) - ## TODO: handle none port self.type = get_domain_type(domain) + + def get_domain_first_seen(self): + ''' + Get domain first seen date + + :return: domain first seen date + :rtype: str + ''' + first_seen = r_serv_onion.hget('{}_metadata:{}'.format(self.type, self.domain), 'first_seen') + if first_seen is not None: + first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8]) + return first_seen + + def get_domain_last_check(self):# # TODO: add epoch ??? 
+ ''' + Get domain last check date + + :return: domain last check date + :rtype: str + ''' + last_check = r_serv_onion.hget('{}_metadata:{}'.format(self.type, self.domain), 'last_check') + if last_check is not None: + last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8]) + return last_check + + #def get_domain_all_ports(self): + # pass + + def get_domain_metadata(self, first_seen=True, last_ckeck=True, ports=True): + ''' + Get Domain basic metadata + + :param first_seen: get domain first_seen + :type first_seen: boolean + :param last_ckeck: get domain last_check + :type last_ckeck: boolean + :param ports: get all domain ports + :type ports: boolean + + :return: a dict of all metadata for a given domain + :rtype: dict + ''' + dict_metadata = {} + if first_seen: + res = self.get_domain_first_seen() + if res is not None: + dict_metadata['first_seen'] = res + if last_ckeck: + res = self.get_domain_last_check() + if res is not None: + dict_metadata['last_check'] = res + return dict_metadata + + def get_domain_tags(self): + ''' + Return all tags of a given domain. + + :param domain: crawled domain + ''' + return get_domain_tags(self.domain) + + def get_domain_correlation(self): + ''' + Return all correlations of a given domain. + ''' + return get_domain_all_correlation(self.domain) + + def get_domain_history_with_status(self): + ''' + Return the full history of a given domain and port. + ''' + return get_domain_history_with_status(self.domain, self.type, 80) diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index 27b6dc24..ee339fb2 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -108,6 +108,24 @@ def _get_domain_correlation_obj(self, domain, correlation_type): else: return [] + def _get_correlation_obj_domain(self, field_name, correlation_type): + ''' + Return all domains that contain this correlation. 
+ + :param domain: field name + :type domain: str + :param correlation_type: correlation type + :type correlation_type: str + + :return: a list of correlation + :rtype: list + ''' + res = r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)) + if res: + return list(res) + else: + return [] + def get_domain_correlation_dict(self, domain, correlation_type=None): ''' Return all correlation of a given domain. diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py new file mode 100644 index 00000000..364a84d9 --- /dev/null +++ b/var/www/blueprints/crawler_splash.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... +''' + +import os +import sys + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response +from flask_login import login_required, current_user, login_user, logout_user + +sys.path.append('modules') +import Flask_config + +# Import Role_Manager +from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity +from Role_Manager import login_admin, login_analyst + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) +import Domain + +r_cache = Flask_config.r_cache +r_serv_db = Flask_config.r_serv_db +r_serv_tags = Flask_config.r_serv_tags +bootstrap_label = Flask_config.bootstrap_label + +# ============ BLUEPRINT ============ +crawler_splash = Blueprint('crawler_splash', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/crawler/crawler_splash')) + +# ============ VARIABLES ============ + + + +# ============ FUNCTIONS ============ + + + +# ============= ROUTES ============== +@crawler_splash.route('/crawlers/showDomain') +#@login_required +#@login_analyst +def showDomain(): + domain_name = request.args.get('domain') + epoch = request.args.get('epoch') + port = 
request.args.get('port') + + domain = Domain.Domain(domain_name) + + dict_domain = domain.get_domain_metadata() + dict_domain = {**dict_domain, **domain.get_domain_correlation()} + dict_domain['domain'] = domain_name + dict_domain['tags'] = domain.get_domain_tags() + dict_domain['history'] = domain.get_domain_history_with_status() + + print(dict_domain) + + return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, screenshot={'item': None, '':None}, dict_links={}) diff --git a/var/www/modules/hiddenServices/templates/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html similarity index 52% rename from var/www/modules/hiddenServices/templates/showDomain.html rename to var/www/templates/crawler/crawler_splash/showDomain.html index 4230ec5a..72c9e155 100644 --- a/var/www/modules/hiddenServices/templates/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -6,10 +6,10 @@ - - + + @@ -45,7 +45,7 @@ {% endif %} -

{{ domain }} :

+

{{ dict_domain['domain'] }} :

@@ -58,23 +58,22 @@

{{ domain }} :

- - + +
{{ first_seen }}{{ last_check }}{%if "first_seen" in dict_domain%}{{ dict_domain['first_seen'] }}{%endif%}{%if "last_check" in dict_domain%}{{ dict_domain['last_check'] }}{%endif%} {{ ports }}
- Origin Paste: {% if origin_paste_name=='manual' or origin_paste_name=='auto' %} {{ origin_paste_name }} {%else%} {{ origin_paste_name }} {%endif%}
- {% for tag in origin_paste_tags %} - - {{ tag[0] }} + {% for tag in dict_domain['tags'] %} + + {{ tag }} {% endfor %}
@@ -83,17 +82,122 @@

{{ domain }} :

-
- {% for tag in domain_tags %} - - {{ tag }} {{ domain_tags[tag] }} - - {% endfor %} -
-
-
+ + {% if 'pgp' in dict_domain%} +
+
+
+
+
+
+ PGP Dumps   +
{{l_64|length}}
+
+
+
+ +
+
+
+
+
+ + + + + + + + + {% for dict_key in dict_domain['pgp']%} + {% if dict_key=="mail" %} + {% set var_icon = "fas fa-at" %} + {% elif dict_key=="name" %} + {% set var_icon = "fas fa-user-tag" %} + {% else %} + {% set var_icon = "fas fa-key" %} + {% endif %} + {% for key_id in dict_domain['pgp'][dict_key]%} + + + + + {% endfor %} + {% endfor %} + +
PGP TypeKey ID
+ +   {{ dict_key }} + {{ key_id }}
+
+
+
+
+ {% endif %} + + + {% if 'cryptocurrency' in dict_domain%} +
+
+
+
+
+
+ Cryptocurrencies   +
{{l_64|length}}
+
+
+
+ +
+
+
+
+
+ + + + + + + + + {% for dict_key in dict_domain['cryptocurrency']%} + {% if dict_key=="bitcoin" %} + {% set var_icon = "fab fa-bitcoin" %} + {% elif dict_key=="monero" %} + {% set var_icon = "fab fa-monero" %} + {% else %} + {% set var_icon = "fas fa-coins" %} + {% endif %} + {% for key_id in dict_domain['cryptocurrency'][dict_key]%} + + + + + {% endfor %} + {% endfor %} + +
Currencyaddress
+ +   {{ dict_key }} + {{ key_id }}
+
+
+
+
+ {% endif %} + + + {% if l_pastes %} +
+ @@ -129,7 +233,7 @@

{{ domain }} :

- {% if domain_history %} + {% if dict_domain["domain_history"] %}
@@ -142,7 +246,7 @@

{{ domain }} :

- + {% endfor %} {% endif %} @@ -200,7 +200,7 @@

{{ dict_domain['domain'] }} :

  {{ dict_key }} - + {% endfor %} {% endif %} From 6b9ba9d37714f0417f33a22063b45f51800d9533 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 8 Nov 2019 09:25:09 +0100 Subject: [PATCH 13/14] chg: [Domain] Show last origin --- bin/lib/Domain.py | 29 +++++++++++++++++++ var/www/blueprints/crawler_splash.py | 5 ++-- .../crawler/crawler_splash/showDomain.html | 12 ++++---- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index f305afa6..962c2e19 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -154,6 +154,21 @@ def get_domain_last_check(domain, domain_type=None, r_format="str"): last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8]) return last_check +def get_domain_last_origin(domain, domain_type): + ''' + Get domain last origin + + :param domain: crawled domain + :type domain: str + :param domain_type: domain type + :type domain_type: str + + :return: last orgin item_id + :rtype: str + ''' + origin_item = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent') + return origin_item + def get_domain_tags(domain): ''' Retun all tags of a given domain. 
@@ -280,6 +295,20 @@ def get_domain_last_check(self): ''' return get_domain_last_check(self.domain, domain_type=self.type) + def get_domain_last_origin(self): + ''' + Get domain last origin + + :param domain: crawled domain + :type domain: str + :param domain_type: domain type + :type domain_type: str + + :return: last orgin item_id + :rtype: str + ''' + return get_domain_last_origin(self.domain, self.type) + def is_domain_up(self): # # TODO: handle multiple ports ''' Return True if this domain is UP diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 6977aa4b..2f142a9c 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -46,8 +46,8 @@ def api_validator(api_response): # ============= ROUTES ============== # add route : /crawlers/show_domain @crawler_splash.route('/crawlers/showDomain') -#@login_required -#@login_analyst +@login_required +@login_analyst def showDomain(): domain_name = request.args.get('domain') epoch = request.args.get('epoch') @@ -63,6 +63,7 @@ def showDomain(): dict_domain['domain'] = domain_name if domain.is_domain_up(): dict_domain = {**dict_domain, **domain.get_domain_correlation()} + dict_domain['origin_item'] = domain.get_domain_last_origin() dict_domain['tags'] = domain.get_domain_tags() dict_domain['history'] = domain.get_domain_history_with_status() dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 11b21440..8e95dcb5 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -78,11 +78,6 @@

{{ dict_domain['domain'] }} :

-
{{domain}}
+
{{dict_domain["domain"]}}
{% if epoch_item[2] %}
UP
{% else %} @@ -177,11 +281,13 @@

{{ domain }} :

+
@@ -196,13 +302,19 @@

{{ domain }} :

From 3c6e424ac388ccb4c8df869e82599c4753bbac22 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 5 Nov 2019 09:49:51 +0100 Subject: [PATCH 07/14] chg: [UI Domain] UI: tag domain --- bin/lib/Domain.py | 26 +++- bin/packages/Tag.py | 70 ++++++++-- var/www/blueprints/crawler_splash.py | 8 +- var/www/modules/Tags/Flask_Tags.py | 22 +++ .../crawler/crawler_splash/showDomain.html | 52 +++++-- var/www/templates/modals/add_tags.html | 131 ++++++++++++++++++ 6 files changed, 282 insertions(+), 27 deletions(-) create mode 100644 var/www/templates/modals/add_tags.html diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 89d39870..f305afa6 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -131,7 +131,28 @@ def get_domain_items_crawled(domain, domain_type, port, epoch=None, items_link=F def get_link_tree(): pass +def get_domain_last_check(domain, domain_type=None, r_format="str"): + ''' + Get domain last check date + + :param domain: crawled domain + :type domain: str + :param domain_type: domain type + :type domain_type: str + :return: domain last check date + :rtype: str + ''' + if not domain_type: + domain_type = get_domain_type(domain) + last_check = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'last_check') + if last_check is not None: + if r_format=="int": + last_check = int(last_check) + # str + else: + last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8]) + return last_check def get_domain_tags(domain): ''' @@ -257,10 +278,7 @@ def get_domain_last_check(self): :return: domain last check date :rtype: str ''' - last_check = r_serv_onion.hget('{}_metadata:{}'.format(self.type, self.domain), 'last_check') - if last_check is not None: - last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8]) - return last_check + return get_domain_last_check(self.domain, domain_type=self.type) def is_domain_up(self): # # TODO: handle multiple ports ''' diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py 
index 44ac43b2..ac5143d1 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -10,6 +10,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader +import Domain from pytaxonomies import Taxonomies from pymispgalaxies import Galaxies, Clusters @@ -110,7 +111,7 @@ def get_item_tags_minimal(item_id): return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_item_tags(item_id) ] # TEMPLATE + API QUERY -def add_items_tag(tags=[], galaxy_tags=[], item_id=None): +def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me res_dict = {} if item_id == None: return ({'status': 'error', 'reason': 'Item id not found'}, 404) @@ -138,18 +139,58 @@ def add_items_tag(tags=[], galaxy_tags=[], item_id=None): return (res_dict, 200) -def add_item_tag(tag, item_path): +# TEMPLATE + API QUERY +def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"): + res_dict = {} + if item_id == None: + return ({'status': 'error', 'reason': 'Item id not found'}, 404) + if not tags and not galaxy_tags: + return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) + if item_type not in ('paste', 'domain'): + return ({'status': 'error', 'reason': 'Incorrect item_type'}, 400) - item_date = int(Item.get_item_date(item_path)) + res_dict['tags'] = [] + for tag in tags: + if tag: + taxonomie = get_taxonomie_from_tag(tag) + if is_taxonomie_tag_enabled(taxonomie, tag): + add_item_tag(tag, item_id, item_type=item_type) + res_dict['tags'].append(tag) + else: + return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - #add tag - r_serv_metadata.sadd('tag:{}'.format(item_path), tag) - r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) + for tag in galaxy_tags: + if tag: + galaxy = get_galaxy_from_tag(tag) + if is_galaxy_tag_enabled(galaxy, tag): + add_item_tag(tag, item_id, item_type=item_type) + res_dict['tags'].append(tag) + else: + return ({'status': 'error', 'reason': 'Tags or Galaxy not 
enabled'}, 400) - if Item.is_crawled(item_path): - domain = Item.get_item_domain(item_path) - r_serv_metadata.sadd('tag:{}'.format(domain), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) + res_dict['id'] = item_id + res_dict['type'] = item_type + return (res_dict, 200) + + +def add_item_tag(tag, item_path, item_type="paste"): + + if item_type=="paste": + item_date = int(Item.get_item_date(item_path)) + + #add tag + r_serv_metadata.sadd('tag:{}'.format(item_path), tag) + r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) + + if Item.is_crawled(item_path): + domain = Item.get_item_domain(item_path) + r_serv_metadata.sadd('tag:{}'.format(domain), tag) + r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) + # domain item + else: + item_date = int(Domain.get_domain_last_check(item_path, r_format="int")) + r_serv_metadata.sadd('tag:{}'.format(item_path), tag) + r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), item_path) r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) @@ -250,3 +291,12 @@ def update_tag_last_seen(tag, tag_first_seen, tag_last_seen): else: tag_last_seen = Date.date_substract_day(tag_last_seen) update_tag_last_seen(tag, tag_first_seen, tag_last_seen) + + +# used by modal +def get_modal_add_tags(item_id, tag_type='paste'): + ''' + Modal: add tags to domain or Paste + ''' + return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), + "item_id": item_id, "type": tag_type} diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index c996a49a..6977aa4b 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -20,6 +20,9 @@ from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity from Role_Manager import login_admin, login_analyst +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) +from Tag import get_modal_add_tags + 
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import Domain @@ -40,8 +43,8 @@ def api_validator(api_response): if api_response: return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1] - # ============= ROUTES ============== +# add route : /crawlers/show_domain @crawler_splash.route('/crawlers/showDomain') #@login_required #@login_analyst @@ -65,4 +68,5 @@ def showDomain(): dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) - return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label) + return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, + modal_add_tags=get_modal_add_tags(dict_domain['domain'], tag_type="domain")) diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index 4772e82e..f2ebaab1 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -442,6 +442,28 @@ def addTags(): # success return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) +@Tags.route("/Tags/add_item_tags") +@login_required +@login_analyst +def add_item_tags(): + + tags = request.args.get('tags') + tagsgalaxies = request.args.get('tagsgalaxies') + item_id = request.args.get('item_id') + item_type = request.args.get('type') + + list_tag = tags.split(',') + list_tag_galaxies = tagsgalaxies.split(',') + + res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=item_id, item_type=item_type) + # error + if res[1] != 200: + return str(res[0]) + # success + if item_type=='domain': + return redirect(url_for('crawler_splash.showDomain', domain=item_id)) + else: + return redirect(url_for('showsavedpastes.showsavedpaste', 
paste=item_id)) @Tags.route("/Tags/taxonomies") @login_required diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 7b71f8da..5ee2bf4d 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -7,11 +7,24 @@ + + + + + @@ -77,6 +90,10 @@

{{ dict_domain['domain'] }} :

{% endfor %}
+ {% include 'modals/add_tags.html' %} + @@ -243,7 +260,7 @@
Crawled Items
{%if item["screenshot"]%} - {%endif%} @@ -358,8 +375,8 @@
Crawled Items
+ diff --git a/var/www/templates/modals/add_tags.html b/var/www/templates/modals/add_tags.html new file mode 100644 index 00000000..98cb5479 --- /dev/null +++ b/var/www/templates/modals/add_tags.html @@ -0,0 +1,131 @@ + + + From c8d5ce9a28b0fc59d81266332bd65f224ff9348f Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 5 Nov 2019 15:18:03 +0100 Subject: [PATCH 08/14] chg: [core] mv bin/packages/config.cfg configs/core.cfg + use ConfigLoader --- .gitignore | 4 +- HOWTO.md | 9 +- bin/Helper.py | 6 +- bin/LAUNCH.sh | 2 +- bin/MISP_The_Hive_feeder.py | 36 +- bin/Mixer.py | 39 +- bin/ModuleInformation.py | 354 ------------------ bin/ModulesInformationV2.py | 21 +- bin/NotificationHelper.py | 27 +- bin/Queues_Monitoring.py | 57 --- bin/Repartition_graph.py | 97 ----- bin/SentimentAnalysis.py | 28 +- bin/Shutdown.py | 68 ---- bin/TermTrackerMod.py | 3 - bin/Update-conf.py | 6 +- bin/ailleakObject.py | 13 +- bin/feeder/pystemon-feeder.py | 25 +- bin/indexer_lookup.py | 12 +- bin/lib/ConfigLoader.py | 6 + bin/packages/Correlation.py | 2 +- bin/packages/HiddenServices.py | 44 +-- bin/packages/Import_helper.py | 10 +- bin/packages/Item.py | 13 +- bin/packages/Paste.py | 39 +- bin/packages/Term.py | 13 +- bin/packages/User.py | 21 +- bin/packages/lib_refine.py | 18 +- bin/submit_paste.py | 53 +-- bin/torcrawler/tor_crawler.py | 20 +- bin/update-background.py | 21 +- .../core.cfg.sample | 0 installing_deps.sh | 4 +- update/default_update/Update.py | 19 +- update/v1.5/Update-ARDB_Metadata.py | 53 +-- update/v1.5/Update-ARDB_Onions.py | 46 +-- update/v1.5/Update-ARDB_Onions_screenshots.py | 51 +-- update/v1.5/Update-ARDB_Tags.py | 54 +-- update/v1.5/Update-ARDB_Tags_background.py | 39 +- update/v1.5/Update.py | 28 +- update/v1.7/Update.py | 21 +- update/v2.0/Update.py | 21 +- update/v2.2/Update.py | 28 +- var/www/Flask_server.py | 1 - var/www/create_default_user.py | 22 +- var/www/modules/Role_Manager.py | 23 +- .../crawler/crawler_splash/showDomain.html | 22 +- 46 files changed, 
323 insertions(+), 1176 deletions(-) delete mode 100755 bin/ModuleInformation.py delete mode 100755 bin/Queues_Monitoring.py delete mode 100755 bin/Repartition_graph.py delete mode 100755 bin/Shutdown.py rename bin/packages/config.cfg.sample => configs/core.cfg.sample (100%) diff --git a/.gitignore b/.gitignore index fe1b29a7..7a56b361 100644 --- a/.gitignore +++ b/.gitignore @@ -35,9 +35,9 @@ var/www/server.crt var/www/server.key # Local config -bin/packages/config.cfg -bin/packages/config.cfg.backup configs/keys +configs/core.cfg +configs/core.cfg.backup configs/update.cfg update/current_version files diff --git a/HOWTO.md b/HOWTO.md index 00b7017c..9e72c77e 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -25,7 +25,7 @@ Feed data to AIL: 3. Launch pystemon ``` ./pystemon ``` -4. Edit your configuration file ```bin/packages/config.cfg``` and modify the pystemonpath path accordingly +4. Edit your configuration file ```configs/core.cfg``` and modify the pystemonpath path accordingly 5. Launch pystemon-feeder ``` ./bin/feeder/pystemon-feeder.py ``` @@ -123,7 +123,7 @@ There are two types of installation. You can install a *local* or a *remote* Spl (for a linux docker, the localhost IP is *172.17.0.1*; Should be adapted for other platform) - Restart the tor proxy: ``sudo service tor restart`` -3. *(AIL host)* Edit the ``/bin/packages/config.cfg`` file: +3. *(AIL host)* Edit the ``/configs/core.cfg`` file: - In the crawler section, set ``activate_crawler`` to ``True`` - Change the IP address of Splash servers if needed (remote only) - Set ``splash_onion_port`` according to your Splash servers port numbers that will be used. @@ -134,7 +134,7 @@ There are two types of installation. 
You can install a *local* or a *remote* Spl - *(Splash host)* Launch all Splash servers with: ```sudo ./bin/torcrawler/launch_splash_crawler.sh -f -p -n ``` -With ```` and ```` matching those specified at ``splash_onion_port`` in the configuration file of point 3 (``/bin/packages/config.cfg``) +With ```` and ```` matching those specified at ``splash_onion_port`` in the configuration file of point 3 (``/configs/core.cfg``) All Splash dockers are launched inside the ``Docker_Splash`` screen. You can use ``sudo screen -r Docker_Splash`` to connect to the screen session and check all Splash servers status. @@ -148,7 +148,7 @@ All Splash dockers are launched inside the ``Docker_Splash`` screen. You can use - ```crawler_hidden_services_install.sh -y``` - Add the following line in ``SOCKSPolicy accept 172.17.0.0/16`` in ``/etc/tor/torrc`` - ```sudo service tor restart``` -- set activate_crawler to True in ``/bin/packages/config.cfg`` +- set activate_crawler to True in ``/configs/core.cfg`` #### Start - ```sudo ./bin/torcrawler/launch_splash_crawler.sh -f $AIL_HOME/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 1``` @@ -166,4 +166,3 @@ Then starting the crawler service (if you follow the procedure above) ##### Python 3 Upgrade To upgrade from an existing AIL installation, you have to launch [python3_upgrade.sh](./python3_upgrade.sh), this script will delete and create a new virtual environment. The script **will upgrade the packages but won't keep your previous data** (neverthless the data is copied into a directory called `old`). If you install from scratch, you don't require to launch the [python3_upgrade.sh](./python3_upgrade.sh). 
- diff --git a/bin/Helper.py b/bin/Helper.py index 2942b415..cda26ce5 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -20,10 +20,10 @@ import json -class PubSub(object): +class PubSub(object): ## TODO: remove config, use ConfigLoader by default def __init__(self): - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + configfile = os.path.join(os.environ['AIL_HOME'], 'configs/core.cfg') if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. \ Did you set environment variables? \ @@ -111,7 +111,7 @@ def subscribe(self): class Process(object): def __init__(self, conf_section, module=True): - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + configfile = os.path.join(os.environ['AIL_HOME'], 'configs/core.cfg') if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. \ Did you set environment variables? \ diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index d87ef21e..71621a8e 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -218,7 +218,7 @@ function launching_scripts { function launching_crawler { if [[ ! 
$iscrawler ]]; then - CONFIG=$AIL_BIN/packages/config.cfg + CONFIG=$AIL_HOME/configs/core.cfg lport=$(awk '/^\[Crawler\]/{f=1} f==1&&/^splash_port/{print $3;exit}' "${CONFIG}") IFS='-' read -ra PORTS <<< "$lport" diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index 2bc33431..33a8841f 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -8,20 +8,20 @@ This module send tagged pastes to MISP or THE HIVE Project """ - -import redis -import sys import os +import sys +import uuid +import redis import time import json -import configparser from pubsublogger import publisher from Helper import Process from packages import Paste import ailleakObject -import uuid +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader from pymisp import PyMISP @@ -133,26 +133,10 @@ def feeder(message, count=0): config_section = 'MISP_The_hive_feeder' - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) + config_loader = ConfigLoader.ConfigLoader() - r_serv_db = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") # set sensor uuid uuid_ail = r_serv_db.get('ail:uuid') @@ -212,7 +196,9 @@ def feeder(message, count=0): refresh_time = 3 ## FIXME: remove it - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + config_loader = None + time_1 = time.time() while True: diff --git a/bin/Mixer.py b/bin/Mixer.py index cbb39676..fd8bedc5 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -29,16 +29,20 @@ The mapping can be done via the variable FEED_QUEUE_MAPPING """ +import os +import sys + import base64 import hashlib -import os import time from pubsublogger import publisher import redis -import configparser from Helper import Process +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + # CONFIG # refresh_time = 30 @@ -52,37 +56,22 @@ p = Process(config_section) - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) + config_loader = ConfigLoader.ConfigLoader() # REDIS # - server = redis.StrictRedis( - host=cfg.get("Redis_Mixer_Cache", "host"), - port=cfg.getint("Redis_Mixer_Cache", "port"), - db=cfg.getint("Redis_Mixer_Cache", "db"), - decode_responses=True) - - server_cache = redis.StrictRedis( - host=cfg.get("Redis_Log_submit", "host"), - port=cfg.getint("Redis_Log_submit", "port"), - db=cfg.getint("Redis_Log_submit", "db"), - decode_responses=True) + server = config_loader.get_redis_conn("Redis_Mixer_Cache") + server_cache = config_loader.get_redis_conn("Redis_Log_submit") # LOGGING # publisher.info("Feed Script started to receive & publish.") # OTHER CONFIG # - operation_mode = cfg.getint("Module_Mixer", "operation_mode") - ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate") - default_unnamed_feed_name = cfg.get("Module_Mixer", "default_unnamed_feed_name") + operation_mode = config_loader.get_config_int("Module_Mixer", "operation_mode") + ttl_key = config_loader.get_config_int("Module_Mixer", "ttl_duplicate") + default_unnamed_feed_name = config_loader.get_config_str("Module_Mixer", "default_unnamed_feed_name") - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + '/' + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + config_loader = None # STATS # processed_paste = 0 diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py deleted file mode 100755 index 807cb87e..00000000 --- a/bin/ModuleInformation.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -''' - -This module can be use to see information of running modules. -These information are logged in "logs/moduleInfo.log" - -It can also try to manage them by killing inactive one. 
-However, it does not support mutliple occurence of the same module -(It will kill the first one obtained by get) - - -''' - -import time -import datetime -import redis -import os -import signal -import argparse -from subprocess import PIPE, Popen -import configparser -import json -from terminaltables import AsciiTable -import textwrap -from colorama import Fore, Back, Style, init -import curses - -# CONFIG VARIABLES -kill_retry_threshold = 60 #1m -log_filename = "../logs/moduleInfo.log" -command_search_pid = "ps a -o pid,cmd | grep {}" -command_search_name = "ps a -o pid,cmd | grep {}" -command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\"" - -init() #Necesary for colorama -printarrayGlob = [None]*14 -printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"]) -lastTimeKillCommand = {} - -#Curses init -#stdscr = curses.initscr() -#curses.cbreak() -#stdscr.keypad(1) - -# GLOBAL -last_refresh = 0 - - -def getPid(module): - p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) - for line in p.stdout: - print(line) - splittedLine = line.split() - if 'python2' in splittedLine: - return int(splittedLine[0]) - return None - -def clearRedisModuleInfo(): - for k in server.keys("MODULE_*"): - server.delete(k) - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, "*", "-", "Cleared redis module info"]) - printarrayGlob.pop() - -def cleanRedis(): - for k in server.keys("MODULE_TYPE_*"): - moduleName = k[12:].split('_')[0] - for pid in server.smembers(k): - flag_pid_valid = False - proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) - for line in proc.stdout: - splittedLine = line.split() - if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine: - flag_pid_valid = True - - if not flag_pid_valid: - print(flag_pid_valid, 'cleaning', pid, 'in', k) - 
server.srem(k, pid) - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k]) - printarrayGlob.pop() - #time.sleep(5) - - -def kill_module(module, pid): - print('') - print('-> trying to kill module:', module) - - if pid is None: - print('pid was None') - printarrayGlob.insert(1, [0, module, pid, "PID was None"]) - printarrayGlob.pop() - pid = getPid(module) - else: #Verify that the pid is at least in redis - if server.exists("MODULE_"+module+"_"+str(pid)) == 0: - return - - lastTimeKillCommand[pid] = int(time.time()) - if pid is not None: - try: - os.kill(pid, signal.SIGUSR1) - except OSError: - print(pid, 'already killed') - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"]) - printarrayGlob.pop() - return - time.sleep(1) - if getPid(module) is None: - print(module, 'has been killed') - print('restarting', module, '...') - p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) - printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"]) - printarrayGlob.pop() - printarrayGlob.pop() - - else: - print('killing failed, retrying...') - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."]) - printarrayGlob.pop() - - time.sleep(1) - os.kill(pid, signal.SIGUSR1) - time.sleep(1) - if getPid(module) is None: - print(module, 'has been killed') - print('restarting', module, '...') - p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) - 
printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"]) - printarrayGlob.pop() - printarrayGlob.pop() - else: - print('killing failed!') - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"]) - printarrayGlob.pop() - else: - print('Module does not exist') - inst_time = datetime.datetime.fromtimestamp(int(time.time())) - printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"]) - printarrayGlob.pop() - #time.sleep(5) - cleanRedis() - -def get_color(time, idle): - if time is not None: - temp = time.split(':') - time = int(temp[0])*3600 + int(temp[1])*60 + int(temp[2]) - - if time >= args.treshold: - if not idle: - return Back.RED + Style.BRIGHT - else: - return Back.MAGENTA + Style.BRIGHT - elif time > args.treshold/2: - return Back.YELLOW + Style.BRIGHT - else: - return Back.GREEN + Style.BRIGHT - else: - return Style.RESET_ALL - -def waiting_refresh(): - global last_refresh - if time.time() - last_refresh < args.refresh: - return False - else: - last_refresh = time.time() - return True - - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. 
May be use to automatically kill and restart stucked one.') - parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate') - parser.add_argument('-t', '--treshold', type=int, required=False, default=60*10*1, help='Refresh rate') - parser.add_argument('-k', '--autokill', type=int, required=False, default=0, help='Enable auto kill option (1 for TRUE, anything else for FALSE)') - parser.add_argument('-c', '--clear', type=int, required=False, default=0, help='Clear the current module information (Used to clear data from old launched modules)') - - args = parser.parse_args() - - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - - # REDIS # - server = redis.StrictRedis( - host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - - if args.clear == 1: - clearRedisModuleInfo() - - lastTime = datetime.datetime.now() - - module_file_array = set() - no_info_modules = {} - path_allmod = os.path.join(os.environ['AIL_HOME'], 'doc/all_modules.txt') - with open(path_allmod, 'r') as module_file: - for line in module_file: - module_file_array.add(line[:-1]) - - cleanRedis() - - while True: - if waiting_refresh(): - - #key = '' - #while key != 'q': - # key = stdsrc.getch() - # stdscr.refresh() - - all_queue = set() - printarray1 = [] - printarray2 = [] - printarray3 = [] - for queue, card in server.hgetall("queues").items(): - all_queue.add(queue) - key = "MODULE_" + queue + "_" - keySet = "MODULE_TYPE_" + queue - array_module_type = [] - - for moduleNum in server.smembers(keySet): - value = server.get(key + str(moduleNum)) - if value is not None: - timestamp, path = value.split(", ") - if timestamp is not None 
and path is not None: - startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) - processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] - - if int(card) > 0: - if int((datetime.datetime.now() - startTime_readable).total_seconds()) > args.treshold: - log = open(log_filename, 'a') - log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n") - try: - last_kill_try = time.time() - lastTimeKillCommand[moduleNum] - except KeyError: - last_kill_try = kill_retry_threshold+1 - if args.autokill == 1 and last_kill_try > kill_retry_threshold : - kill_module(queue, int(moduleNum)) - - array_module_type.append([get_color(processed_time_readable, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)]) - - else: - printarray2.append([get_color(processed_time_readable, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)]) - array_module_type.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) - for e in array_module_type: - printarray1.append(e) - - for curr_queue in module_file_array: - if curr_queue not in all_queue: - printarray3.append([curr_queue, "Not running"]) - else: - if len(list(server.smembers('MODULE_TYPE_'+curr_queue))) == 0: - if curr_queue not in no_info_modules: - no_info_modules[curr_queue] = int(time.time()) - printarray3.append([curr_queue, "No data"]) - else: - #If no info since long time, try to kill - if args.autokill == 1: - if int(time.time()) - no_info_modules[curr_queue] > args.treshold: - kill_module(curr_queue, None) - no_info_modules[curr_queue] = int(time.time()) - printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"]) - else: - printarray3.append([curr_queue, "Stuck or idle, restarting disabled"]) - - 
## FIXME To add: - ## Button KILL Process using Curses - - printarray1.sort(key=lambda x: x[0][9:], reverse=False) - printarray2.sort(key=lambda x: x[0][9:], reverse=False) - printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) - printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) - printarray3.insert(0,["Queue", "State"]) - - os.system('clear') - t1 = AsciiTable(printarray1, title="Working queues") - t1.column_max_width(1) - if not t1.ok: - longest_col = t1.column_widths.index(max(t1.column_widths)) - max_length_col = t1.column_max_width(longest_col) - if max_length_col > 0: - for i, content in enumerate(t1.table_data): - if len(content[longest_col]) > max_length_col: - temp = '' - for l in content[longest_col].splitlines(): - if len(l) > max_length_col: - temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n' - else: - temp += l + '\n' - content[longest_col] = temp.strip() - t1.table_data[i] = content - - t2 = AsciiTable(printarray2, title="Idling queues") - t2.column_max_width(1) - if not t2.ok: - longest_col = t2.column_widths.index(max(t2.column_widths)) - max_length_col = t2.column_max_width(longest_col) - if max_length_col > 0: - for i, content in enumerate(t2.table_data): - if len(content[longest_col]) > max_length_col: - temp = '' - for l in content[longest_col].splitlines(): - if len(l) > max_length_col: - temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n' - else: - temp += l + '\n' - content[longest_col] = temp.strip() - t2.table_data[i] = content - - t3 = AsciiTable(printarray3, title="Not running queues") - t3.column_max_width(1) - - printarray4 = [] - for elem in printarrayGlob: - if elem is not None: - printarray4.append(elem) - - t4 = AsciiTable(printarray4, title="Last actions") - t4.column_max_width(1) - - legend_array = [["Color", "Meaning"], [Back.RED+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" 
+str(args.treshold)+Style.RESET_ALL], [Back.MAGENTA+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+" while idle"+Style.RESET_ALL], [Back.YELLOW+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold/2)+Style.RESET_ALL], [Back.GREEN+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time <" +str(args.treshold)]] - legend = AsciiTable(legend_array, title="Legend") - legend.column_max_width(1) - - print(legend.table) - print('\n') - print(t1.table) - print('\n') - print(t2.table) - print('\n') - print(t3.table) - print('\n') - print(t4.table9) - - if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: - lastTime = datetime.datetime.now() - cleanRedis() - #time.sleep(args.refresh) diff --git a/bin/ModulesInformationV2.py b/bin/ModulesInformationV2.py index cef6301c..def7509f 100755 --- a/bin/ModulesInformationV2.py +++ b/bin/ModulesInformationV2.py @@ -10,13 +10,16 @@ from asciimatics.event import KeyboardEvent, MouseEvent import sys, os import time, datetime -import argparse, configparser +import argparse import json import redis import psutil from subprocess import PIPE, Popen from packages import Paste +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + # CONFIG VARIABLES kill_retry_threshold = 60 #1m log_filename = "../logs/moduleInfo.log" @@ -798,21 +801,11 @@ def demo(screen): args = parser.parse_args() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) + config_loader = ConfigLoader.ConfigLoader() # REDIS # - server = redis.StrictRedis( - host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) + server = config_loader.get_redis_conn("Redis_Queues") + config_loader = None if args.clear == 1: clearRedisModuleInfo() diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index 4007e56f..02568a1e 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -1,39 +1,34 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* +import os +import sys + import argparse -import configparser import traceback -import os import smtplib from pubsublogger import publisher from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + """ This module allows the global configuration and management of notification settings and methods. """ -# CONFIG # -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +config_loader = ConfigLoader.ConfigLoader() publisher.port = 6380 publisher.channel = "Script" def sendEmailNotification(recipient, alert_name, content): - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv?') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - - sender = cfg.get("Notifications", "sender") - sender_host = cfg.get("Notifications", "sender_host") - sender_port = cfg.getint("Notifications", "sender_port") - sender_pw = cfg.get("Notifications", "sender_pw") + sender = config_loader.get_config_str("Notifications", "sender") + sender_host = config_loader.get_config_str("Notifications", "sender_host") + sender_port = config_loader.get_config_int("Notifications", "sender_port") + sender_pw = config_loader.get_config_str("Notifications", "sender_pw") if sender_pw == 'None': sender_pw = None diff --git a/bin/Queues_Monitoring.py b/bin/Queues_Monitoring.py deleted file mode 100755 index 3f0462ab..00000000 --- a/bin/Queues_Monitoring.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import redis -import argparse -import configparser -import time -import os -from pubsublogger import publisher -import texttable - - -def main(): - """Main Function""" - - # CONFIG # - cfg = configparser.ConfigParser() - cfg.read('./packages/config.cfg') - - # SCRIPT PARSER # - parser = argparse.ArgumentParser( - description='''This script is a part of the Assisted Information Leak framework.''', - epilog='''''') - - parser.add_argument('-db', type=int, default=0, - help='The name of the Redis DB (default 0)', - choices=[0, 1, 2, 3, 4], action='store') - - # REDIS # - r_serv = redis.StrictRedis( - host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - - # LOGGING # - publisher.port = 6380 - publisher.channel = "Queuing" - - while True: - table = texttable.Texttable() - table.header(["Queue name", "#Items"]) - row = [] - for queue in r_serv.smembers("queues"): - current = r_serv.llen(queue) - current = current - r_serv.llen(queue) - row.append((queue, r_serv.llen(queue))) - - time.sleep(0.5) - row.sort() - 
table.add_rows(row, header=False) - os.system('clear') - print(table.draw()) - - -if __name__ == "__main__": - main() diff --git a/bin/Repartition_graph.py b/bin/Repartition_graph.py deleted file mode 100755 index 5aa146a2..00000000 --- a/bin/Repartition_graph.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/python3 -# -*-coding:UTF-8 -* - -import redis -import argparse -import configparser -from datetime import datetime -from pubsublogger import publisher - -import matplotlib.pyplot as plt - - -def main(): - """Main Function""" - - # CONFIG # - cfg = configparser.ConfigParser() - cfg.read('./packages/config.cfg') - - # SCRIPT PARSER # - parser = argparse.ArgumentParser( - description='''This script is a part of the Analysis Information Leak framework.''', - epilog='''''') - - parser.add_argument('-f', type=str, metavar="filename", default="figure", - help='The absolute path name of the "figure.png"', - action='store') - parser.add_argument('-y', '--year', type=int, required=False, default=None, help='The date related to the DB') - - args = parser.parse_args() - - # REDIS # - # port generated automatically depending on the date - curYear = datetime.now().year if args.year is None else args.year - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_Hashs", "host"), - port=cfg.getint("ARDB_Hashs", "port"), - db=curYear, - decode_responses=True) - - # LOGGING # - publisher.port = 6380 - publisher.channel = "Graph" - - # FUNCTIONS # - publisher.info("""Creating the Repartition Graph""") - - total_list = [] - codepad_list = [] - pastie_list = [] - pastebin_list = [] - for hash in r_serv.keys(): - total_list.append(r_serv.scard(hash)) - - code = 0 - pastie = 0 - pastebin = 0 - for paste in r_serv.smembers(hash): - source = paste.split("/")[5] - - if source == "codepad.org": - code = code + 1 - elif source == "pastie.org": - pastie = pastie + 1 - elif source == "pastebin.com": - pastebin = pastebin + 1 - - codepad_list.append(code) - pastie_list.append(pastie) - 
pastebin_list.append(pastebin) - - codepad_list.sort(reverse=True) - pastie_list.sort(reverse=True) - pastebin_list.sort(reverse=True) - - total_list.sort(reverse=True) - - plt.plot(codepad_list, 'b', label='Codepad.org') - plt.plot(pastebin_list, 'g', label='Pastebin.org') - plt.plot(pastie_list, 'y', label='Pastie.org') - plt.plot(total_list, 'r', label='Total') - - plt.xscale('log') - plt.xlabel('Hashs') - plt.ylabel('Occur[Hash]') - plt.title('Repartition') - plt.legend() - plt.grid() - plt.tight_layout() - - plt.savefig(args.f+".png", dpi=None, facecolor='w', edgecolor='b', - orientation='portrait', papertype=None, format="png", - transparent=False, bbox_inches=None, pad_inches=0.1, - frameon=True) - -if __name__ == "__main__": - main() diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 1305fb4f..fc9a2f4c 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -14,6 +14,8 @@ Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. """ +import os +import sys import time import datetime @@ -24,6 +26,9 @@ from Helper import Process from packages import Paste +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + from nltk.sentiment.vader import SentimentIntensityAnalyzer from nltk import tokenize @@ -32,19 +37,6 @@ size_threshold = 250 line_max_length_threshold = 1000 -import os -import configparser - -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - -cfg = configparser.ConfigParser() -cfg.read(configfile) - -sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") #time_clean_sentiment_db = 60*60 def Analyse(message, server): @@ -151,12 +143,12 @@ def timeout_handler(signum, frame): # Sent to the logging a description of the module publisher.info("") + config_loader = ConfigLoader.ConfigLoader() + sentiment_lexicon_file = config_loader.get_config_str("Directories", "sentiment_lexicon_file") + # REDIS_LEVEL_DB # - server = redis.StrictRedis( - host=p.config.get("ARDB_Sentiment", "host"), - port=p.config.get("ARDB_Sentiment", "port"), - db=p.config.get("ARDB_Sentiment", "db"), - decode_responses=True) + server = config_loader.get_redis_conn("ARDB_Sentiment") + config_loader = None time1 = time.time() diff --git a/bin/Shutdown.py b/bin/Shutdown.py deleted file mode 100755 index 609b257a..00000000 --- a/bin/Shutdown.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -The ZMQ_Feed_Q Module -===================== - -This module is consuming the Redis-list created by the ZMQ_Feed_Q Module, -And save the paste on disk to allow others modules to work on them. - -..todo:: Be able to choose to delete or not the saved paste after processing. -..todo:: Store the empty paste (unprocessed) somewhere in Redis. - -..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put -the same Subscriber name in both of them. - -Requirements ------------- - -*Need running Redis instances. -*Need the ZMQ_Feed_Q Module running to be able to work properly. 
- -""" -import redis -import configparser -import os - -configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg') - - -def main(): - """Main Function""" - - # CONFIG # - cfg = configparser.ConfigParser() - cfg.read(configfile) - - # REDIS - r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - - # FIXME: automatic based on the queue name. - # ### SCRIPTS #### - r_serv.sadd("SHUTDOWN_FLAGS", "Feed") - r_serv.sadd("SHUTDOWN_FLAGS", "Categ") - r_serv.sadd("SHUTDOWN_FLAGS", "Lines") - r_serv.sadd("SHUTDOWN_FLAGS", "Tokenize") - r_serv.sadd("SHUTDOWN_FLAGS", "Attributes") - r_serv.sadd("SHUTDOWN_FLAGS", "Creditcards") - r_serv.sadd("SHUTDOWN_FLAGS", "Duplicate") - r_serv.sadd("SHUTDOWN_FLAGS", "Mails") - r_serv.sadd("SHUTDOWN_FLAGS", "Onion") - r_serv.sadd("SHUTDOWN_FLAGS", "Urls") - - r_serv.sadd("SHUTDOWN_FLAGS", "Feed_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Categ_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Lines_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Tokenize_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Attributes_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Creditcards_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Duplicate_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Mails_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Onion_Q") - r_serv.sadd("SHUTDOWN_FLAGS", "Urls_Q") - -if __name__ == "__main__": - main() diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py index 2ca05628..ce4dcf20 100755 --- a/bin/TermTrackerMod.py +++ b/bin/TermTrackerMod.py @@ -18,9 +18,6 @@ from packages import Item from packages import Term -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) -import Flask_config - full_item_url = "/showsavedpaste/?paste=" mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}" diff --git a/bin/Update-conf.py b/bin/Update-conf.py index aa8575d6..32d557fb 100755 --- a/bin/Update-conf.py +++ b/bin/Update-conf.py 
@@ -68,9 +68,9 @@ def main(): #------------------------------------------------------------------------------------# - config_file_default = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - config_file_default_sample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample') - config_file_default_backup = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.backup') + config_file_default = os.path.join(os.environ['AIL_HOME'], 'configs/core.cfg') + config_file_default_sample = os.path.join(os.environ['AIL_HOME'], 'configs/core.cfg.sample') + config_file_default_backup = os.path.join(os.environ['AIL_HOME'], 'configs/core.cfg.backup') config_file_update = os.path.join(os.environ['AIL_HOME'], 'configs/update.cfg') config_file_update_sample = os.path.join(os.environ['AIL_HOME'], 'configs/update.cfg.sample') diff --git a/bin/ailleakObject.py b/bin/ailleakObject.py index 111db905..5fbf9f75 100755 --- a/bin/ailleakObject.py +++ b/bin/ailleakObject.py @@ -1,13 +1,18 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* +import os +import sys + from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator -import configparser from packages import Paste import datetime import json from io import BytesIO +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + class AilLeakObject(AbstractMISPObjectGenerator): def __init__(self, uuid_ail, p_source, p_date, p_content, p_duplicate, p_duplicate_number): super(AbstractMISPObjectGenerator, self).__init__('ail-leak') @@ -35,9 +40,9 @@ def __init__(self, pymisp): self.pymisp = pymisp self.currentID_date = None self.eventID_to_push = self.get_daily_event_id() - cfg = configparser.ConfigParser() - cfg.read('./packages/config.cfg') - self.maxDuplicateToPushToMISP = cfg.getint("ailleakObject", "maxDuplicateToPushToMISP") + config_loader = ConfigLoader.ConfigLoader() + self.maxDuplicateToPushToMISP = config_loader.get_config_int("ailleakObject", "maxDuplicateToPushToMISP") + 
config_loader = None self.attribute_to_tag = None def add_new_object(self, uuid_ail, path, p_source, tag): diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index 5c9f743c..2b06e494 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -17,36 +17,33 @@ # # Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be +import os +import sys import zmq import random -import sys import time import redis import base64 -import os -import configparser -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader -cfg = configparser.ConfigParser() -cfg.read(configfile) +config_loader = ConfigLoader.ConfigLoader() -if cfg.has_option("ZMQ_Global", "bind"): - zmq_url = cfg.get("ZMQ_Global", "bind") +if config_loader.has_option("ZMQ_Global", "bind"): + zmq_url = config_loader.get_config_str("ZMQ_Global", "bind") else: zmq_url = "tcp://127.0.0.1:5556" -pystemonpath = cfg.get("Directories", "pystemonpath") -pastes_directory = cfg.get("Directories", "pastes") +pystemonpath = config_loader.get_config_str("Directories", "pystemonpath") +pastes_directory = config_loader.get_config_str("Directories", "pastes") pastes_directory = os.path.join(os.environ['AIL_HOME'], pastes_directory) base_sleeptime = 0.01 sleep_inc = 0 +config_loader = None + context = zmq.Context() socket = context.socket(zmq.PUB) socket.bind(zmq_url) diff --git a/bin/indexer_lookup.py b/bin/indexer_lookup.py index cb01e3f2..774120be 100644 --- a/bin/indexer_lookup.py +++ b/bin/indexer_lookup.py @@ -10,11 +10,13 @@ # # Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be -import configparser import argparse import gzip import os +import sys +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader 
def readdoc(path=None): if path is None: @@ -22,13 +24,11 @@ def readdoc(path=None): f = gzip.open(path, 'r') return f.read() -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -cfg = configparser.ConfigParser() -cfg.read(configfile) +config_loader = ConfigLoader.ConfigLoader() # Indexer configuration - index dir and schema setup -indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) -indexertype = cfg.get("Indexer", "type") +indexpath = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Indexer", "path")) +indexertype = config_loader.get_config_str("Indexer", "type") argParser = argparse.ArgumentParser(description='Fulltext search for AIL') argParser.add_argument('-q', action='append', help='query to lookup (one or more)') diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py index 51508daa..ade3de49 100755 --- a/bin/lib/ConfigLoader.py +++ b/bin/lib/ConfigLoader.py @@ -46,3 +46,9 @@ def get_config_int(self, section, key_name): def get_config_boolean(self, section, key_name): return self.cfg.getboolean(section, key_name) + + def has_option(self, section, key_name): + return self.cfg.has_option(section, key_name) + + def has_section(self, section): + return self.cfg.has_section(section) diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index 84569724..8150ebf3 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -5,7 +5,7 @@ import sys import redis -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'lib/')) +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader config_loader = ConfigLoader.ConfigLoader() diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index b60c6d99..8ed7372b 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -17,6 +17,7 @@ """ import os +import sys import time import gzip import redis @@ -25,11 +26,12 @@ from io import BytesIO import zipfile 
-import configparser -import sys sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) from Date import Date +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + class HiddenServices(object): """ This class representing a hiddenServices as an object. @@ -43,27 +45,11 @@ class HiddenServices(object): def __init__(self, domain, type, port=80): - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - self.r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) - - self.r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + self.r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + self.r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' + self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' self.domain = domain self.type = type @@ -71,17 +57,19 @@ def __init__(self, domain, type, port=80): self.tags = {} if type == 'onion' or type == 'regular': - self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) - self.paste_crawled_directory = os.path.join(self.paste_directory, cfg.get("Directories", "crawled")) - self.paste_crawled_directory_name = cfg.get("Directories", "crawled") - self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", 
"crawled_screenshot")) + self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled")) + self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled") + self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) self.screenshot_directory_screenshot = os.path.join(self.screenshot_directory, 'screenshot') elif type == 'i2p': - self.paste_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot")) - self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot")) + self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) + self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) else: ## TODO: # FIXME: add error pass + + config_loader = None #def remove_absolute_path_link(self, key, value): # print(key) diff --git a/bin/packages/Import_helper.py b/bin/packages/Import_helper.py index c95c101b..ff15115b 100755 --- a/bin/packages/Import_helper.py +++ b/bin/packages/Import_helper.py @@ -2,13 +2,17 @@ # -*-coding:UTF-8 -* import os +import sys import uuid import redis -import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader -r_serv_db = Flask_config.r_serv_db -r_serv_log_submit = Flask_config.r_serv_log_submit +config_loader = ConfigLoader.ConfigLoader() +r_serv_db = config_loader.get_redis_conn("ARDB_DB") +r_serv_log_submit = config_loader.get_redis_conn("Redis_Log_submit") +config_loader = None def is_valid_uuid_v4(UUID): UUID = UUID.replace('-', '') diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 
b142ba7b..ff19a19e 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -6,15 +6,18 @@ import gzip import redis -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) -import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Date import Tag -PASTES_FOLDER = Flask_config.PASTES_FOLDER -r_cache = Flask_config.r_cache -r_serv_metadata = Flask_config.r_serv_metadata +config_loader = ConfigLoader.ConfigLoader() +PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' +r_cache = config_loader.get_redis_conn("Redis_Cache") +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +config_loader = None def exist_item(item_id): if os.path.isfile(os.path.join(PASTES_FOLDER, item_id)): diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index 6c464610..ecc3f4e2 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -17,20 +17,22 @@ """ import os +import re +import sys import magic import gzip import redis import operator import string -import re import json -import configparser from io import StringIO -import sys sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) from Date import Date from Hash import Hash +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + from langid.langid import LanguageIdentifier, model from nltk.tokenize import RegexpTokenizer @@ -58,31 +60,12 @@ class Paste(object): def __init__(self, p_path): - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - self.cache = redis.StrictRedis( - host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db"), - decode_responses=True) - self.store = redis.StrictRedis( - host=cfg.get("Redis_Data_Merging", "host"), - port=cfg.getint("Redis_Data_Merging", "port"), - db=cfg.getint("Redis_Data_Merging", "db"), - decode_responses=True) - self.store_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + config_loader = ConfigLoader.ConfigLoader() + self.cache = config_loader.get_redis_conn("Redis_Queues") + self.store = config_loader.get_redis_conn("Redis_Data_Merging") + self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata") + + self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) if self.PASTES_FOLDER not in p_path: self.p_rel_path = p_path self.p_path = os.path.join(self.PASTES_FOLDER, p_path) diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 8cc63c9a..326ade33 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -14,15 +14,20 @@ from nltk.tokenize import RegexpTokenizer from textblob import TextBlob -sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) -import Flask_config +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + from flask import escape import Date import Item -r_serv_term = Flask_config.r_serv_term -email_regex = Flask_config.email_regex +config_loader = ConfigLoader.ConfigLoader() +r_serv_term = config_loader.get_redis_conn("ARDB_Tracker") +config_loader = None + +email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' +email_regex = 
re.compile(email_regex) special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') special_characters.add('\\s') diff --git a/bin/packages/User.py b/bin/packages/User.py index 829e4205..ef6eba74 100755 --- a/bin/packages/User.py +++ b/bin/packages/User.py @@ -2,9 +2,12 @@ # -*-coding:UTF-8 -* import os +import sys import redis import bcrypt -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader from flask_login import UserMixin @@ -12,20 +15,10 @@ class User(UserMixin): def __init__(self, id): - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) + config_loader = ConfigLoader.ConfigLoader() - self.r_serv_db = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + self.r_serv_db = config_loader.get_redis_conn("ARDB_DB") + config_loader = None if self.r_serv_db.hexists('user:all', id): self.id = id diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index 32f56900..4508879b 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -1,14 +1,20 @@ #!/usr/bin/python3 -import re import os -import configparser +import re +import sys import dns.resolver from pubsublogger import publisher from datetime import timedelta +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +config_loader = ConfigLoader.ConfigLoader() +dns_server = config_loader.get_config_str("Web", "dns") +config_loader = None def is_luhn_valid(card_number): """Apply the Luhn algorithm to validate credit card. 
@@ -103,14 +109,6 @@ def checking_MX_record(r_serv, adress_set, addr_dns): def checking_A_record(r_serv, domains_set): - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - dns_server = cfg.get("Web", "dns") score = 0 num = len(domains_set) diff --git a/bin/submit_paste.py b/bin/submit_paste.py index 0609f581..cae9c0ed 100755 --- a/bin/submit_paste.py +++ b/bin/submit_paste.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* -import configparser import os import sys import gzip @@ -17,6 +16,9 @@ from Helper import Process from pubsublogger import publisher +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + def create_paste(uuid, paste_content, ltags, ltagsgalaxies, name): now = datetime.datetime.now() @@ -154,44 +156,13 @@ def verify_extention_filename(filename): publisher.port = 6380 publisher.channel = "Script" - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - - r_serv_db = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_log_submit = redis.StrictRedis( - host=cfg.get("Redis_Log_submit", "host"), - port=cfg.getint("Redis_Log_submit", "port"), - db=cfg.getint("Redis_Log_submit", "db"), - decode_responses=True) - - r_serv_tags = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) - - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - serv_statistics = redis.StrictRedis( - host=cfg.get('ARDB_Statistics', 'host'), - port=cfg.getint('ARDB_Statistics', 'port'), - db=cfg.getint('ARDB_Statistics', 'db'), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_log_submit = config_loader.get_redis_conn("Redis_Log_submit") + r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + serv_statistics = config_loader.get_redis_conn("ARDB_Statistics") expire_time = 120 MAX_FILE_SIZE = 1000000000 @@ -200,7 +171,9 @@ def verify_extention_filename(filename): config_section = 'submit_paste' p = Process(config_section) - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + + config_loader = None while True: diff --git a/bin/torcrawler/tor_crawler.py b/bin/torcrawler/tor_crawler.py index 2d8365c4..ccb645a0 100755 --- a/bin/torcrawler/tor_crawler.py +++ b/bin/torcrawler/tor_crawler.py @@ -5,29 +5,21 
@@ import sys import json import redis -import configparser from TorSplashCrawler import TorSplashCrawler +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + if __name__ == '__main__': if len(sys.argv) != 2: print('usage:', 'tor_crawler.py', 'uuid') exit(1) - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - - cfg = configparser.ConfigParser() - cfg.read(configfile) - redis_cache = redis.StrictRedis( - host=cfg.get("Redis_Cache", "host"), - port=cfg.getint("Redis_Cache", "port"), - db=cfg.getint("Redis_Cache", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + redis_cache = config_loader.get_redis_conn("Redis_Cache") + config_loader = None # get crawler config key uuid = sys.argv[1] diff --git a/bin/update-background.py b/bin/update-background.py index 96a1258a..1410cfaa 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -13,23 +13,16 @@ import sys import redis import subprocess -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader if __name__ == "__main__": - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + r_serv = config_loader.get_redis_conn("ARDB_DB") + config_loader = None if r_serv.scard('ail:update_v1.5') != 5: r_serv.delete('ail:update_error') diff --git a/bin/packages/config.cfg.sample b/configs/core.cfg.sample similarity index 100% rename from bin/packages/config.cfg.sample rename to configs/core.cfg.sample diff --git a/installing_deps.sh b/installing_deps.sh index 99325f82..f21852b1 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -81,8 +81,8 @@ pushd ardb/ make popd -if [ ! -f bin/packages/config.cfg ]; then - cp bin/packages/config.cfg.sample bin/packages/config.cfg +if [ ! -f configs/core.cfg ]; then + cp configs/core.cfg.sample configs/core.cfg fi if [ -z "$VIRTUAL_ENV" ]; then diff --git a/update/default_update/Update.py b/update/default_update/Update.py index 3fb511a1..c34f4cd1 100755 --- a/update/default_update/Update.py +++ b/update/default_update/Update.py @@ -9,6 +9,9 @@ import datetime import configparser +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + if __name__ == '__main__': parser = argparse.ArgumentParser(description='AIL default update') parser.add_argument('-t', help='version tag' , type=str, dest='tag', required=True) @@ -23,19 +26,9 @@ start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + r_serv = config_loader.get_redis_conn("ARDB_DB") + config_loader = None #Set current ail version r_serv.set('ail:version', update_tag) diff --git a/update/v1.5/Update-ARDB_Metadata.py b/update/v1.5/Update-ARDB_Metadata.py index 9d198f8d..e8fcb1bb 100755 --- a/update/v1.5/Update-ARDB_Metadata.py +++ b/update/v1.5/Update-ARDB_Metadata.py @@ -5,7 +5,9 @@ import sys import time import redis -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader def update_tracked_terms(main_key, tracked_container_key): for tracked_item in r_serv_term.smembers(main_key): @@ -50,45 +52,16 @@ def update_hash_item(has_type): start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_serv_tag = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) - - r_serv_term = redis.StrictRedis( - host=cfg.get("ARDB_TermFreq", "host"), - port=cfg.getint("ARDB_TermFreq", "port"), - db=cfg.getint("ARDB_TermFreq", "db"), - decode_responses=True) - - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") + r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None r_serv.set('ail:current_background_script', 'metadata') diff --git a/update/v1.5/Update-ARDB_Onions.py b/update/v1.5/Update-ARDB_Onions.py index cb143721..7b4aee99 100755 --- a/update/v1.5/Update-ARDB_Onions.py +++ b/update/v1.5/Update-ARDB_Onions.py @@ -6,7 +6,9 @@ import time import redis import datetime -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader def 
substract_date(date_from, date_to): date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) @@ -39,39 +41,15 @@ def get_domain_root_from_paste_childrens(item_father, domain): start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_serv_tag = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) - - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None r_serv.set('ail:current_background_script', 'onions') r_serv.set('ail:current_background_script_stat', 0) diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py index 5aa3cf0f..3327878f 
100755 --- a/update/v1.5/Update-ARDB_Onions_screenshots.py +++ b/update/v1.5/Update-ARDB_Onions_screenshots.py @@ -6,10 +6,12 @@ import time import redis import datetime -import configparser from hashlib import sha256 +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + def rreplace(s, old, new, occurrence): li = s.rsplit(old, occurrence) return new.join(li) @@ -28,41 +30,18 @@ def substract_date(date_from, date_to): start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot")) - NEW_SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"), 'screenshot') - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_serv_tag = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) - - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) + 
NEW_SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None r_serv.set('ail:current_background_script', 'crawled_screenshot') r_serv.set('ail:current_background_script_stat', 0) diff --git a/update/v1.5/Update-ARDB_Tags.py b/update/v1.5/Update-ARDB_Tags.py index 061a4b4d..f94fc03c 100755 --- a/update/v1.5/Update-ARDB_Tags.py +++ b/update/v1.5/Update-ARDB_Tags.py @@ -5,58 +5,36 @@ import sys import time import redis -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader if __name__ == '__main__': start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_serv_tag = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) - - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") r_important_paste_2018 = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), + host=config_loader.get_config_str("ARDB_Metadata", "host"), + port=config_loader.get_config_int("ARDB_Metadata", "port"), db=2018, decode_responses=True) r_important_paste_2019 = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=2018, + host=config_loader.get_config_str("ARDB_Metadata", "host"), + port=config_loader.get_config_int("ARDB_Metadata", "port"), + db=2019, decode_responses=True) + config_loader = None + r_serv.set('ail:current_background_script', 'tags') 
r_serv.set('ail:current_background_script_stat', 0) diff --git a/update/v1.5/Update-ARDB_Tags_background.py b/update/v1.5/Update-ARDB_Tags_background.py index fe1ec04b..0e8eef0d 100755 --- a/update/v1.5/Update-ARDB_Tags_background.py +++ b/update/v1.5/Update-ARDB_Tags_background.py @@ -5,7 +5,9 @@ import sys import time import redis -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader def tags_key_fusion(old_item_path_key, new_item_path_key): print('fusion:') @@ -19,33 +21,14 @@ def tags_key_fusion(old_item_path_key, new_item_path_key): start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - - r_serv_metadata = redis.StrictRedis( - host=cfg.get("ARDB_Metadata", "host"), - port=cfg.getint("ARDB_Metadata", "port"), - db=cfg.getint("ARDB_Metadata", "db"), - decode_responses=True) - - r_serv_tag = redis.StrictRedis( - host=cfg.get("ARDB_Tags", "host"), - port=cfg.getint("ARDB_Tags", "port"), - db=cfg.getint("ARDB_Tags", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' + + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") + r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") + config_loader = None if r_serv.sismember('ail:update_v1.5', 'tags'): diff --git a/update/v1.5/Update.py 
b/update/v1.5/Update.py index af8800cf..dee56e44 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -6,33 +6,21 @@ import time import redis import datetime -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader if __name__ == '__main__': start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/' + config_loader = ConfigLoader.ConfigLoader() - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - r_serv_onion = redis.StrictRedis( - host=cfg.get("ARDB_Onion", "host"), - port=cfg.getint("ARDB_Onion", "port"), - db=cfg.getint("ARDB_Onion", "db"), - decode_responses=True) + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None print() print('Updating ARDB_Onion ...') diff --git a/update/v1.7/Update.py b/update/v1.7/Update.py index d2a826e6..b3ebd591 100755 --- a/update/v1.7/Update.py +++ b/update/v1.7/Update.py @@ -6,25 +6,18 @@ import time import redis import datetime -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader if __name__ == '__main__': start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + r_serv = config_loader.get_redis_conn("ARDB_DB") + config_loader = None #Set current ail version r_serv.set('ail:version', 'v1.7') diff --git a/update/v2.0/Update.py b/update/v2.0/Update.py index 4e026e1f..d5533a74 100755 --- a/update/v2.0/Update.py +++ b/update/v2.0/Update.py @@ -6,25 +6,18 @@ import time import redis import datetime -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader if __name__ == '__main__': start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + r_serv = config_loader.get_redis_conn("ARDB_DB") + config_loader = None #Set current ail version r_serv.set('ail:version', 'v2.0') diff --git a/update/v2.2/Update.py b/update/v2.2/Update.py index 6a762f6c..aa2bd3f1 100755 --- a/update/v2.2/Update.py +++ b/update/v2.2/Update.py @@ -7,12 +7,14 @@ import time import redis import datetime -import configparser sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) import Item import Term +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + def rreplace(s, old, new, occurrence): li = s.rsplit(old, occurrence) @@ -23,25 +25,11 @@ def rreplace(s, old, new, occurrence): start_deb = time.time() - configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample') - if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') - cfg = configparser.ConfigParser() - cfg.read(configfile) - - r_serv_term_stats = redis.StrictRedis( - host=cfg.get("ARDB_Trending", "host"), - port=cfg.getint("ARDB_Trending", "port"), - db=cfg.getint("ARDB_Trending", "db"), - decode_responses=True) - - r_serv_termfreq = redis.StrictRedis( - host=cfg.get("ARDB_TermFreq", "host"), - port=cfg.getint("ARDB_TermFreq", "port"), - db=cfg.getint("ARDB_TermFreq", "db"), - decode_responses=True) + config_loader = ConfigLoader.ConfigLoader() + + r_serv_term_stats = config_loader.get_redis_conn("ARDB_Trending") + r_serv_termfreq = config_loader.get_redis_conn("ARDB_TermFreq") + config_loader = None r_serv_term_stats.flushdb() diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index a4518a00..63c593b4 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -11,7 +11,6 @@ import random import logging import logging.handlers -import configparser from flask import Flask, render_template, jsonify, request, Request, Response, session, redirect, url_for from flask_login import LoginManager, current_user, login_user, logout_user, login_required diff --git a/var/www/create_default_user.py b/var/www/create_default_user.py index 5bf0e63a..34e07183 100755 --- a/var/www/create_default_user.py +++ b/var/www/create_default_user.py @@ -4,28 +4,18 @@ import os import sys import redis -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) from Role_Manager import create_user_db, edit_user_db, get_default_admin_token, gen_password +config_loader = ConfigLoader.ConfigLoader() - -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? 
\ - Or activate the virtualenv.') -cfg = configparser.ConfigParser() -cfg.read(configfile) - -r_serv = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) - +r_serv = config_loader.get_redis_conn("ARDB_DB") +config_loader = None if __name__ == "__main__": diff --git a/var/www/modules/Role_Manager.py b/var/www/modules/Role_Manager.py index 2fe5f59f..388c5576 100644 --- a/var/www/modules/Role_Manager.py +++ b/var/www/modules/Role_Manager.py @@ -3,9 +3,12 @@ import os import re +import sys import redis import bcrypt -import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader from functools import wraps from flask_login import LoginManager, current_user, login_user, logout_user, login_required @@ -16,20 +19,10 @@ login_manager.login_view = 'role' # CONFIG # -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - -cfg = configparser.ConfigParser() -cfg.read(configfile) - -r_serv_db = redis.StrictRedis( - host=cfg.get("ARDB_DB", "host"), - port=cfg.getint("ARDB_DB", "port"), - db=cfg.getint("ARDB_DB", "db"), - decode_responses=True) +config_loader = ConfigLoader.ConfigLoader() + +r_serv_db = config_loader.get_redis_conn("ARDB_DB") +config_loader = None default_passwd_file = os.path.join(os.environ['AIL_HOME'], 'DEFAULT_PASSWORD') diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 5ee2bf4d..0d255af4 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -386,15 +386,19 @@
Crawled Items
img.addEventListener("error", img_error); var draw_img = false; -{%if dict_domain['crawler_history']['random_item']['screenshot']%} - var screenshot = "{{dict_domain['crawler_history']['random_item']['screenshot']}}"; - var selected_icon = $("#"+screenshot.replace(/\//g, "")); - selected_icon.addClass("icon_selected"); - selected_icon.removeClass("icon_img"); - - - $("#screenshot_link").attr("href", "screenshot_href + {{dict_domain['crawler_history']['random_item']['id']}}"); - $("#screenshot_link").text("{{dict_domain['crawler_history']['random_item']['link']}}"); +{%if "crawler_history" in dict_domain%} + {%if dict_domain['crawler_history']['random_item']['screenshot']%} + var screenshot = "{{dict_domain['crawler_history']['random_item']['screenshot']}}"; + var selected_icon = $("#"+screenshot.replace(/\//g, "")); + selected_icon.addClass("icon_selected"); + selected_icon.removeClass("icon_img"); + + + $("#screenshot_link").attr("href", "screenshot_href + {{dict_domain['crawler_history']['random_item']['id']}}"); + $("#screenshot_link").text("{{dict_domain['crawler_history']['random_item']['link']}}"); + {%else%} + var screenshot = ""; + {%endif%} {%else%} var screenshot = ""; {%endif%} From 8e8e27b4fc6ca57cc583f9cc94ac4fa864f357bd Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 5 Nov 2019 15:30:26 +0100 Subject: [PATCH 09/14] fix: [tests] add lib path --- tests/testApi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testApi.py b/tests/testApi.py index 4cabd2b8..db65cdec 100644 --- a/tests/testApi.py +++ b/tests/testApi.py @@ -7,6 +7,7 @@ import unittest sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'bin')) sys.path.append(os.environ['AIL_FLASK']) sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules')) From 881a55665b4bb3904c93eaf0be0ed5c2b0cf2deb Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 5 Nov 2019 16:02:39 +0100 Subject: [PATCH 10/14] chg: [Update] add v2.4 
update --- update/v2.4/Update.py | 32 ++++++++++++++++++++++++++++++++ update/v2.4/Update.sh | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100755 update/v2.4/Update.py create mode 100755 update/v2.4/Update.sh diff --git a/update/v2.4/Update.py b/update/v2.4/Update.py new file mode 100755 index 00000000..e45f5241 --- /dev/null +++ b/update/v2.4/Update.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v2.4' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv = config_loader.get_redis_conn("ARDB_DB") + config_loader = None + + #Set current update_in_progress + r_serv.set('ail:update_in_progress', new_version) + r_serv.set('ail:current_background_update', new_version) + + #Set current ail version + r_serv.set('ail:version', new_version) + + #Set current ail version + r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.4/Update.sh b/update/v2.4/Update.sh new file mode 100755 index 00000000..4a6fce05 --- /dev/null +++ b/update/v2.4/Update.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -lav & +wait +echo "" + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v2.4/Update.py +wait +echo "" +echo "" + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 From 712de0d88ac18b68ae95d29d9a9b1e0d90b5ca79 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 5 Nov 2019 16:44:10 +0100 Subject: [PATCH 11/14] chg: [Update v2.4] cp config --- .gitignore | 2 ++ bin/update-background.py | 3 +++ update/v2.4/Update.sh | 3 +++ 3 files changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index 7a56b361..ac4837a3 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,8 @@ var/www/server.key # Local config configs/keys +bin/packages/core.cfg +bin/packages/config.cfg.backup configs/core.cfg configs/core.cfg.backup configs/update.cfg diff --git a/bin/update-background.py b/bin/update-background.py index 1410cfaa..6f58d0f5 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -53,3 +53,6 @@ r_serv.delete('ail:current_background_script') r_serv.delete('ail:current_background_script_stat') r_serv.delete('ail:current_background_update') + + if r_serv.scard('ail:update_v2.4') != 1: + pass diff --git a/update/v2.4/Update.sh b/update/v2.4/Update.sh index 4a6fce05..308e2a3c 100755 --- a/update/v2.4/Update.sh +++ b/update/v2.4/Update.sh @@ -23,6 +23,9 @@ bash ${AIL_BIN}/LAUNCH.sh -lav & wait echo "" +cp ${AIL_BIN}/packages/config.cfg ${AIL_HOME}/configs/core.cfg +rm ${AIL_BIN}/packages/config.cfg + echo "" echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT echo "" From 63b3d5a5f928154269b89face248d02d8753a2e0 Mon Sep 17 00:00:00 2001 From: Terrtia 
Date: Tue, 5 Nov 2019 16:53:30 +0100 Subject: [PATCH 12/14] fix: [showDomain] fix url pgp + cryptocurrency --- var/www/templates/crawler/crawler_splash/showDomain.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 0d255af4..11b21440 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -143,7 +143,7 @@

{{ dict_domain['domain'] }} :

  {{ dict_key }}
{{ key_id }}{{ key_id }}
{{ key_id }}{{ key_id }}
- {% if origin_paste_name=='manual' or origin_paste_name=='auto' %} - {{ origin_paste_name }} - {%else%} - {{ origin_paste_name }} - {%endif%}
{% for tag in dict_domain['tags'] %} @@ -96,6 +91,13 @@

{{ dict_domain['domain'] }} :

+
+ {% if dict_domain['origin_item']=='manual' or dict_domain['origin_item']=='auto' %} + {{ dict_domain['origin_item'] }} + {%else%} + Last Origin:
{{ dict_domain['origin_item'] }} + {%endif%} + From 880c351c0c182eac7fdeac14c4fe50b6f127663a Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 8 Nov 2019 16:00:42 +0100 Subject: [PATCH 14/14] chg: [update + show decoded items] add background update --- bin/lib/Domain.py | 26 ++++++- bin/packages/Correlation.py | 44 +++++++++++ bin/packages/Cryptocurrency.py | 3 +- bin/packages/Item.py | 62 +++++++++++++++ bin/packages/Tag.py | 8 +- bin/update-background.py | 21 ++++- update/v2.4/Update.py | 5 ++ update/v2.4/Update_domain.py | 77 +++++++++++++++++++ var/www/blueprints/crawler_splash.py | 2 + var/www/modules/Flask_config.py | 4 +- .../crawler/crawler_splash/showDomain.html | 44 +++++++++++ 11 files changed, 288 insertions(+), 8 deletions(-) create mode 100755 update/v2.4/Update_domain.py diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 962c2e19..9220a9e2 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -17,6 +17,7 @@ import Correlation from Cryptocurrency import cryptocurrency from Pgp import pgp +import Decoded import Item import Tag @@ -197,6 +198,14 @@ def get_domain_pgp(domain, currencies_type=None, get_nb=False): ''' return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb) +def get_domain_decoded(domain): + ''' + Retun all decoded item of a given domain. + + :param domain: crawled domain + ''' + return Decoded.get_domain_decoded_item(domain) + def get_domain_all_correlation(domain, correlation_type=None, get_nb=False): ''' Retun all correlation of a given domain. 
@@ -214,6 +223,9 @@ def get_domain_all_correlation(domain, correlation_type=None, get_nb=False): res = get_domain_pgp(domain, get_nb=get_nb) if res: domain_correl['pgp'] = res + res = get_domain_decoded(domain) + if res: + domain_correl['decoded'] = res return domain_correl # TODO: handle port @@ -271,6 +283,12 @@ def __init__(self, domain, port=None): if self.is_domain_up(): self.current_port = sanathyse_port(port, self.domain, self.type) + def get_domain_name(self): + return self.domain + + def get_domain_type(self): + return self.type + def get_current_port(self): return self.current_port @@ -361,10 +379,16 @@ def get_domain_correlation(self): ''' return get_domain_all_correlation(self.domain, get_nb=True) - def get_domain_history_with_status(self): + def get_domain_history(self): ''' Retun the full history of a given domain and port. ''' + return get_domain_history(self.domain, self.type, 80) + + def get_domain_history_with_status(self): + ''' + Retun the full history (with status) of a given domain and port. + ''' return get_domain_history_with_status(self.domain, self.type, 80) def get_domain_items_crawled(self, port=None, epoch=None, items_link=False, item_screenshot=False, item_tag=False): diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index 8150ebf3..b3f33079 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -147,6 +147,50 @@ def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=Fals dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl]) return dict_correlation + def _get_item_correlation_obj(self, item_id, correlation_type): + ''' + Return correlation of a given item id. 
+ + :param item_id: item id + :type item_id: str + :param correlation_type: correlation type + :type correlation_type: str + + :return: a list of correlation + :rtype: list + ''' + res = r_serv_metadata.smembers('item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id)) + if res: + return list(res) + else: + return [] + + def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False): + ''' + Return all correlation of a given item id. + + :param item_id: item id + :param correlation_type: list of correlation types + :type correlation_type: list, optional + + :return: a dictionnary of all the requested correlations + :rtype: dict + ''' + correlation_type = self.sanythise_correlation_types(correlation_type) + dict_correlation = {} + for correl in correlation_type: + res = self._get_item_correlation_obj(item_id, correl) + if res: + dict_correlation[correl] = res + if get_nb: + dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl]) + return dict_correlation + + + + def save_domain_correlation(self, domain, correlation_type, correlation_value): + r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain), correlation_value) + r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, correlation_value), domain) ######## API EXPOSED ######## diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py index d83f91dd..bce4ac05 100755 --- a/bin/packages/Cryptocurrency.py +++ b/bin/packages/Cryptocurrency.py @@ -61,6 +61,7 @@ def get_cryptocurrency(request_dict, cryptocurrency_type): return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) +# # TODO: refractor/move me in Correlation def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): # create basic medata if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, 
cryptocurrency_address)): @@ -89,7 +90,7 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address) # domain - if Item.is_crawled(item_path): + if Item.is_crawled(item_path): # # TODO: use save_domain_correlation domain = Item.get_item_domain(item_path) r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address) r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain) diff --git a/bin/packages/Item.py b/bin/packages/Item.py index ff19a19e..497a0499 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -8,10 +8,13 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader +import Decoded sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Date import Tag +from Cryptocurrency import cryptocurrency +from Pgp import pgp config_loader = ConfigLoader.ConfigLoader() PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' @@ -126,7 +129,61 @@ def get_item(request_dict): ### ### correlation ### +def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False): + ''' + Return all cryptocurrencies of a given item. + + :param item_id: item id + :param currencies_type: list of cryptocurrencies type + :type currencies_type: list, optional + ''' + return cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb) + +def get_item_pgp(item_id, currencies_type=None, get_nb=False): + ''' + Return all pgp of a given item. 
+ + :param item_id: item id + :param currencies_type: list of cryptocurrencies type + :type currencies_type: list, optional + ''' + return pgp.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb) + +def get_item_decoded(item_id): + ''' + Return all pgp of a given item. + + :param item_id: item id + :param currencies_type: list of cryptocurrencies type + :type currencies_type: list, optional + ''' + return Decoded.get_item_decoded(item_id) + +def get_item_all_correlation(item_id, correlation_type=None, get_nb=False): + ''' + Retun all correlation of a given item id. + + :param item_id: item id + :type domain: str + + :return: a dict of all correlation for a item id + :rtype: dict + ''' + item_correl = {} + res = get_item_cryptocurrency(item_id, get_nb=get_nb) + if res: + item_correl['cryptocurrency'] = res + res = get_item_pgp(item_id, get_nb=get_nb) + if res: + item_correl['pgp'] = res + res = get_item_decoded(item_id) + if res: + item_correl['decoded'] = res + return item_correl + + +## TODO: REFRACTOR def _get_item_correlation(correlation_name, correlation_type, item_id): res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) if res: @@ -134,18 +191,23 @@ def _get_item_correlation(correlation_name, correlation_type, item_id): else: return [] +## TODO: REFRACTOR def get_item_bitcoin(item_id): return _get_item_correlation('cryptocurrency', 'bitcoin', item_id) +## TODO: REFRACTOR def get_item_pgp_key(item_id): return _get_item_correlation('pgpdump', 'key', item_id) +## TODO: REFRACTOR def get_item_pgp_name(item_id): return _get_item_correlation('pgpdump', 'name', item_id) +## TODO: REFRACTOR def get_item_pgp_mail(item_id): return _get_item_correlation('pgpdump', 'mail', item_id) +## TODO: REFRACTOR def get_item_pgp_correlation(item_id): pass diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index ac5143d1..1e6c9fbc 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -172,8 
+172,11 @@ def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"): res_dict['type'] = item_type return (res_dict, 200) +def add_domain_tag(tag, domain, item_date): + r_serv_metadata.sadd('tag:{}'.format(domain), tag) + r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) -def add_item_tag(tag, item_path, item_type="paste"): +def add_item_tag(tag, item_path, item_type="paste", tag_date=None): if item_type=="paste": item_date = int(Item.get_item_date(item_path)) @@ -189,8 +192,7 @@ def add_item_tag(tag, item_path, item_type="paste"): # domain item else: item_date = int(Domain.get_domain_last_check(item_path, r_format="int")) - r_serv_metadata.sadd('tag:{}'.format(item_path), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), item_path) + add_domain_tag(tag, item_path, item_date) r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) diff --git a/bin/update-background.py b/bin/update-background.py index 6f58d0f5..7ba51f1c 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -54,5 +54,22 @@ r_serv.delete('ail:current_background_script_stat') r_serv.delete('ail:current_background_update') - if r_serv.scard('ail:update_v2.4') != 1: - pass + if r_serv.get('ail:current_background_update') == 'v2.4': + r_serv.delete('ail:update_error') + r_serv.set('ail:update_in_progress', 'v2.4') + r_serv.set('ail:current_background_update', 'v2.4') + r_serv.set('ail:current_background_script', 'domain update') + + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v2.4', 'Update_domain.py') + process = subprocess.run(['python' ,update_file]) + + + if int(r_serv.get('ail:current_background_script_stat')) != 100: + r_serv.set('ail:update_error', 'Update v2.4 Failed, please relaunch the bin/update-background.py script') + else: + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + 
r_serv.delete('ail:current_background_update') + r_serv.delete('update:nb_elem_to_convert') + r_serv.delete('update:nb_elem_converted') diff --git a/update/v2.4/Update.py b/update/v2.4/Update.py index e45f5241..53456330 100755 --- a/update/v2.4/Update.py +++ b/update/v2.4/Update.py @@ -19,12 +19,17 @@ config_loader = ConfigLoader.ConfigLoader() r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None #Set current update_in_progress r_serv.set('ail:update_in_progress', new_version) r_serv.set('ail:current_background_update', new_version) + r_serv_onion.sunionstore('domain_update_v2.4', 'full_onion_up', 'full_regular_up') + r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v2.4')) + r_serv.set('update:nb_elem_converted',0) + #Set current ail version r_serv.set('ail:version', new_version) diff --git a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py new file mode 100755 index 00000000..584818d8 --- /dev/null +++ b/update/v2.4/Update_domain.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Item +import Tag +from Cryptocurrency import cryptocurrency +from Pgp import pgp + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader +import Decoded +import Domain + +def update_update_stats(): + nb_updated = int(r_serv_db.get('update:nb_elem_converted')) + progress = int((nb_updated * 100) / nb_elem_to_update) + print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress)) + r_serv_db.set('ail:current_background_script_stat', progress) + +def update_domain_by_item(domain_obj, item_id): + domain_name = domain_obj.get_domain_name() + # update domain tags + for tag in Tag.get_item_tags(item_id): + if tag != 'infoleak:submission="crawler"' and tag != 
'infoleak:submission="manual"': + Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id)) + + # update domain correlation + item_correlation = Item.get_item_all_correlation(item_id) + + for correlation_name in item_correlation: + for correlation_type in item_correlation[correlation_name]: + if correlation_name in ('pgp', 'cryptocurrency'): + for correl_value in item_correlation[correlation_name][correlation_type]: + if correlation_name=='pgp': + pgp.save_domain_correlation(domain_name, correlation_type, correl_value) + if correlation_name=='cryptocurrency': + cryptocurrency.save_domain_correlation(domain_name, correlation_type, correl_value) + if correlation_name=='decoded': + for decoded_item in item_correlation['decoded']: + Decoded.save_domain_decoded(domain_name, decoded_item) + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + nb_elem_to_update = int( r_serv_db.get('update:nb_elem_to_convert') ) + + while True: + domain = r_serv_onion.spop('domain_update_v2.4') + if domain is not None: + print(domain) + domain = Domain.Domain(domain) + for domain_history in domain.get_domain_history(): + + domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag + if "items" in domain_item: + for item_dict in domain_item['items']: + update_domain_by_item(domain, item_dict['id']) + + r_serv_db.incr('update:nb_elem_converted') + update_update_stats() + + else: + sys.exit(0) diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 2f142a9c..e1639c30 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -69,5 +69,7 @@ def showDomain(): dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: 
handle multiple port dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) + print(dict_domain) + return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, modal_add_tags=get_modal_add_tags(dict_domain['domain'], tag_type="domain")) diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 6525cb5e..0619b564 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -86,7 +86,9 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_message': 'An Update is running on the background. Some informations like Tags, screenshot can be', - 'update_warning_message_notice_me': 'missing from the UI.'} + 'update_warning_message_notice_me': 'missing from the UI.'}, + 'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', + 'update_warning_message_notice_me': 'missing from the UI.'} } UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 8e95dcb5..2ebc09f5 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -102,6 +102,47 @@

{{ dict_domain['domain'] }} :

+ {% if 'decoded' in dict_domain%} +
+
+
+
+
+
+ Decoded   +
{{dict_domain['decoded']|length}}
+
+
+
+ +
+
+
+
+
+ + + + + + + + {% for decoded in dict_domain['decoded']%} + + + + {% endfor %} + +
Decoded
{{ decoded }}
+
+
+
+
+ {% endif %} + + {% if 'pgp' in dict_domain%}
@@ -346,6 +387,9 @@
Crawled Items