-
Notifications
You must be signed in to change notification settings - Fork 282
/
Phone.py
executable file
·96 lines (78 loc) · 3.29 KB
/
Phone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Phone Module
================
This module is consuming the Redis-list created by the Categ module.
It applies phone number regexes to item content and warns if the matches exceed a threshold.
"""
##################################
# Import External packages
##################################
import os
import sys
import phonenumbers
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
# # TODO: # FIXME: improve regex / filter false positives
class Phone(AbstractModule):
    """
    Phone module for AIL framework.

    Searches object content for phone numbers through the inherited
    ``regex_phone_iter`` helper and, when at least one match is found,
    pushes the ``phone-number`` info-leak tag to the ``Tags`` queue and
    logs a warning.
    """

    # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
    # reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')

    def __init__(self, queue=True):
        """
        Initialise the module.

        :param queue: forwarded unchanged to ``AbstractModule.__init__``.
        """
        super(Phone, self).__init__(queue=queue)

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

    def extract(self, obj, content, tag):
        """
        Return the phone matches found in ``content``, labelled with ``tag``.

        :param obj: object being inspected; only ``get_global_id()`` is used.
        :param content: text to search.
        :param tag: tag name, emitted as ``'tag:<tag>'`` in every entry.
        :return: a list with one ``[m[0], m[1], m[2], 'tag:<tag>']`` entry per
                 match ``m`` returned by ``regex_phone_iter``.
        """
        # NOTE(review): the first three fields of a match are presumably
        # (start, end, matched number) - confirm against regex_phone_iter.
        phones = self.regex_phone_iter('ZZ', obj.get_global_id(), content)
        return [[phone[0], phone[1], phone[2], f'tag:{tag}'] for phone in phones]

    def compute(self, message):
        """
        Process the current object: look for phone numbers, then tag and warn.

        :param message: queue message that triggered the call; it is not read
                        here, the object is obtained through ``get_obj()``.
        """
        item = self.get_obj()
        content = item.get_content()

        # TODO use language detection to choose the country code ?
        # NOTE(review): 'ZZ' is presumably the phonenumbers "unknown region"
        # code - confirm.
        results = self.regex_phone_iter('ZZ', item.id, content)
        for phone in results:
            print(phone[2])

        if results:
            # TAGS
            tag = 'infoleak:automatic-detection="phone-number"'
            self.add_message_to_queue(message=tag, queue='Tags')

            # Report the number of matches: len(results), not the length of
            # the last match entry left over from the loop above.
            self.redis_logger.warning(f'{self.obj.get_global_id()} contains {len(results)} Phone numbers')

        # Legacy regex-based implementation, kept commented out for reference.
        #
        # # List of the regex results in the Item, may be null
        # results = self.REG_PHONE.findall(content)
        #
        # # If the list is greater than 4, we consider the Item may contain a list of phone numbers
        # if len(results) > 4:
        #     self.logger.debug(results)
        #     self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')
        #
        #     msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
        #     self.add_message_to_queue(msg, 'Tags')
        #
        #     stats = {}
        #     for phone_number in results:
        #         try:
        #             x = phonenumbers.parse(phone_number, None)
        #             country_code = x.country_code
        #             if stats.get(country_code) is None:
        #                 stats[country_code] = 1
        #             else:
        #                 stats[country_code] = stats[country_code] + 1
        #         except:
        #             pass
        #     for country_code in stats:
        #         if stats[country_code] > 4:
        #             self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}')
# Script entry point: build the Phone module and start its processing loop.
if __name__ == '__main__':
    Phone().run()