Skip to content

Commit

Permalink
chg: [vanity] add vanity domain explorer + fix blurred screenshot + f…
Browse files Browse the repository at this point in the history
…ix languages search filter
  • Loading branch information
Terrtia committed Feb 19, 2024
1 parent 495ceea commit f07a4b4
Show file tree
Hide file tree
Showing 14 changed files with 361 additions and 315 deletions.
4 changes: 3 additions & 1 deletion bin/crawlers/Crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def enqueue_capture(self, task_uuid, priority):
# force=force,
# general_timeout_in_sec=120)

# with_favicon = True,
capture_uuid = self.lacus.enqueue(url=url,
depth=task.get_depth(),
user_agent=task.get_user_agent(),
Expand Down Expand Up @@ -274,8 +275,9 @@ def compute(self, capture):
for tag in task.get_tags():
self.domain.add_tag(tag)
self.original_domain.add_history(epoch, root_item=self.root_item)
crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
# crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)

self.domain.update_vanity_cluster()
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
print('capture:', capture.uuid, 'completed')
print('task: ', task.uuid, 'completed')
Expand Down
71 changes: 67 additions & 4 deletions bin/lib/objects/Domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,10 @@ def add_language(self, language):
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
r_crawler.sadd(f'domain:language:{self.id}', language)

def update_vanity_cluster(self):
    """Register this domain in the vanity clusters if it is an onion domain.

    Domains of any other type are left untouched. Returns None.
    """
    if self.get_domain_type() != 'onion':
        return
    # Resolves to the module-level update_vanity_cluster() helper, not this method.
    update_vanity_cluster(self.id)

############################################################################
############################################################################

Expand Down Expand Up @@ -644,10 +648,71 @@ def api_search_domains_by_name(name_to_search, domain_types, meta=False, page=1)
################################################################################
################################################################################

#### Vanity Explorer ####

# TODO ADD ME IN OBJ CLASS
def get_domain_vanity(domain, len_vanity=4):
    """Return the vanity prefix of ``domain``: its first ``len_vanity`` characters.

    A domain shorter than ``len_vanity`` is returned unchanged.
    """
    prefix_end = len_vanity
    return domain[0:prefix_end]

def get_vanity_clusters(nb_min=4):
    """Return the 4-character onion vanity clusters scoring at least ``nb_min``.

    Reads the 'vanity:onion:4' sorted set by score, from ``nb_min`` up to
    '+inf', and returns the members together with their scores.
    """
    clusters = r_crawler.zrange('vanity:onion:4', nb_min, '+inf', byscore=True, withscores=True)
    return clusters

def get_vanity_domains(vanity, len_vanity=4, meta=False):
    """Return the domains whose vanity prefix equals ``vanity``.

    :param vanity: vanity prefix to look up
    :param len_vanity: prefix length; only 4-character prefixes are stored
        directly, other lengths are filtered from the matching 4-character set
    :param meta: when true, return each domain's metadata instead of its id
    :return: the stored set members (``len_vanity == 4``) or a list of domains;
        a list of metadata when ``meta`` is set
    """
    if len_vanity != 4:
        # Only 4-character buckets exist: start from the bucket sharing the
        # first 4 characters and keep the domains matching the full prefix.
        bucket = r_crawler.smembers(f'vanity:4:{vanity[:4]}')
        domains = [dom for dom in bucket
                   if vanity == get_domain_vanity(dom, len_vanity=len_vanity)]
    else:
        domains = r_crawler.smembers(f'vanity:{int(len_vanity)}:{vanity}')

    if not meta:
        return domains
    return [Domain(dom).get_meta(options={'languages', 'screenshot', 'tags_safe'})
            for dom in domains]

def get_vanity_cluster(vanity, len_vanity=4, nb_min=4):
    """Return the vanity clusters of length ``len_vanity`` related to ``vanity``.

    :param vanity: vanity prefix being explored (only its first 4 characters
        are used to select the stored bucket)
    :param len_vanity: length of the cluster prefixes to compute
    :param nb_min: minimum number of domains a cluster must hold to be kept
    :return: for ``len_vanity == 4`` the result of ``get_vanity_clusters()``;
        otherwise a dict mapping each prefix to its number of domains
    """
    if len_vanity == 4:
        return get_vanity_clusters(nb_min=nb_min)

    counts = {}
    for domain in get_vanity_domains(vanity[:4], len_vanity=4):
        new_vanity = get_domain_vanity(domain, len_vanity=len_vanity)
        # Count per computed prefix. The previous code tested `vanity` instead
        # of `new_vanity`, resetting every counter to 0 on each iteration.
        counts[new_vanity] = counts.get(new_vanity, 0) + 1
    # NOTE(review): every prefix sharing the first 4 characters is counted, not
    # only those starting with `vanity` -- confirm this is the intended scope.
    return {prefix: nb for prefix, nb in counts.items() if not nb < nb_min}

def get_vanity_nb_domains(vanity, len_vanity=4):
    """Return the cardinality of the set stored for ``vanity`` at ``len_vanity``."""
    cluster_key = f'vanity:{int(len_vanity)}:{vanity}'
    return r_crawler.scard(cluster_key)

# TODO BUILD DICTIONARY
def update_vanity_cluster(domain):
    """Add ``domain`` to its 4-character vanity cluster.

    The domain is stored in the 'vanity:4:<prefix>' set. When it was not
    already a member, the prefix's score in the 'vanity:onion:4' sorted set
    is incremented, so the score tracks the number of domains in the cluster.
    """
    vanity = get_domain_vanity(domain, len_vanity=4)
    added = r_crawler.sadd(f'vanity:4:{vanity}', domain)
    if added == 1:
        # Increment rather than overwrite: zadd(..., {vanity: 1}) pinned every
        # score to 1, so score-based queries never saw the real cluster size.
        r_crawler.zincrby('vanity:onion:4', 1, vanity)

def _rebuild_vanity_clusters():
    """Delete all stored vanity clusters, then rebuild them from scratch.

    Every 'vanity:4:<prefix>' set referenced by 'vanity:onion:4' is removed,
    followed by the sorted set itself; the clusters are then repopulated from
    the domains returned by ``get_domains_up_by_type('onion')``.
    """
    known_vanities = r_crawler.zrange('vanity:onion:4', 0, -1)
    for prefix in known_vanities:
        r_crawler.delete(f'vanity:4:{prefix}')
    r_crawler.delete('vanity:onion:4')

    for onion in get_domains_up_by_type('onion'):
        update_vanity_cluster(onion)

def cluster_onion_domain_vanity(len_vanity=4):
domains = {}
occurrences = {}
for domain in get_domains_up_by_type('web'):
for domain in get_domains_up_by_type('onion'):
start = domain[:len_vanity]
if start not in domains:
domains[start] = []
Expand All @@ -659,8 +724,6 @@ def cluster_onion_domain_vanity(len_vanity=4):
res = dict(sorted(occurrences.items(), key=lambda item: item[1], reverse=True))
print(json.dumps(res))

################################################################################
################################################################################

if __name__ == '__main__':
cluster_onion_domain_vanity(len_vanity=4)
_rebuild_vanity_clusters()
31 changes: 31 additions & 0 deletions var/www/blueprints/crawler_splash.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,37 @@ def domains_search_date_post():
type=domain_type, down=down, up=up))


@crawler_splash.route('/domains/explorer/vanity', methods=['GET'])
@login_required
@login_analyst
def domains_explorer_vanity_clusters():
    """Render the list of 4-character vanity clusters.

    Query parameters:
        min: minimum score a cluster must have to be listed. Defaults to 0;
             negative or non-numeric values fall back to 4.
    """
    try:
        # Convert once and pass the integer on, instead of validating with
        # int() and then forwarding the raw query string.
        nb_min = int(request.args.get('min', 0))
    except (TypeError, ValueError):
        # Non-numeric input used to raise and produce an HTTP 500.
        nb_min = 4
    if nb_min < 0:
        nb_min = 4
    vanity_clusters = Domains.get_vanity_clusters(nb_min=nb_min)
    return render_template("explorer_vanity_clusters.html", vanity_clusters=vanity_clusters,
                           length=4)

@crawler_splash.route('/domains/explorer/vanity/explore', methods=['GET'])
@login_required
@login_analyst
def domains_explorer_vanity_explore():
    """Render the domains and sub-clusters of a given vanity prefix.

    Query parameters:
        vanity: vanity prefix to explore (treated as empty when missing).
        min: minimum number of domains a sub-cluster must hold to be listed.
             Defaults to 0; negative or non-numeric values fall back to 4.
    """
    # A missing parameter used to crash on len(None); an empty prefix simply
    # matches nothing.
    vanity = request.args.get('vanity') or ''
    # TODO SHOW DOMAINS OPTIONS + HARD CODED DOMAINS LIMIT FOR RENDER
    try:
        # Must be an int: get_vanity_cluster() compares it against counters.
        nb_min = int(request.args.get('min', 0))
    except (TypeError, ValueError):
        nb_min = 4
    if nb_min < 0:
        nb_min = 4
    length = len(vanity)

    # Sub-clusters are one character longer than the explored prefix.
    vanity_clusters = Domains.get_vanity_cluster(vanity, len_vanity=length + 1, nb_min=nb_min)
    vanity_domains = Domains.get_vanity_domains(vanity, len_vanity=length, meta=True)

    # Breadcrumb: each ancestor prefix from 4 characters up, then the prefix itself.
    vanities_tree = [vanity[:i] for i in range(4, length)]
    vanities_tree.append(vanity)

    return render_template("explorer_vanity_domains.html", vanity_clusters=vanity_clusters,
                           bootstrap_label=bootstrap_label, vanity=vanity, vanities_tree=vanities_tree,
                           vanity_domains=vanity_domains, length=length)

##-- --##


Expand Down
2 changes: 1 addition & 1 deletion var/www/blueprints/objects_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def screenshot(filename):
abort(404)
filename = filename.replace('/', '')
s = Screenshot(filename)
return send_from_directory(SCREENSHOT_FOLDER, s.get_rel_path(add_extension=True), as_attachment=True)
return send_from_directory(SCREENSHOT_FOLDER, s.get_rel_path(add_extension=True), as_attachment=False, mimetype='image')

@objects_item.route("/object/item")
@login_required
Expand Down
4 changes: 2 additions & 2 deletions var/www/templates/chats_explorer/block_message.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
height: 2px;
background: #eee;
}
.message_image {
.object_image {
max-width: 50%;
filter: blur(5px);
}
Expand Down Expand Up @@ -66,7 +66,7 @@
{% endif %}
{% if message['images'] %}
{% for message_image in message['images'] %}
<img class="message_image mb-1" src="{{ url_for('objects_image.image', filename=message_image)}}">
<img class="object_image mb-1" src="{{ url_for('objects_image.image', filename=message_image)}}">
{% endfor %}
{% endif %}
{% if message['files-names'] %}
Expand Down
103 changes: 2 additions & 101 deletions var/www/templates/crawler/crawler_splash/domain_explorer.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,26 +41,8 @@
</div>
<div class="col-12 col-xl-6">

<div class="card my-2 border-secondary" >
<div class="card-body py-2">
<div class="row">
<div class="col-md-3 text-center">
<button class="btn btn-primary" onclick="blocks.value=0;pixelate_all();">
<i class="fas fa-eye-slash"></i>
<span class="label-icon">Hide</span>
</button>
</div>
<div class="col-md-6">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="3">
</div>
<div class="col-md-3 text-center">
<button class="btn btn-primary" onclick="blocks.value=50;pixelate_all();">
<i class="fas fa-plus-square"></i>
<span class="label-icon">Full resolution</span>
</button>
</div>
</div>
</div>
<div class="my-3">
{% include 'objects/image/block_blur_img_slider.html' %}
</div>

</div>
Expand Down Expand Up @@ -144,85 +126,4 @@
}
</script>


<script>

// img_url
// ctx
// canevas_id

var dict_canevas_blurr_img = {}

function init_canevas_blurr_img(canevas_id, img_url){
    // Register the canvas entry first, before any DOM lookup.
    var entry = {};
    dict_canevas_blurr_img[canevas_id] = entry;

    // 2D context with image smoothing turned off, so scaled draws stay blocky.
    var ctx = document.getElementById(canevas_id).getContext('2d');
    ctx.webkitImageSmoothingEnabled = false;
    ctx.imageSmoothingEnabled = false;
    entry["ctx"] = ctx;

    // Source image: pixelate once loaded, call img_error() on load failure.
    var img = new Image();
    entry["img"] = img;
    img.onload = function() {pixelate_img(canevas_id);};
    img.addEventListener("error", function() {img_error(canevas_id);});
    img.src = img_url;
}

function pixelate_all(){
    // Redraw every registered canvas using the current slider value.
    for (const canevas_id of Object.keys(dict_canevas_blurr_img)) {
        pixelate_img(canevas_id);
    }
}

function pixelate_img(canevas_id) {
    // Nothing to draw when no canvas id is supplied.
    if (typeof canevas_id === 'undefined') {
        return;
    }

    var canevas_to_blurr = document.getElementById(canevas_id);
    var entry = dict_canevas_blurr_img[canevas_id];

    // Slider value 50 means full resolution; otherwise scale to value percent.
    var size = (blocks.value == 50) ? 1 : (blocks.value) * 0.01;

    canevas_to_blurr.width = entry["img"].width;
    canevas_to_blurr.height = entry["img"].height;

    // Scaled dimensions, declared locally (they used to leak as implicit globals).
    var w = canevas_to_blurr.width * size;
    var h = canevas_to_blurr.height * size;

    // Draw the original image at the scaled size...
    entry["ctx"].drawImage(entry["img"], 0, 0, w, h);

    // ...then stretch that small copy back over the whole canvas (pixelated).
    entry["ctx"].drawImage(canevas_to_blurr, 0, 0, w, h, 0, 0, canevas_to_blurr.width, canevas_to_blurr.height);
}

function img_error(canevas_id) {
    // Clear the onerror property, then swap in the static placeholder image.
    var img = dict_canevas_blurr_img[canevas_id]["img"];
    img.onerror = null;
    img.src = "{{ url_for('static', filename='image/AIL.png') }}";
}

// Re-pixelate every registered canvas whenever the slider value changes.
blocks.addEventListener('change', pixelate_all, false);

// Server-side loop: emits one init call per domain whose tags are flagged safe,
// using the domain screenshot when present and the static AIL image otherwise.
{% for dict_domain in dict_data['list_elem'] %}
{% if dict_domain['is_tags_safe'] %}
{% if dict_domain['screenshot'] %}
var screenshot_url = "{{ url_for('objects_item.screenshot', filename="") }}{{dict_domain['screenshot']}}";
{% else %}
var screenshot_url = "{{ url_for('static', filename='image/AIL.png') }}";
{% endif %}
init_canevas_blurr_img("canvas_{{loop.index0}}", screenshot_url);
{% endif %}

{% endfor %}

</script>

</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
<!DOCTYPE html>
<html>
<head>
<title>Vanity Explorer - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>

<style>
.card-columns {
column-count: 4;
}
</style>

</head>
<body>

{% include 'nav_bar.html' %}

<div class="container-fluid">
<div class="row">

{% include 'crawler/menu_sidebar.html' %}

<div class="col-12 col-lg-10" id="core_content">

<h1>Vanity Explorer</h1>

<table id="tableClusters" class="table">
<thead class="bg-dark text-white">
<tr>
<th>Vanity</th>
<th>NB Domains</th>
<th></th>
</tr>
</thead>
<tbody style="font-size: 15px;">
{% for row in vanity_clusters %}
<tr>
<td>
<a href="{{ url_for('crawler_splash.domains_explorer_vanity_explore') }}?vanity={{row[0]}}&length={{ length }}">{{ row[0] }}</a>
</td>
<td>{{ row[1] | int }}</td>
<td>

</td>
</tr>
{% endfor %}
</tbody>
</table>


</div>
</div>
</div>
</body>


<script>
// Page setup, run once the DOM is ready.
$(document).ready(function(){
// Highlight the vanity explorer entry in the navigation.
$('#nav_title_domains_explorer').removeClass("text-muted");
$("#nav_vanity_explorer").addClass("active");
// Turn the clusters table into a DataTable: page sizes 5/10/15/All, 10 rows
// shown initially, sorted by the second column ("NB Domains") descending.
$('#tableClusters').DataTable({
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 1, "desc" ]]
});

});

function toggle_sidebar(){
    // Show or hide the side navigation menu and adjust the layout classes to match.
    var nav_menu = $('#nav_menu');
    var side_menu = $('#side_menu');
    var core_content = $('#core_content');

    if (nav_menu.is(':visible')) {
        nav_menu.hide();
        side_menu.removeClass('border-right');
        side_menu.removeClass('col-lg-2');
        core_content.removeClass('col-lg-10');
        return;
    }

    nav_menu.show();
    side_menu.addClass('border-right');
    side_menu.addClass('col-lg-2');
    core_content.addClass('col-lg-10');
}
</script>

</html>

0 comments on commit f07a4b4

Please sign in to comment.