Skip to content

Commit

Permalink
draft: implement network size estimator formula
Browse files Browse the repository at this point in the history
  • Loading branch information
bochaco committed Apr 10, 2024
1 parent 7add612 commit 50cfb4a
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 50 deletions.
15 changes: 10 additions & 5 deletions sn_networking/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -817,17 +817,22 @@ impl Network {

/// Using a random address, check if there is a sybil attack around it
pub async fn perform_sybil_attack_check(&self) {
let (random_addr, cid) = {
let random_addr = {
let mut rng = rand::thread_rng();
let cid = XorName::random(&mut rng);
let chunk_addr = ChunkAddress::new(cid);
(NetworkAddress::from_chunk_address(chunk_addr), cid)
NetworkAddress::from_chunk_address(ChunkAddress::new(cid))
};

match self.get_closest_peers(&random_addr, true).await {
Ok(closest_peers) => {
if check_for_sybil_attack(&closest_peers, &cid).await {
info!(">>> Sybil attack detected around xorname: {cid}");
if check_for_sybil_attack(
&closest_peers,
random_addr.as_kbucket_key(),
&BTreeMap::default(),
)
.await
{
info!(">>> Sybil attack detected around addr: {random_addr}");
}
}
Err(err) => error!(">>> Failed to get closes peer to check for sybil attack: {err:?}"),
Expand Down
157 changes: 112 additions & 45 deletions sn_networking/src/sybil.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,81 +6,148 @@
// KIND, either express or implied. Please review the Licences for the specific language governing
// permissions and limitations relating to use of the SAFE Network Software.

use libp2p::PeerId;
use std::collections::{BTreeMap, HashMap};

use itertools::Itertools;
use libp2p::{
kad::{KBucketKey, K_VALUE},
PeerId,
};
use num::{integer::binomial, pow::Pow};
use xor_name::{XorName, XOR_NAME_LEN};

// Threshold to determine if there is an attack using Kullback-Leibler (KL) divergence
// between model peer ids distribution vs. actual distribution around any point in the address space.
const KL_DIVERGENCE_THRESHOLD: f64 = 10f64; // TODO: find a good value
const KL_DIVERGENCE_THRESHOLD: f64 = 10f64; // TODO: find a proper value

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment

const ITERATIONS_FOR_NET_SIZE_ESTIMATION: usize = 50;

const K: usize = 20;
const N: usize = 25; // TODO: replace with network size estimation;
// The container maps each random KAD Key to the ordered list
// of its K_VALUE closest peers, sorted by increasing distance. This order
// is a prerequisite for the functions this container is used by,
// i.e. their result is dependent on the correct ordering of these values.
pub(super) type RandomKeysAndClosestPeerIds = BTreeMap<KBucketKey<Vec<u8>>, Vec<PeerId>>;

// Given the set of closest K peers ids to the passed content address, return 'true'
// if there is probabilistically a sybil attack around that CID address.
pub(super) async fn check_for_sybil_attack(peers: &[PeerId], cid: &XorName) -> bool {
// TODO: do we go ahead even if we don't have at least K peer ids...?
info!(
">>> CHECKING SYBIL ATTACK WITH {} PEERS: {peers:?}",
peers.len()
);
let q = num_peers_per_cpl(peers, cid);
let n = get_net_size_estimate();
let p = compute_model_distribution(n);
info!(">>> MODEL DIST WITH {} PEERS: {p}", peers.len());
let kl_divergence = compute_kl_divergence(p, q);
// This implements the algorithm proposed in https://ssg.lancs.ac.uk/wp-content/uploads/ndss_preprint.pdf
pub(super) async fn check_for_sybil_attack(
peers: &[PeerId],
cid: KBucketKey<Vec<u8>>,
random_keys: &RandomKeysAndClosestPeerIds,
) -> bool {
let k = peers.len();
info!(">>> CHECKING SYBIL ATTACK WITH {k} PEERS: {peers:?}");

// FIXME: return error if we don't have at least K peer ids per key

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
assert!(k >= K_VALUE.get());
assert!(random_keys
.iter()
.all(|(_, peers)| peers.len() >= K_VALUE.get()));

let cpls_freqs = average_num_peers_per_cpl(peers, cid.clone());
let q = |x| cpls_freqs.get(&x).cloned().unwrap_or(0) as f64 / k as f64;

let n = get_net_size_estimate(random_keys);
let model_dist = compute_model_distribution(n);
let p = |x| model_dist.get(&(x as usize)).cloned().unwrap_or(0f64) / k as f64;

let kl_divergence = compute_kl_divergence(&p, &q);

kl_divergence > KL_DIVERGENCE_THRESHOLD
}

// Formula 6 in page 7
fn num_peers_per_cpl(peers: &[PeerId], cid: &XorName) -> usize {
let peers_per_cpl = peers.iter().fold(0, |acc, peer| {
let peer_kad_id = XorName::from_content(&peer.to_bytes());
acc + common_prefix(&peer_kad_id, cid)
// Formula 1 in page 3
// Compute the average distance between each of the passed random keys,
// and their i-th closest peer
fn average_between_keys_and_i_th_closest_peer(
i: usize,
random_keys: &RandomKeysAndClosestPeerIds,
) -> f64 {
let m = random_keys.len() as f64;
let distances = random_keys.iter().fold(0f64, |acc, (key_j, peers)| {
let i_th_peer: KBucketKey<PeerId> = peers[i].into();
let distance = key_j.distance(&i_th_peer).ilog2().unwrap_or(0) as f64;
acc + distance
});

peers_per_cpl / K
distances / m
}

// TODO: use released https://github.com/maidsafe/xor_name/pull/96 instead
fn common_prefix(lhs: &XorName, rhs: &XorName) -> usize {
for byte_index in 0..XOR_NAME_LEN {
if lhs[byte_index] != rhs[byte_index] {
return (byte_index * 8) + (lhs[byte_index] ^ rhs[byte_index]).leading_zeros() as usize;
// Formula 2 in page 3
// Estimates the network size based on the distances between the provided
// random KAD Keys and their closest PeerIds.
fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize {
let mut best_n_found = 0;
let mut smallest_value_found = f64::MAX;
for n in 0..ITERATIONS_FOR_NET_SIZE_ESTIMATION {
let value = (1..=K_VALUE.get()).fold(0f64, |acc, i| {
let d_i = average_between_keys_and_i_th_closest_peer(i, random_keys);
let dist: f64 = d_i - ((2f64.pow(256) * i as f64) / (n + 1) as f64);
acc + dist.pow(2)
});
if value < smallest_value_found {
smallest_value_found = value;
best_n_found = n;
}
}
8 * XOR_NAME_LEN
}

// Formula 1 and 2 in page ??
fn get_net_size_estimate() -> usize {
// TODO!
N
best_n_found
}

// Formula 3 in page 7
fn distrib_j_th_largest_prefix_length(j: usize, x: usize) -> f64 {
fn distrib_j_th_largest_prefix_length(n: usize, j: usize, x: usize) -> f64 {
(0..j).fold(0f64, |acc, i| {
acc + binomial(N, i) as f64
* (1f64 - 0.5.pow((x + 1) as f64)).pow((N - i) as f64)
* 0.5.pow(((x + 1) * i) as f64)
acc + (binomial(n, i) as f64
* (1f64 - 0.5.pow((x + 1) as f64)).pow((n - i) as f64)
* 0.5.pow(((x + 1) * i) as f64))
})
}

// Formula 4 in page 7
fn compute_model_distribution(x: usize) -> f64 {
let model_dist = (1..K + 1).fold(0f64, |acc, j| {
acc + distrib_j_th_largest_prefix_length(j, x)
- distrib_j_th_largest_prefix_length(j, x - 1)
});
// Returns a map of common prefix lengths to their probabilistically expected frequency.
fn compute_model_distribution(n: usize) -> HashMap<usize, f64> {
let f = |x| {
(1..=K_VALUE.get()).fold(0f64, |acc, j| {
acc + distrib_j_th_largest_prefix_length(n, j, x)
- distrib_j_th_largest_prefix_length(n, j, x - 1)
})
};

model_dist / K as f64
(0..=255).map(|x| (x, f(x))).collect()
}

// Formula 5 in page 7
fn compute_kl_divergence(model_dist: f64, peers_per_cpl: usize) -> f64 {
fn compute_kl_divergence(
model_dist: &dyn Fn(u8) -> f64,
empirical_dist: &dyn Fn(u8) -> f64,
) -> f64 {
// TODO!

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
model_dist * peers_per_cpl as f64
model_dist(4) * empirical_dist(4)
}

// Formula 6 in page 7
// Returns a map with common prefix lengths of given peers and their frequency.
fn average_num_peers_per_cpl(peers: &[PeerId], cid: KBucketKey<Vec<u8>>) -> HashMap<u8, usize> {
let cid_bytes = cid.hashed_bytes();
peers
.iter()
.map(|peer| {
let peer_key: KBucketKey<PeerId> = (*peer).into();
common_prefix_length(peer_key.hashed_bytes(), cid_bytes)
})
.counts()
}

// Helper to calculate the number of common prefix bits between two byte slices.
//
// Bytes are compared pairwise from the start; every fully matching byte contributes
// 8 bits, and the first differing byte contributes the number of leading bits it still
// has in common, after which the comparison stops.
//
// Only the bytes present in both slices are compared, so slices shorter than 32 bytes
// (or of different lengths) do not cause a panic.
//
// The result saturates at `u8::MAX` (255): two identical 32-byte keys share 256 bits,
// which does not fit in the `u8` return type and would otherwise overflow.
fn common_prefix_length(lhs: &[u8], rhs: &[u8]) -> u8 {
    // Accumulate in a wider type so that 32 matching bytes (256 bits) cannot overflow
    let mut common_bits = 0usize;

    for (left, right) in lhs.iter().zip(rhs.iter()) {
        let diff = left ^ right;
        if diff != 0 {
            // The leading zeros of the XOR are exactly the matching leading bits
            common_bits += diff.leading_zeros() as usize;
            break;
        }
        common_bits += 8;
    }

    common_bits.min(usize::from(u8::MAX)) as u8
}

0 comments on commit 50cfb4a

Please sign in to comment.