netmap: Reply to ARP requests from gateway for scan source IPs #807

Merged: 3 commits merged on Mar 5, 2024
src/recv-netmap.c: 166 changes (130 additions & 36 deletions)

@@ -23,6 +23,7 @@
 #include "../lib/logger.h"
 
 #include <net/netmap_user.h>
+#include <net/if_arp.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <fcntl.h>
@@ -33,41 +34,121 @@
 #include <assert.h>
 #include <inttypes.h>
 
-static struct pollfd fds;
-static struct netmap_if *nm_if;
-static bool *in_multi_seg_packet;
+static void handle_packet_wait_ping(uint32_t buflen, const uint8_t *bytes, UNUSED const struct timespec ts);
 static void (*handle_packet_func)(uint32_t buflen, const uint8_t *bytes, const struct timespec ts);
-static if_stats_ctx_t *stats_ctx;
-static bool need_recv_counter;
-static uint64_t recv_counter;
+typedef size_t (*make_packet_func_t)(uint8_t *buf, void const *arg);
 
+// Send a packet on a netmap ring and fd directly.
+// Used to send packets before send threads are up.
 static void
-handle_packet_wait_ping(uint32_t buflen, const uint8_t *bytes, UNUSED const struct timespec ts)
+send_packet(make_packet_func_t mkpkt, void const *arg)
 {
-	if (buflen < sizeof(struct ether_header) + sizeof(struct ip) + ICMP_MINLEN) {
+	// Synthesize a sock_t for the main netmap fd.
+	// We're syncing all TX rings this way, not just ring 0.
+	sock_t sock;
+	sock.nm.tx_ring_idx = 0;
+	sock.nm.tx_ring_fd = zconf.nm.nm_fd;
+
+	batch_t *batch = create_packet_batch(1);
+	batch->lens[0] = (int)mkpkt((uint8_t *)batch->packets, arg);
+	assert(batch->lens[0] <= MAX_PACKET_SIZE);
+	batch->ips[0] = 0; // unused by netmap
+	batch->len = 1;
+	if (send_batch_internal(sock, batch) != 1) {
+		log_fatal("recv-netmap", "Failed to send packet: %d: %s", errno, strerror(errno));
+	}
+	free_packet_batch(batch);
+}
+
+// Submit a packet for sending by send thread 0.
+// Used to send packets after send threads are up.
+// Submitted packets are sent once per scan batch.
+static void
+submit_packet(make_packet_func_t mkpkt, void const *arg)
+{
+	batch_t *batch = create_packet_batch(1);
+	batch->lens[0] = (int)mkpkt((uint8_t *)batch->packets, arg);
+	assert(batch->lens[0] <= MAX_PACKET_SIZE);
+	batch->ips[0] = 0; // unused by netmap
+	batch->len = 1;
+	submit_batch_internal(batch); // consumes batch
+}
+
+// In netmap mode, the OS network stack never gets to see incoming packets
+// unless we explicitly forward them to the host rings; hence the kernel will
+// not be responding to ARP requests. To remove the need for static ARP
+// entries on the gateway, respond to ARP requests from the gateway for any of
+// the source IPs of the scan.
+
+#define ARP_ETHER_INET_PKT_LEN (sizeof(struct ether_header) + sizeof(struct arphdr) + 2 * ETHER_ADDR_LEN + 2 * sizeof(uint32_t))
+#define x_ar_sha(ap) ((uint8_t *)((ap) + 1))
+#define x_ar_spa(ap) (((uint8_t *)((ap) + 1)) + ETHER_ADDR_LEN)
+#define x_ar_tha(ap) (((uint8_t *)((ap) + 1)) + ETHER_ADDR_LEN + sizeof(uint32_t))
+#define x_ar_tpa(ap) (((uint8_t *)((ap) + 1)) + 2 * ETHER_ADDR_LEN + sizeof(uint32_t))
+
+static size_t
+make_arp_resp(uint8_t *buf, void const *arg)
+{
+	struct arphdr const *req_ah = (struct arphdr const *)arg;
+
+	struct ether_header *eh = (struct ether_header *)buf;
+	memcpy(eh->ether_shost, zconf.hw_mac, ETHER_ADDR_LEN);
+	memcpy(eh->ether_dhost, x_ar_sha(req_ah), ETHER_ADDR_LEN);
+	eh->ether_type = htons(ETHERTYPE_ARP);
+
+	struct arphdr *ah = (struct arphdr *)(eh + 1);
+	ah->ar_hrd = htons(ARPHRD_ETHER);
+	ah->ar_pro = htons(ETHERTYPE_IP);
+	ah->ar_hln = ETHER_ADDR_LEN;
+	ah->ar_pln = sizeof(uint32_t);
+	ah->ar_op = htons(ARPOP_REPLY);
+	memcpy(x_ar_sha(ah), zconf.hw_mac, ETHER_ADDR_LEN);
+	*(uint32_t *)x_ar_spa(ah) = *(uint32_t *)x_ar_tpa(req_ah);
+	memcpy(x_ar_tha(ah), x_ar_sha(req_ah), ETHER_ADDR_LEN);
+	*(uint32_t *)x_ar_tpa(ah) = *(uint32_t *)x_ar_spa(req_ah);
+
+	return ARP_ETHER_INET_PKT_LEN;
+}
+
+static void
+handle_packet_arp(uint32_t buflen, const uint8_t *bytes, UNUSED const struct timespec ts)
+{
+	if (buflen < ARP_ETHER_INET_PKT_LEN) {
 		return;
 	}
 	struct ether_header *eh = (struct ether_header *)bytes;
-	if (eh->ether_type != htons(ETHERTYPE_IP)) {
+	if (eh->ether_type != htons(ETHERTYPE_ARP)) {
 		return;
 	}
-	struct ip *iph = (struct ip *)(eh + 1);
-	if (iph->ip_v != 4 ||
-	    iph->ip_p != IPPROTO_ICMP ||
-	    iph->ip_src.s_addr != zconf.nm.wait_ping_dstip) {
+	struct arphdr *ah = (struct arphdr *)(eh + 1);
+	if (ah->ar_op != htons(ARPOP_REQUEST) ||
+	    ah->ar_hrd != htons(ARPHRD_ETHER) ||
+	    ah->ar_pro != htons(ETHERTYPE_IP) ||
+	    ah->ar_hln != ETHER_ADDR_LEN ||
+	    ah->ar_pln != sizeof(uint32_t)) {
 		return;
 	}
-	struct icmp *icmph = (struct icmp *)(iph + 1);
-	if (icmph->icmp_type != ICMP_ECHOREPLY) {
+	macaddr_t *sender_hardware_address = (macaddr_t *)x_ar_sha(ah);
+	if (memcmp(sender_hardware_address, eh->ether_shost, ETHER_ADDR_LEN) != 0 ||
+	    memcmp(sender_hardware_address, zconf.gw_mac, ETHER_ADDR_LEN) != 0) {
 		return;
 	}
-
-	log_debug("recv-netmap", "Received ICMP echo reply, ready to commence scan");
-	handle_packet_func = handle_packet;
+	in_addr_t target_protocol_address = *(in_addr_t *)x_ar_tpa(ah);
+	for (size_t i = 0; i < zconf.number_source_ips; i++) {
+		if (target_protocol_address == zconf.source_ip_addresses[i]) {
+			log_debug("recv-netmap", "Received ARP request from gateway");
+			if (handle_packet_func == handle_packet_wait_ping) {
+				send_packet(make_arp_resp, (void const *)ah);
+			} else {
+				submit_packet(make_arp_resp, (void const *)ah);
+			}
+			return;
+		}
+	}
+}
 
 static size_t
-make_wait_ping_req(uint8_t *buf)
+make_wait_ping_req(uint8_t *buf, UNUSED void const *arg)
 {
 	struct ether_header *eh = (struct ether_header *)buf;
 	make_eth_header(eh, zconf.hw_mac, zconf.gw_mac);
@@ -89,17 +170,28 @@ make_wait_ping_req(uint8_t *buf)
 }
 
 static void
-send_wait_ping_req(sock_t sock)
+handle_packet_wait_ping(uint32_t buflen, const uint8_t *bytes, UNUSED const struct timespec ts)
 {
-	batch_t *batch = create_packet_batch(1);
-	batch->lens[0] = (int)make_wait_ping_req((uint8_t *)batch->packets);
-	batch->ips[0] = zconf.nm.wait_ping_dstip;
-	batch->len = 1;
-	if (send_batch(sock, batch, 1) != 1) {
-		log_fatal("recv-netmap", "Failed to send ICMP echo request: %d: %s", errno, strerror(errno));
+	if (buflen < sizeof(struct ether_header) + sizeof(struct ip) + ICMP_MINLEN) {
+		return;
 	}
-	free_packet_batch(batch);
-	log_debug("recv-netmap", "Sent ICMP echo request");
+	struct ether_header *eh = (struct ether_header *)bytes;
+	if (eh->ether_type != htons(ETHERTYPE_IP)) {
+		return;
+	}
+	struct ip *iph = (struct ip *)(eh + 1);
+	if (iph->ip_v != 4 ||
+	    iph->ip_p != IPPROTO_ICMP ||
+	    iph->ip_src.s_addr != zconf.nm.wait_ping_dstip) {
+		return;
+	}
+	struct icmp *icmph = (struct icmp *)(iph + 1);
+	if (icmph->icmp_type != ICMP_ECHOREPLY) {
+		return;
+	}
+
+	log_debug("recv-netmap", "Received ICMP echo reply, ready to commence scan");
+	handle_packet_func = handle_packet;
 }
 
 #ifndef NSEC_PER_SEC
@@ -135,13 +227,6 @@ wait_for_e2e_connectivity(void)
 {
 	static const time_t timeout_secs = 60;
 
-	// Synthesize a sock_t for the main netmap fd.
-	// This is safe as long as send threads are not spun up yet.
-	// We're syncing all TX rings this way, not just ring 0.
-	sock_t sock;
-	sock.nm.tx_ring_idx = 0;
-	sock.nm.tx_ring_fd = zconf.nm.nm_fd;
-
 	struct timespec t_start;
 	timespec_get_monotonic(&t_start);
 	struct timespec t_last_send;
@@ -159,14 +244,22 @@ wait_for_e2e_connectivity(void)
 		}
 
 		if (timespec_diff(&t_now, &t_last_send).tv_sec >= 1) {
-			send_wait_ping_req(sock);
+			send_packet(make_wait_ping_req, NULL);
 			timespec_get_monotonic(&t_last_send);
+			log_debug("recv-netmap", "Sent ICMP echo request");
 		}
 
 		recv_packets();
 	}
 }
 
+static struct pollfd fds;
+static struct netmap_if *nm_if;
+static bool *in_multi_seg_packet;
+static if_stats_ctx_t *stats_ctx;
+static bool need_recv_counter;
+static uint64_t recv_counter;
+
 void recv_init(void)
 {
 	fds.fd = zconf.nm.nm_fd;
@@ -276,6 +369,7 @@ recv_packets(void)
 			if (need_recv_counter) {
 				recv_counter++;
 			}
+			handle_packet_arp(slot->len, (uint8_t *)buf, ts);
 			handle_packet_func(slot->len, (uint8_t *)buf, ts);
 		}
 		rxring->cur = rxring->head = head;
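Note on the x_ar_* accessor macros in the hunk above: struct arphdr (net/if_arp.h) covers only the five fixed header fields, so the variable-length sender/target addresses that follow it on the wire must be located by pointer arithmetic past the header. For the Ethernet/IPv4 case handled here, the payload the macros index into has the fixed shape sketched below (a hypothetical struct for illustration only, not part of the patch; the patch uses offset macros precisely to avoid relying on struct layout):

// Illustration of the 28-byte ARP payload for hrd=Ethernet, pro=IPv4,
// as indexed by x_ar_sha()/x_ar_spa()/x_ar_tha()/x_ar_tpa().
// Hypothetical struct for documentation; not used by the patch.
struct arp_ether_ipv4 {
	uint16_t ar_hrd;                 // hardware type: ARPHRD_ETHER (1)
	uint16_t ar_pro;                 // protocol type: ETHERTYPE_IP (0x0800)
	uint8_t  ar_hln;                 // hardware address length: 6
	uint8_t  ar_pln;                 // protocol address length: 4
	uint16_t ar_op;                  // ARPOP_REQUEST (1) or ARPOP_REPLY (2)
	uint8_t  ar_sha[ETHER_ADDR_LEN]; // sender MAC  == x_ar_sha(ah)
	uint8_t  ar_spa[4];              // sender IPv4 == x_ar_spa(ah)
	uint8_t  ar_tha[ETHER_ADDR_LEN]; // target MAC  == x_ar_tha(ah)
	uint8_t  ar_tpa[4];              // target IPv4 == x_ar_tpa(ah)
} __attribute__((packed));
// make_arp_resp() swaps the sender/target pairs per RFC 826: the reply's
// sender becomes (zconf.hw_mac, requested target IP), and the reply's
// target becomes the requester's (MAC, IP).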
src/send-internal.h: 3 changes (3 additions & 0 deletions)

@@ -16,6 +16,9 @@ int send_batch(sock_t sock, batch_t *batch, int retries);
 
 #if defined(PFRING)
 #include "send-pfring.h"
+#elif defined(NETMAP)
+void submit_batch_internal(batch_t *batch);
+int send_batch_internal(sock_t sock, batch_t *batch);
 #elif defined(__linux__)
 #include "send-linux.h"
 #endif
src/send-netmap.c: 94 changes (75 additions & 19 deletions)

@@ -20,16 +20,32 @@
 #include <errno.h>
 #include <string.h>
 #include <assert.h>
+#include <pthread.h>
 
 #include "../lib/includes.h"
 #include "../lib/logger.h"
+#include "../lib/queue.h"
 
 #include "socket.h"
 #include "state.h"
 
+static pthread_once_t submit_queue_inited = PTHREAD_ONCE_INIT;
+static zqueue_t *submit_queue;
+
+static void
+submit_queue_init_once(void)
+{
+	submit_queue = queue_init();
+	assert(submit_queue);
+}
+
 int
 send_run_init(sock_t sock)
 {
+	if (sock.nm.tx_ring_idx == 0) {
+		pthread_once(&submit_queue_inited, submit_queue_init_once);
+	}
+
 	struct pollfd fds = {
 		.fd = sock.nm.tx_ring_fd,
 		.events = POLLOUT,
@@ -42,27 +58,23 @@ send_run_init(sock_t sock)
 	return 0;
 }
 
-// This implementation does not use attempts, because retries do not
-// make sense based on the premise that syncing a TX ring will never
-// fail for transient reasons.
-//
-// This implementation never reports batches as partially failed,
-// because the netmap API does not have partial failure semantics.
-// All we know is that a poll or ioctl syscall failed, not if or
-// how many of the packets we placed in the ringbuffer were sent.
-//
-// ZMap's current architecture forces us to copy packet data here.
-// An even more optimised implementation might reuse packet data
-// in buffers (unless NS_BUF_CHANGED has been set by the kernel on
-// a slot), and only update the fields that need to change, such
-// as dst IP, checksum etc depending on scan type and params.
-int
-send_batch(sock_t sock, batch_t *batch, UNUSED int attempts)
+// Called from the recv thread to submit a batch of packets
+// for sending on thread 0; typically batch size is just 1.
+// Used for responding to ARP requests.
+// The way this works is rather inefficient and only makes
+// sense for low volume packets.
+// Since we don't know if send_run_init() has been called
+// yet or not, we need to ensure the queue is initialized.
+void
+submit_batch_internal(batch_t *batch)
 {
-	if (batch->len == 0) {
-		return 0;
-	}
+	pthread_once(&submit_queue_inited, submit_queue_init_once);
+	push_back((void *)batch, submit_queue);
+}
+
+int
+send_batch_internal(sock_t sock, batch_t *batch)
+{
 	struct netmap_ring *ring = NETMAP_TXRING(zconf.nm.nm_if, sock.nm.tx_ring_idx);
 	struct pollfd fds = {
 		.fd = sock.nm.tx_ring_fd,
@@ -96,3 +108,47 @@
 
 	return batch->len;
 }
+
+// Netmap's send_batch does not use attempts, because retries do
+// not make sense based on the premise that syncing a TX ring will
+// never fail for transient reasons.
+//
+// Netmap's send_batch never reports batches as partially failed,
+// because the netmap API does not have partial failure semantics.
+// All we know is that a poll or ioctl syscall failed, not if or
+// how many of the packets we placed in the ringbuffer were sent.
+//
+// There is a bit of unused optimization potential here; ZMap's
+// current architecture requires us to copy packet data on the
+// send path, we cannot supply netmap buffers to ZMap to write
+// into directly. And even though netmap would allow us to reuse
+// data still in buffers (unless NS_BUF_CHANGED has been set by
+// the kernel), we cannot take advantage of that currently.
+int
+send_batch(sock_t sock, batch_t *batch, UNUSED int attempts)
+{
+	// On send thread 0, send any batches that have been
+	// submitted onto the submit_queue before sending the
+	// actual batch. There should only be packets in the
+	// submit_queue very infrequently.
+	if (sock.nm.tx_ring_idx == 0) {
+		while (!is_empty(submit_queue)) {
+			znode_t *node = pop_front(submit_queue);
+			batch_t *extra_batch = (batch_t *)node->data;
+			assert(extra_batch->len > 0);
+			free(node);
+			if (send_batch_internal(sock, extra_batch) != extra_batch->len) {
+				log_error("send-netmap", "Failed to send extra batch of %u submitted packet(s)", extra_batch->len);
+			} else {
+				log_debug("send-netmap", "Sent extra batch of %u submitted packet(s)", extra_batch->len);
+			}
+			free_packet_batch(extra_batch);
+		}
+	}
+
+	if (batch->len == 0) {
+		return 0;
+	}
+
+	return send_batch_internal(sock, batch);
+}
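A note on the pthread_once() usage above: submit_batch_internal() can be reached from the recv thread before send thread 0 has run send_run_init(), so both paths funnel queue creation through the same pthread_once() gate, and whichever thread arrives first initializes the queue exactly once. A minimal self-contained sketch of that pattern (standalone demo with a stand-in queue, not ZMap code; build with -lpthread):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_once_t q_inited = PTHREAD_ONCE_INIT;
static int *q; // stand-in for zqueue_t

// Runs exactly once, no matter how many threads race into pthread_once().
static void q_init_once(void)
{
	q = calloc(16, sizeof(*q));
	assert(q);
}

// Producer thread: analogous to submit_batch_internal() on the recv thread.
static void *producer(void *arg)
{
	(void)arg;
	pthread_once(&q_inited, q_init_once);
	q[0] = 42; // "push" one item
	return NULL;
}

// Consumer thread: analogous to send_run_init() on send thread 0.
static void *consumer(void *arg)
{
	(void)arg;
	pthread_once(&q_inited, q_init_once);
	return NULL;
}

int main(void)
{
	pthread_t p, c;
	pthread_create(&p, NULL, producer, NULL);
	pthread_create(&c, NULL, consumer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	printf("q[0] = %d\n", q[0]); // 42; the queue was initialized exactly once
	return 0;
}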