Skip to content

Commit

Permalink
Use AVX2 when unmasking websockets frames
Browse files Browse the repository at this point in the history
This already used SSE2, but let's use a wider vector if available to
get this over more quickly.
  • Loading branch information
lpereira committed Apr 14, 2024
1 parent e68709f commit a842cb2
Showing 1 changed file with 24 additions and 4 deletions.
28 changes: 24 additions & 4 deletions src/lib/lwan-websocket.c
Expand Up @@ -26,7 +26,7 @@
#include <sys/socket.h>

#if defined(__x86_64__)
#include <emmintrin.h>
#include <immintrin.h>
#endif

#include "lwan-io-wrappers.h"
Expand Down Expand Up @@ -156,19 +156,39 @@ static void unmask(char *msg, size_t msg_len, char mask[static 4])
if (sizeof(void *) == 8) {
const uint64_t mask64 = (uint64_t)mask32 << 32 | mask32;

#if defined(__x86_64__)
#if defined(__AVX2__)
const size_t len256 = msg_len / 32;
if (len256) {
const __m256i mask256 =
_mm256_setr_epi64x((int64_t)mask64, (int64_t)mask64,
(int64_t)mask64, (int64_t)mask64);
for (size_t i = 0; i < len256; i++) {
__m256i v = _mm256_loadu_si256((__m256i *)msg);
_mm256_storeu_si256((__m256i *)msg,
_mm256_xor_si256(v, mask256));
msg += 32;
}

msg_len = (size_t)(msg_end - msg);
}
#endif

#if defined(__SSE2__)
const size_t len128 = msg_len / 16;
if (len128) {
const __m128i mask128 = _mm_setr_epi64((__m64)mask64, (__m64)mask64);
const __m128i mask128 =
_mm_setr_epi64((__m64)mask64, (__m64)mask64);
for (size_t i = 0; i < len128; i++) {
__m128i v = _mm_loadu_si128((__m128i *)msg);
_mm_storeu_si128((__m128i *)msg, _mm_xor_si128(v, mask128));
msg += 16;
}

msg_len = (size_t)(msg_end - msg);
}
#endif

const size_t len64 = (size_t)((msg_end - msg) / 8);
const size_t len64 = msg_len / 8;
for (size_t i = 0; i < len64; i++) {
uint64_t v = string_as_uint64(msg);
v ^= mask64;
Expand Down

0 comments on commit a842cb2

Please sign in to comment.