Skip to content

Commit

Permalink
SSSE3: enc: encode src/dst size in a typedef
Browse files Browse the repository at this point in the history
  • Loading branch information
aklomp committed Jan 16, 2024
1 parent 9c5c07b commit b955289
Showing 1 changed file with 27 additions and 21 deletions.
48 changes: 27 additions & 21 deletions lib/arch/ssse3/enc_loop.c
@@ -1,8 +1,13 @@
// Define array types for the source and destination vectors. Each round
// consumes 12 input bytes and produces 16 output bytes, so one array element
// corresponds to exactly one round. Incrementing a pointer to one of these
// array types advances it by the full size of the array, which enables the
// use of post-increments to move a pointer to the next element.
// Note that the load in the inner loop still reads a full 128-bit vector
// starting at the 12-byte source element.
typedef uint8_t ssse3_enc_src[12];
typedef uint8_t ssse3_enc_dst[16];

static inline void
enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
enc_loop_ssse3_inner (const ssse3_enc_src **s, ssse3_enc_dst **o)
{
// Load input:
__m128i str = _mm_loadu_si128((__m128i *) *s);
__m128i str = _mm_loadu_si128((__m128i *) (*s)++);

// Reshuffle:
str = enc_reshuffle(str);
Expand All @@ -11,10 +16,7 @@ enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
str = enc_translate(str);

// Store:
_mm_storeu_si128((__m128i *) *o, str);

*s += 12;
*o += 16;
_mm_storeu_si128((__m128i *) (*o)++, str);
}

static inline void
Expand All @@ -33,34 +35,38 @@ enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
*slen -= rounds * 12; // 12 bytes consumed per round
*olen += rounds * 16; // 16 bytes produced per round

// Cast the source and destination pointers to array type pointers.
const ssse3_enc_src **src = (const ssse3_enc_src **) s;
ssse3_enc_dst **dst = (ssse3_enc_dst **) o;

do {
if (rounds >= 8) {
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
rounds -= 8;
continue;
}
if (rounds >= 4) {
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
rounds -= 4;
continue;
}
if (rounds >= 2) {
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(src, dst);
enc_loop_ssse3_inner(src, dst);
rounds -= 2;
continue;
}
enc_loop_ssse3_inner(s, o);
enc_loop_ssse3_inner(src, dst);
break;

} while (rounds > 0);
Expand Down

0 comments on commit b955289

Please sign in to comment.