Skip to content

Commit

Permalink
Removed more "/KYBER_Q" from the source code; if compiled to DIV, they might be turned into a plaintext-checking oracle (thanks to Prasanna Ravi and Matthias Kannwischer for reporting!)
Browse files Browse the repository at this point in the history
  • Loading branch information
cryptojedi committed Dec 30, 2023
1 parent bc8e640 commit 11d00ff
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 106 deletions.
89 changes: 1 addition & 88 deletions avx2/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,94 +22,7 @@
* (of length KYBER_POLYCOMPRESSEDBYTES)
* - const poly *a: pointer to input polynomial
**************************************************/
#if (KYBER_POLYCOMPRESSEDBYTES == 96)
/* 3-bit variant of poly_compress: every loop iteration reads four 256-bit
 * vectors of 16-bit coefficients (64 coefficients) from a and writes
 * 24 bytes (64 x 3 bits) to r. */
void poly_compress(uint8_t r[96], const poly * restrict a)
{
unsigned int i;
__m256i f0, f1, f2, f3;
__m128i t0, t1;
// NOTE(review): v comes from the shared constant table; presumably a
// fixed-point reciprocal of q used to avoid a division -- confirm in qdata.
const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
// mulhrs by 2^8 computes (x*2^8 + 2^14) >> 15, i.e. (x + 64) >> 7.
const __m256i shift1 = _mm256_set1_epi16(1 << 8);
// Keeps the low 3 bits of every 16-bit lane.
const __m256i mask = _mm256_set1_epi16(7);
// Byte weights (1, 8): maddubs merges two adjacent 3-bit values into 6 bits.
const __m256i shift2 = _mm256_set1_epi16((8 << 8) + 1);
// Word weights (1, 64): madd merges two adjacent 6-bit values into 12 bits.
const __m256i shift3 = _mm256_set1_epi32((64 << 16) + 1);
// Per-dword shift counts (0, 12): only the odd dwords are shifted left by 12.
const __m256i sllvdidx = _mm256_set1_epi64x(12LL << 32);
// Byte gather for the final packing; index -1 makes shuffle_epi8 write zero.
const __m256i shufbidx = _mm256_set_epi8( 8, 2, 1, 0,-1,-1,-1,-1,14,13,12, 6, 5, 4,10, 9,
-1,-1,-1,-1,14,13,12, 6, 5, 4,10, 9, 8, 2, 1, 0);

for(i=0;i<KYBER_N/64;i++) {
// Load 4 x 16 coefficients.
f0 = _mm256_load_si256(&a->vec[4*i+0]);
f1 = _mm256_load_si256(&a->vec[4*i+1]);
f2 = _mm256_load_si256(&a->vec[4*i+2]);
f3 = _mm256_load_si256(&a->vec[4*i+3]);
// High 16 bits of the signed product with v.
f0 = _mm256_mulhi_epi16(f0,v);
f1 = _mm256_mulhi_epi16(f1,v);
f2 = _mm256_mulhi_epi16(f2,v);
f3 = _mm256_mulhi_epi16(f3,v);
// Rounding shift right by 7 (see shift1).
f0 = _mm256_mulhrs_epi16(f0,shift1);
f1 = _mm256_mulhrs_epi16(f1,shift1);
f2 = _mm256_mulhrs_epi16(f2,shift1);
f3 = _mm256_mulhrs_epi16(f3,shift1);
// Reduce every lane to a 3-bit value.
f0 = _mm256_and_si256(f0,mask);
f1 = _mm256_and_si256(f1,mask);
f2 = _mm256_and_si256(f2,mask);
f3 = _mm256_and_si256(f3,mask);
// Narrow 16-bit lanes to bytes; packus works per 128-bit lane, so each
// lane holds 8 bytes of the first operand followed by 8 of the second.
f0 = _mm256_packus_epi16(f0,f1);
f2 = _mm256_packus_epi16(f2,f3);
// The letter/number labels below name the source vector and the position of
// each resulting element; they show the ordering, not the values.
f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7
f2 = _mm256_maddubs_epi16(f2,shift2); // c0 c1 c2 c3 d0 d1 d2 d3 c4 c5 c6 c7 d4 d5 d6 d7
f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3
f2 = _mm256_madd_epi16(f2,shift3); // c0 c1 d0 d1 c2 c3 d2 d3
// Move the odd 12-bit groups up by 12 bits so that the horizontal add
// below joins each even/odd pair into one 24-bit group.
f0 = _mm256_sllv_epi32(f0,sllvdidx);
f2 = _mm256_sllv_epi32(f2,sllvdidx);
f0 = _mm256_hadd_epi32(f0,f2); // a0 b0 c0 d0 a1 b1 c1 d1
f0 = _mm256_permute4x64_epi64(f0,0xD8); // a0 b0 a1 b1 c0 d0 c1 d1
// Collect the three used bytes of every dword into contiguous output bytes.
f0 = _mm256_shuffle_epi8(f0,shufbidx);
t0 = _mm256_castsi256_si128(f0);
t1 = _mm256_extracti128_si256(f0,1);
// Fill the zeroed top dword of the low half with the top dword of the
// high half, giving 16 finished output bytes in t0.
t0 = _mm_blend_epi32(t0,t1,0x08);
// 16 + 8 = 24 output bytes per iteration.
_mm_storeu_si128((__m128i *)&r[24*i+ 0],t0);
_mm_storel_epi64((__m128i *)&r[24*i+16],t1);
}
}

/*************************************************
* Name:        poly_decompress
*
* Description: De-serialization and subsequent decompression of a polynomial;
*              approximate inverse of poly_compress.
*              3-bit variant: every loop iteration expands 6 input bytes
*              (16 coefficients x 3 bits) into one vector of 16 coefficients.
*
* Arguments:   - poly *r: pointer to output polynomial
*              - const uint8_t *a: pointer to input byte array
*                                  (of length KYBER_POLYCOMPRESSEDBYTES bytes)
**************************************************/
void poly_decompress(poly * restrict r, const uint8_t a[96])
{
  unsigned int i;
  int lo, hi;
  __m128i t;
  __m256i f;
  /* NOTE(review): presumably q in every 16-bit lane -- confirm in qdata. */
  const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
  /* Per 16-bit lane, the pair of input bytes that contains its 3-bit field.
   * The low 128-bit lane reads bytes 0..2, the high lane bytes 3..5. */
  const __m256i shufbidx = _mm256_set_epi8(5,5,5,5,5,4,4,4,4,4,4,3,3,3,3,3,
                                           2,2,2,2,2,1,1,1,1,1,1,0,0,0,0,0);
  /* Isolates the 3-bit field inside each selected byte pair. */
  const __m256i mask = _mm256_set_epi16(224,28,896,112,14,448,56,7,
                                        224,28,896,112,14,448,56,7);
  /* Per-lane multiplier that moves every isolated field to bits 12..14. */
  const __m256i shift = _mm256_set_epi16(128,1024,32,256,2048,64,512,4096,
                                         128,1024,32,256,2048,64,512,4096);

  for(i=0;i<KYBER_N/16;i++) {
    /* Assemble the 6 input bytes little-endian from individual byte reads;
     * this avoids unaligned, type-punned loads from the byte array. */
    lo = (int)((uint32_t)a[6*i+0]
             | ((uint32_t)a[6*i+1] <<  8)
             | ((uint32_t)a[6*i+2] << 16)
             | ((uint32_t)a[6*i+3] << 24));
    hi = a[6*i+4] | (a[6*i+5] << 8);
    t = _mm_cvtsi32_si128(lo);      /* bytes 0..3, upper bits zeroed */
    t = _mm_insert_epi16(t,hi,2);   /* bytes 4..5 */
    /* Both 128-bit lanes receive all 6 bytes, so the in-lane byte shuffle
     * can reach every byte it needs without any cross-lane blend. */
    f = _mm256_broadcastsi128_si256(t);
    f = _mm256_shuffle_epi8(f,shufbidx);
    f = _mm256_and_si256(f,mask);
    f = _mm256_mullo_epi16(f,shift);   /* value << 12 in every lane */
    /* mulhrs computes (x*q + 2^14) >> 15; with x = value << 12 this is
     * (value*q + 4) >> 3, the rounded value*q/8. */
    f = _mm256_mulhrs_epi16(f,q);
    _mm256_store_si256(&r->vec[i],f);
  }
}

#elif (KYBER_POLYCOMPRESSEDBYTES == 128)
#if (KYBER_POLYCOMPRESSEDBYTES == 128)
void poly_compress(uint8_t r[128], const poly * restrict a)
{
unsigned int i;
Expand Down
18 changes: 15 additions & 3 deletions ref/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,23 @@
void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
{
unsigned int i,j;
int16_t u;
int32_t u;
uint32_t d0;
uint8_t t[8];

#if (KYBER_POLYCOMPRESSEDBYTES == 128)

for(i=0;i<KYBER_N/8;i++) {
for(j=0;j<8;j++) {
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15;
/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
d0 = u << 4;
d0 += 1665;
d0 *= 80635;
d0 >>= 28;
t[j] = d0 & 0xf;
}

r[0] = t[0] | (t[1] << 4);
Expand All @@ -42,7 +49,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
// map to positive standard representatives
u = a->coeffs[8*i+j];
u += (u >> 15) & KYBER_Q;
t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31;
/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
d0 = u << 5;
d0 += 1664;
d0 *= 40318;
d0 >>= 27;
t[j] = d0 & 0x1f;
}

r[0] = (t[0] >> 0) | (t[1] << 5);
Expand Down
18 changes: 16 additions & 2 deletions ref/polyvec.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
{
unsigned int i,j,k;
uint64_t d0;

#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
Expand All @@ -23,7 +24,14 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<8;k++) {
t[k] = a->vec[i].coeffs[8*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff;
/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
d0 = t[k];
d0 <<= 11;
d0 += 1664;
d0 *= 645084;
d0 >>= 31;
t[k] = d0 & 0x7ff;

}

r[ 0] = (t[0] >> 0);
Expand All @@ -47,7 +55,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
for(k=0;k<4;k++) {
t[k] = a->vec[i].coeffs[4*j+k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff;
/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
d0 = t[k];
d0 <<= 10;
d0 += 1665;
d0 *= 1290167;
d0 >>= 32;
t[k] = d0 & 0x3ff;
}

r[0] = (t[0] >> 0);
Expand Down
6 changes: 3 additions & 3 deletions ref/test/test_kyber.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#define NTESTS 1000

static int test_keys()
static int test_keys(void)
{
uint8_t pk[CRYPTO_PUBLICKEYBYTES];
uint8_t sk[CRYPTO_SECRETKEYBYTES];
Expand All @@ -31,7 +31,7 @@ static int test_keys()
return 0;
}

static int test_invalid_sk_a()
static int test_invalid_sk_a(void)
{
uint8_t pk[CRYPTO_PUBLICKEYBYTES];
uint8_t sk[CRYPTO_SECRETKEYBYTES];
Expand Down Expand Up @@ -59,7 +59,7 @@ static int test_invalid_sk_a()
return 0;
}

static int test_invalid_ciphertext()
static int test_invalid_ciphertext(void)
{
uint8_t pk[CRYPTO_PUBLICKEYBYTES];
uint8_t sk[CRYPTO_SECRETKEYBYTES];
Expand Down
2 changes: 1 addition & 1 deletion ref/test/test_speed.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
uint64_t t[NTESTS];
uint8_t seed[KYBER_SYMBYTES] = {0};

int main()
int main(void)
{
unsigned int i;
uint8_t pk[CRYPTO_PUBLICKEYBYTES];
Expand Down
19 changes: 10 additions & 9 deletions runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,22 @@ else
fi

if [ "$ARCH" = "amd64" -o "$ARCH" = "arm64" ]; then
export CC=/usr/bin/clang
export CFLAGS="-fsanitize=undefined,address ${CFLAGS}"
export CC=/usr/bin/gcc
# export CFLAGS="-fsanitize=undefined,address ${CFLAGS}"
fi

for dir in $DIRS; do
make -j$(nproc) -C $dir clean
make -j$(nproc) -C $dir
for alg in 512 768 1024; do
#valgrind --vex-guest-max-insns=25 ./$dir/test_kyber$alg
./$dir/test/test_kyber$alg &
PID1=$!
echo testvec$alg
./$dir/test/test_vectors$alg > tvecs$alg &
PID2=$!
wait $PID1 $PID2
valgrind --vex-guest-max-insns=25 ./$dir/test/test_kyber$alg
echo test_kyber$alg
./$dir/test/test_kyber$alg
# PID1=$!
# echo testvec$alg
./$dir/test/test_vectors$alg > tvecs$alg
# PID2=$!
# wait $PID1 $PID2
done
shasum -a256 -c SHA256SUMS
done
Expand Down

0 comments on commit 11d00ff

Please sign in to comment.