276 lines
7.5 KiB
C
276 lines
7.5 KiB
C
|
/*
|
||
|
Ed25519 batch verification
|
||
|
*/
|
||
|
|
||
|
#define max_batch_size 64
|
||
|
#define heap_batch_size ((max_batch_size * 2) + 1)
|
||
|
|
||
|
/* which limb is the 128th bit in? */
|
||
|
static const size_t limb128bits = (128 + bignum256modm_bits_per_limb - 1) / bignum256modm_bits_per_limb;
|
||
|
|
||
|
typedef size_t heap_index_t;
|
||
|
|
||
|
typedef struct batch_heap_t {
|
||
|
unsigned char r[heap_batch_size][16]; /* 128 bit random values */
|
||
|
ge25519 points[heap_batch_size];
|
||
|
bignum256modm scalars[heap_batch_size];
|
||
|
heap_index_t heap[heap_batch_size];
|
||
|
size_t size;
|
||
|
} batch_heap;
|
||
|
|
||
|
/* swap two values in the heap */
|
||
|
static void
|
||
|
heap_swap(heap_index_t *heap, size_t a, size_t b) {
|
||
|
heap_index_t temp;
|
||
|
temp = heap[a];
|
||
|
heap[a] = heap[b];
|
||
|
heap[b] = temp;
|
||
|
}
|
||
|
|
||
|
/* add the scalar at the end of the list to the heap */
|
||
|
static void
|
||
|
heap_insert_next(batch_heap *heap) {
|
||
|
size_t node = heap->size, parent;
|
||
|
heap_index_t *pheap = heap->heap;
|
||
|
bignum256modm *scalars = heap->scalars;
|
||
|
|
||
|
/* insert at the bottom */
|
||
|
pheap[node] = (heap_index_t)node;
|
||
|
|
||
|
/* sift node up to its sorted spot */
|
||
|
parent = (node - 1) / 2;
|
||
|
while (node && lt256_modm_batch(scalars[pheap[parent]], scalars[pheap[node]], bignum256modm_limb_size - 1)) {
|
||
|
heap_swap(pheap, parent, node);
|
||
|
node = parent;
|
||
|
parent = (node - 1) / 2;
|
||
|
}
|
||
|
heap->size++;
|
||
|
}
|
||
|
|
||
|
/* update the heap when the root element is updated */
|
||
|
static void
|
||
|
heap_updated_root(batch_heap *heap, size_t limbsize) {
|
||
|
size_t node, parent, childr, childl;
|
||
|
heap_index_t *pheap = heap->heap;
|
||
|
bignum256modm *scalars = heap->scalars;
|
||
|
|
||
|
/* sift root to the bottom */
|
||
|
parent = 0;
|
||
|
node = 1;
|
||
|
childl = 1;
|
||
|
childr = 2;
|
||
|
while ((childr < heap->size)) {
|
||
|
node = lt256_modm_batch(scalars[pheap[childl]], scalars[pheap[childr]], limbsize) ? childr : childl;
|
||
|
heap_swap(pheap, parent, node);
|
||
|
parent = node;
|
||
|
childl = (parent * 2) + 1;
|
||
|
childr = childl + 1;
|
||
|
}
|
||
|
|
||
|
/* sift root back up to its sorted spot */
|
||
|
parent = (node - 1) / 2;
|
||
|
while (node && lte256_modm_batch(scalars[pheap[parent]], scalars[pheap[node]], limbsize)) {
|
||
|
heap_swap(pheap, parent, node);
|
||
|
node = parent;
|
||
|
parent = (node - 1) / 2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* build the heap with count elements, count must be >= 3 */
|
||
|
static void
|
||
|
heap_build(batch_heap *heap, size_t count) {
|
||
|
heap->heap[0] = 0;
|
||
|
heap->size = 0;
|
||
|
while (heap->size < count)
|
||
|
heap_insert_next(heap);
|
||
|
}
|
||
|
|
||
|
/* extend the heap to contain new_count elements */
|
||
|
static void
|
||
|
heap_extend(batch_heap *heap, size_t new_count) {
|
||
|
while (heap->size < new_count)
|
||
|
heap_insert_next(heap);
|
||
|
}
|
||
|
|
||
|
/* get the top 2 elements of the heap */
|
||
|
static void
|
||
|
heap_get_top2(batch_heap *heap, heap_index_t *max1, heap_index_t *max2, size_t limbsize) {
|
||
|
heap_index_t h0 = heap->heap[0], h1 = heap->heap[1], h2 = heap->heap[2];
|
||
|
if (lt256_modm_batch(heap->scalars[h1], heap->scalars[h2], limbsize))
|
||
|
h1 = h2;
|
||
|
*max1 = h0;
|
||
|
*max2 = h1;
|
||
|
}
|
||
|
|
||
|
/* */
|
||
|
static void
|
||
|
ge25519_multi_scalarmult_vartime_final(ge25519 *r, ge25519 *point, bignum256modm scalar) {
|
||
|
const bignum256modm_element_t topbit = ((bignum256modm_element_t)1 << (bignum256modm_bits_per_limb - 1));
|
||
|
size_t limb = limb128bits;
|
||
|
bignum256modm_element_t flag;
|
||
|
|
||
|
if (isone256_modm_batch(scalar)) {
|
||
|
/* this will happen most of the time after bos-carter */
|
||
|
*r = *point;
|
||
|
return;
|
||
|
} else if (iszero256_modm_batch(scalar)) {
|
||
|
/* this will only happen if all scalars == 0 */
|
||
|
memset(r, 0, sizeof(*r));
|
||
|
r->y[0] = 1;
|
||
|
r->z[0] = 1;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
*r = *point;
|
||
|
|
||
|
/* find the limb where first bit is set */
|
||
|
while (!scalar[limb])
|
||
|
limb--;
|
||
|
|
||
|
/* find the first bit */
|
||
|
flag = topbit;
|
||
|
while ((scalar[limb] & flag) == 0)
|
||
|
flag >>= 1;
|
||
|
|
||
|
/* exponentiate */
|
||
|
for (;;) {
|
||
|
ge25519_double(r, r);
|
||
|
if (scalar[limb] & flag)
|
||
|
ge25519_add(r, r, point);
|
||
|
|
||
|
flag >>= 1;
|
||
|
if (!flag) {
|
||
|
if (!limb--)
|
||
|
break;
|
||
|
flag = topbit;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* count must be >= 5 */
|
||
|
static void
|
||
|
ge25519_multi_scalarmult_vartime(ge25519 *r, batch_heap *heap, size_t count) {
|
||
|
heap_index_t max1, max2;
|
||
|
|
||
|
/* start with the full limb size */
|
||
|
size_t limbsize = bignum256modm_limb_size - 1;
|
||
|
|
||
|
/* whether the heap has been extended to include the 128 bit scalars */
|
||
|
int extended = 0;
|
||
|
|
||
|
/* grab an odd number of scalars to build the heap, unknown limb sizes */
|
||
|
heap_build(heap, ((count + 1) / 2) | 1);
|
||
|
|
||
|
for (;;) {
|
||
|
heap_get_top2(heap, &max1, &max2, limbsize);
|
||
|
|
||
|
/* only one scalar remaining, we're done */
|
||
|
if (iszero256_modm_batch(heap->scalars[max2]))
|
||
|
break;
|
||
|
|
||
|
/* exhausted another limb? */
|
||
|
if (!heap->scalars[max1][limbsize])
|
||
|
limbsize -= 1;
|
||
|
|
||
|
/* can we extend to the 128 bit scalars? */
|
||
|
if (!extended && isatmost128bits256_modm_batch(heap->scalars[max1])) {
|
||
|
heap_extend(heap, count);
|
||
|
heap_get_top2(heap, &max1, &max2, limbsize);
|
||
|
extended = 1;
|
||
|
}
|
||
|
|
||
|
sub256_modm_batch(heap->scalars[max1], heap->scalars[max1], heap->scalars[max2], limbsize);
|
||
|
ge25519_add(&heap->points[max2], &heap->points[max2], &heap->points[max1]);
|
||
|
heap_updated_root(heap, limbsize);
|
||
|
}
|
||
|
|
||
|
ge25519_multi_scalarmult_vartime_final(r, &heap->points[max1], heap->scalars[max1]);
|
||
|
}
|
||
|
|
||
|
/* not actually used for anything other than testing */
|
||
|
unsigned char batch_point_buffer[3][32];
|
||
|
|
||
|
static int
|
||
|
ge25519_is_neutral_vartime(const ge25519 *p) {
|
||
|
static const unsigned char zero[32] = {0};
|
||
|
unsigned char point_buffer[3][32];
|
||
|
curve25519_contract(point_buffer[0], p->x);
|
||
|
curve25519_contract(point_buffer[1], p->y);
|
||
|
curve25519_contract(point_buffer[2], p->z);
|
||
|
memcpy(batch_point_buffer[1], point_buffer[1], 32);
|
||
|
return (memcmp(point_buffer[0], zero, 32) == 0) && (memcmp(point_buffer[1], point_buffer[2], 32) == 0);
|
||
|
}
|
||
|
|
||
|
int
|
||
|
ED25519_FN(ed25519_sign_open_batch) (const unsigned char **m, size_t *mlen, const unsigned char **pk, const unsigned char **RS, size_t num, int *valid) {
|
||
|
batch_heap ALIGN(16) batch;
|
||
|
ge25519 ALIGN(16) p;
|
||
|
bignum256modm *r_scalars;
|
||
|
size_t i, batchsize;
|
||
|
unsigned char hram[64];
|
||
|
int ret = 0;
|
||
|
|
||
|
for (i = 0; i < num; i++)
|
||
|
valid[i] = 1;
|
||
|
|
||
|
while (num > 3) {
|
||
|
batchsize = (num > max_batch_size) ? max_batch_size : num;
|
||
|
|
||
|
/* generate r (scalars[batchsize+1]..scalars[2*batchsize] */
|
||
|
ED25519_FN(ed25519_randombytes_unsafe) (batch.r, batchsize * 16);
|
||
|
r_scalars = &batch.scalars[batchsize + 1];
|
||
|
for (i = 0; i < batchsize; i++)
|
||
|
expand256_modm(r_scalars[i], batch.r[i], 16);
|
||
|
|
||
|
/* compute scalars[0] = ((r1s1 + r2s2 + ...)) */
|
||
|
for (i = 0; i < batchsize; i++) {
|
||
|
expand256_modm(batch.scalars[i], RS[i] + 32, 32);
|
||
|
mul256_modm(batch.scalars[i], batch.scalars[i], r_scalars[i]);
|
||
|
}
|
||
|
for (i = 1; i < batchsize; i++)
|
||
|
add256_modm(batch.scalars[0], batch.scalars[0], batch.scalars[i]);
|
||
|
|
||
|
/* compute scalars[1]..scalars[batchsize] as r[i]*H(R[i],A[i],m[i]) */
|
||
|
for (i = 0; i < batchsize; i++) {
|
||
|
ed25519_hram(hram, RS[i], pk[i], m[i], mlen[i]);
|
||
|
expand256_modm(batch.scalars[i+1], hram, 64);
|
||
|
mul256_modm(batch.scalars[i+1], batch.scalars[i+1], r_scalars[i]);
|
||
|
}
|
||
|
|
||
|
/* compute points */
|
||
|
batch.points[0] = ge25519_basepoint;
|
||
|
for (i = 0; i < batchsize; i++)
|
||
|
if (!ge25519_unpack_negative_vartime(&batch.points[i+1], pk[i]))
|
||
|
goto fallback;
|
||
|
for (i = 0; i < batchsize; i++)
|
||
|
if (!ge25519_unpack_negative_vartime(&batch.points[batchsize+i+1], RS[i]))
|
||
|
goto fallback;
|
||
|
|
||
|
ge25519_multi_scalarmult_vartime(&p, &batch, (batchsize * 2) + 1);
|
||
|
if (!ge25519_is_neutral_vartime(&p)) {
|
||
|
ret |= 2;
|
||
|
|
||
|
fallback:
|
||
|
for (i = 0; i < batchsize; i++) {
|
||
|
valid[i] = ED25519_FN(ed25519_sign_open) (m[i], mlen[i], pk[i], RS[i]) ? 0 : 1;
|
||
|
ret |= (valid[i] ^ 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
m += batchsize;
|
||
|
mlen += batchsize;
|
||
|
pk += batchsize;
|
||
|
RS += batchsize;
|
||
|
num -= batchsize;
|
||
|
valid += batchsize;
|
||
|
}
|
||
|
|
||
|
for (i = 0; i < num; i++) {
|
||
|
valid[i] = ED25519_FN(ed25519_sign_open) (m[i], mlen[i], pk[i], RS[i]) ? 0 : 1;
|
||
|
ret |= (valid[i] ^ 1);
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|