bulletproofs: misc optimizations

Use double mults where possible, avoid conversions, simplify
This commit is contained in:
moneromooo-monero 2018-01-05 22:39:59 +00:00
parent 112f32f068
commit e9164bb38b
No known key found for this signature in database
GPG Key ID: 686F07454D6CEFC3

View File

@ -65,6 +65,33 @@ static const rct::keyV twoN = vector_powers(TWO, maxN);
static const rct::key ip12 = inner_product(oneN, twoN); static const rct::key ip12 = inner_product(oneN, twoN);
static boost::mutex init_mutex; static boost::mutex init_mutex;
//addKeys3acc_p3
//aAbB += a*A + b*B where a, b are scalars, A, B are curve points
//A and B must be input after applying "precomp"
//aAbB is both an input (the running accumulator) and the output; it is
//kept in ge_p3 form so that repeated accumulation does not need to go
//through a serialized key between steps
//NOTE(review): the double scalar mult used here is a "vartime" routine by
//name - presumably not constant time; confirm before using with secrets
static void addKeys3acc_p3(ge_p3 *aAbB, const key &a, const ge_dsmp A, const key &b, const ge_dsmp B)
{
  // rv = a*A + b*B, produced directly as a ge_p3
  ge_p3 rv;
  ge_double_scalarmult_precomp_vartime2_p3(&rv, a.bytes, A, b.bytes, B);

  // ge_add takes its second operand in cached form, so convert the
  // current accumulator before adding rv to it
  ge_cached cached;
  ge_p3_to_cached(&cached, aAbB);

  // aAbB = rv + aAbB, converted back to ge_p3 for the next accumulation
  ge_p1p1 p1;
  ge_add(&p1, &rv, &cached);
  ge_p1p1_to_p3(aAbB, &p1);
}
//addKeys_acc_p3
//acc_p3 += a*point where a is a scalar and point is a serialized curve point
//acc_p3 is read as the running accumulator and overwritten with the result
//a failure to decode point is reported through CHECK_AND_ASSERT_THROW_MES
static void addKeys_acc_p3(ge_p3 *acc_p3, const rct::key &a, const rct::key &point)
{
  // decode the serialized point, then scale it by a
  ge_p3 term;
  CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&term, point.bytes) == 0, "ge_frombytes_vartime failed");
  // NOTE(review): input and output alias here - assumes ge_scalarmult_p3
  // supports in-place operation; confirm against its implementation
  ge_scalarmult_p3(&term, a.bytes, &term);

  // ge_add wants its second operand in cached form
  ge_cached acc_cached;
  ge_p3_to_cached(&acc_cached, acc_p3);

  // acc_p3 = term + acc_p3
  ge_p1p1 sum;
  ge_add(&sum, &term, &acc_cached);
  ge_p1p1_to_p3(acc_p3, &sum);
}
static rct::key get_exponent(const rct::key &base, size_t idx) static rct::key get_exponent(const rct::key &base, size_t idx)
{ {
static const std::string salt("bulletproof"); static const std::string salt("bulletproof");
@ -94,13 +121,13 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b)
{ {
CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b");
CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN"); CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN");
rct::key res = rct::identity(); ge_p3 res_p3 = ge_p3_identity;
for (size_t i = 0; i < a.size(); ++i) for (size_t i = 0; i < a.size(); ++i)
{ {
rct::key term; rct::addKeys3acc_p3(&res_p3, a[i], Gprecomp[i], b[i], Hprecomp[i]);
rct::addKeys3(term, a[i], Gprecomp[i], b[i], Hprecomp[i]);
rct::addKeys(res, res, term);
} }
rct::key res;
ge_p3_tobytes(res.bytes, &res_p3);
return res; return res;
} }
@ -111,11 +138,11 @@ static rct::key vector_exponent_custom(const rct::keyV &A, const rct::keyV &B, c
CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b");
CHECK_AND_ASSERT_THROW_MES(a.size() == A.size(), "Incompatible sizes of a and A"); CHECK_AND_ASSERT_THROW_MES(a.size() == A.size(), "Incompatible sizes of a and A");
CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN"); CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN");
rct::key res = rct::identity(); ge_p3 res_p3 = ge_p3_identity;
for (size_t i = 0; i < a.size(); ++i) for (size_t i = 0; i < a.size(); ++i)
{ {
rct::key term;
#if 0 #if 0
rct::key term;
// we happen to know where A and B might fall, so don't bother checking the rest // we happen to know where A and B might fall, so don't bother checking the rest
ge_dsmp *Acache = NULL, *Bcache = NULL; ge_dsmp *Acache = NULL, *Bcache = NULL;
ge_dsmp Acache_custom[1], Bcache_custom[1]; ge_dsmp Acache_custom[1], Bcache_custom[1];
@ -136,13 +163,16 @@ static rct::key vector_exponent_custom(const rct::keyV &A, const rct::keyV &B, c
Bcache = Bcache_custom; Bcache = Bcache_custom;
} }
rct::addKeys3(term, a[i], *Acache, b[i], *Bcache); rct::addKeys3(term, a[i], *Acache, b[i], *Bcache);
rct::addKeys(res, res, term);
#else #else
ge_dsmp Acache, Bcache; ge_dsmp Acache, Bcache;
rct::precomp(Bcache, B[i]); rct::precomp(Bcache, B[i]);
rct::addKeys3(term, a[i], A[i], b[i], Bcache); rct::precomp(Acache, A[i]);
addKeys3acc_p3(&res_p3, a[i], Acache, b[i], Bcache);
#endif #endif
rct::addKeys(res, res, term);
} }
rct::key res;
ge_p3_tobytes(res.bytes, &res_p3);
return res; return res;
} }
@ -163,6 +193,24 @@ static rct::keyV vector_powers(const rct::key &x, size_t n)
return res; return res;
} }
/* Given a scalar x, return x^0 + x^1 + ... + x^(n-1); the result is zero when n is 0 */
static rct::key vector_power_sum(const rct::key &x, size_t n)
{
  // empty sum
  if (n == 0)
    return rct::zero();

  // x^0 term; rct::identity() is used as the starting value here
  // (presumably its bytes encode the scalar one - confirm)
  rct::key sum = rct::identity();
  if (n == 1)
    return sum;

  // x^1 term needs no multiplication
  rct::key power = x;
  sc_add(sum.bytes, sum.bytes, power.bytes);

  // each further term is the previous power times x
  for (size_t exponent = 2; exponent < n; ++exponent)
  {
    sc_mul(power.bytes, power.bytes, x.bytes);
    sc_add(sum.bytes, sum.bytes, power.bytes);
  }
  return sum;
}
/* Given two scalar arrays, construct the inner product */ /* Given two scalar arrays, construct the inner product */
static rct::key inner_product(const rct::keyV &a, const rct::keyV &b) static rct::key inner_product(const rct::keyV &a, const rct::keyV &b)
{ {
@ -876,12 +924,13 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
PERF_TIMER_START_BP(VERIFY_line_61); PERF_TIMER_START_BP(VERIFY_line_61);
// PAPER LINE 61 // PAPER LINE 61
rct::key L61Left = rct::addKeys(rct::scalarmultBase(proof.taux), rct::scalarmultKey(rct::H, proof.t)); rct::key L61Left;
rct::addKeys2(L61Left, proof.taux, proof.t, rct::H);
const rct::keyV zpow = vector_powers(z, M+3); const rct::keyV zpow = vector_powers(z, M+3);
rct::key k; rct::key k;
const rct::key ip1y = vector_sum(vector_powers(y, MN)); const rct::key ip1y = vector_power_sum(y, MN);
sc_mulsub(k.bytes, zpow[2].bytes, ip1y.bytes, rct::zero().bytes); sc_mulsub(k.bytes, zpow[2].bytes, ip1y.bytes, rct::zero().bytes);
for (size_t j = 1; j <= M; ++j) for (size_t j = 1; j <= M; ++j)
{ {
@ -893,20 +942,32 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
PERF_TIMER_START_BP(VERIFY_line_61rl); PERF_TIMER_START_BP(VERIFY_line_61rl);
sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes); sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes);
rct::key L61Right = rct::scalarmultKey(rct::H, tmp); rct::key L61Right = rct::scalarmultKey(rct::H, tmp);
for (size_t j = 0; j < M; ++j) ge_p3 L61Right_p3;
CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&L61Right_p3, L61Right.bytes) == 0, "ge_frombytes_vartime failed");
for (size_t j = 0; j+1 < proof.V.size(); j += 2)
{
CHECK_AND_ASSERT_MES(j+2+1 < zpow.size(), false, "invalid zpow index");
ge_dsmp precomp0, precomp1;
rct::precomp(precomp0, j < proof.V.size() ? proof.V[j] : rct::identity());
rct::precomp(precomp1, j+1 < proof.V.size() ? proof.V[j+1] : rct::identity());
rct::addKeys3acc_p3(&L61Right_p3, zpow[j+2], precomp0, zpow[j+2+1], precomp1);
}
for (size_t j = proof.V.size() & 0xfffffffe; j < M; j++)
{ {
CHECK_AND_ASSERT_MES(j+2 < zpow.size(), false, "invalid zpow index"); CHECK_AND_ASSERT_MES(j+2 < zpow.size(), false, "invalid zpow index");
tmp = rct::scalarmultKey(j < proof.V.size() ? proof.V[j] : rct::identity(), zpow[j+2]); // faster equivalent to:
rct::addKeys(L61Right, L61Right, tmp); // tmp = rct::scalarmultKey(j < proof.V.size() ? proof.V[j] : rct::identity(), zpow[j+2]);
// rct::addKeys(L61Right, L61Right, tmp);
if (j < proof.V.size())
addKeys_acc_p3(&L61Right_p3, zpow[j+2], proof.V[j]);
} }
tmp = rct::scalarmultKey(proof.T1, x); addKeys_acc_p3(&L61Right_p3, x, proof.T1);
rct::addKeys(L61Right, L61Right, tmp);
rct::key xsq; rct::key xsq;
sc_mul(xsq.bytes, x.bytes, x.bytes); sc_mul(xsq.bytes, x.bytes, x.bytes);
tmp = rct::scalarmultKey(proof.T2, xsq); addKeys_acc_p3(&L61Right_p3, xsq, proof.T2);
rct::addKeys(L61Right, L61Right, tmp); ge_p3_tobytes(L61Right.bytes, &L61Right_p3);
PERF_TIMER_STOP(VERIFY_line_61rl); PERF_TIMER_STOP(VERIFY_line_61rl);
if (!(L61Right == L61Left)) if (!(L61Right == L61Left))
@ -937,7 +998,7 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
PERF_TIMER_START_BP(VERIFY_line_24_25); PERF_TIMER_START_BP(VERIFY_line_24_25);
// Basically PAPER LINES 24-25 // Basically PAPER LINES 24-25
// Compute the curvepoints from G[i] and H[i] // Compute the curvepoints from G[i] and H[i]
rct::key inner_prod = rct::identity(); ge_p3 inner_prod_p3 = ge_p3_identity;
rct::key yinvpow = rct::identity(); rct::key yinvpow = rct::identity();
rct::key ypow = rct::identity(); rct::key ypow = rct::identity();
@ -981,8 +1042,7 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
// Now compute the basepoint's scalar multiplication // Now compute the basepoint's scalar multiplication
// Each of these could be written as a multiexp operation instead // Each of these could be written as a multiexp operation instead
rct::addKeys3(tmp, g_scalar, Gprecomp[i], h_scalar, Hprecomp[i]); addKeys3acc_p3(&inner_prod_p3, g_scalar, Gprecomp[i], h_scalar, Hprecomp[i]);
rct::addKeys(inner_prod, inner_prod, tmp);
if (i != MN-1) if (i != MN-1)
{ {
@ -990,6 +1050,8 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
sc_mul(ypow.bytes, ypow.bytes, y.bytes); sc_mul(ypow.bytes, ypow.bytes, y.bytes);
} }
} }
rct::key inner_prod;
ge_p3_tobytes(inner_prod.bytes, &inner_prod_p3);
PERF_TIMER_STOP(VERIFY_line_24_25); PERF_TIMER_STOP(VERIFY_line_24_25);
PERF_TIMER_START_BP(VERIFY_line_26); PERF_TIMER_START_BP(VERIFY_line_26);
@ -997,6 +1059,8 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
rct::key pprime; rct::key pprime;
sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes); sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes);
rct::addKeys(pprime, P, rct::scalarmultBase(tmp)); rct::addKeys(pprime, P, rct::scalarmultBase(tmp));
ge_p3 pprime_p3;
CHECK_AND_ASSERT_MES(ge_frombytes_vartime(&pprime_p3, pprime.bytes) == 0, false, "ge_frombytes_vartime failed");
for (size_t i = 0; i < rounds; ++i) for (size_t i = 0; i < rounds; ++i)
{ {
@ -1006,15 +1070,15 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
ge_dsmp cacheL, cacheR; ge_dsmp cacheL, cacheR;
rct::precomp(cacheL, proof.L[i]); rct::precomp(cacheL, proof.L[i]);
rct::precomp(cacheR, proof.R[i]); rct::precomp(cacheR, proof.R[i]);
rct::addKeys3(tmp, tmp, cacheL, tmp2, cacheR); addKeys3acc_p3(&pprime_p3, tmp, cacheL, tmp2, cacheR);
rct::addKeys(pprime, pprime, tmp);
#else #else
rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.L[i], tmp)); rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.L[i], tmp));
rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.R[i], tmp2)); rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.R[i], tmp2));
#endif #endif
} }
sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes); sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes);
rct::addKeys(pprime, pprime, rct::scalarmultKey(rct::H, tmp)); addKeys_acc_p3(&pprime_p3, tmp, rct::H);
ge_p3_tobytes(pprime.bytes, &pprime_p3);
PERF_TIMER_STOP(VERIFY_line_26); PERF_TIMER_STOP(VERIFY_line_26);
PERF_TIMER_START_BP(VERIFY_step2_check); PERF_TIMER_START_BP(VERIFY_step2_check);