From 4564a5d17b4c1a4507ac14dc96ad2bcaa675712c Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 7 Aug 2018 08:02:42 +0000 Subject: [PATCH 01/16] bulletproofs: speedup PROVE --- src/ringct/bulletproofs.cc | 122 ++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 64 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 381f50872..0e5b3b55f 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -127,15 +127,6 @@ static void sub_acc_p3(ge_p3 *acc_p3, const rct::key &point) ge_p1p1_to_p3(acc_p3, &p1); } -static rct::key scalarmultKey(const ge_p3 &P, const rct::key &a) -{ - ge_p2 R; - ge_scalarmult(&R, a.bytes, &P); - rct::key aP; - ge_tobytes(aP.bytes, &R); - return aP; -} - static rct::key get_exponent(const rct::key &base, size_t idx) { static const std::string salt("bulletproof"); @@ -193,23 +184,28 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b) } /* Compute a custom vector-scalar commitment */ -static rct::key vector_exponent_custom(const rct::keyV &A, const rct::keyV &B, const rct::keyV &a, const rct::keyV &b) +static rct::key cross_vector_exponent8(size_t size, const std::vector &A, size_t Ao, const std::vector &B, size_t Bo, const rct::keyV &a, size_t ao, const rct::keyV &b, size_t bo, const ge_p3 *extra_point, const rct::key *extra_scalar) { - CHECK_AND_ASSERT_THROW_MES(A.size() == B.size(), "Incompatible sizes of A and B"); - CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); - CHECK_AND_ASSERT_THROW_MES(a.size() == A.size(), "Incompatible sizes of a and A"); - CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN"); + CHECK_AND_ASSERT_THROW_MES(size + Ao <= A.size(), "Incompatible size for A"); + CHECK_AND_ASSERT_THROW_MES(size + Bo <= B.size(), "Incompatible size for B"); + CHECK_AND_ASSERT_THROW_MES(size + ao <= a.size(), "Incompatible size for a"); + CHECK_AND_ASSERT_THROW_MES(size + bo <= b.size(), "Incompatible size for b"); + CHECK_AND_ASSERT_THROW_MES(size <= maxN*maxM, "size is too large"); + CHECK_AND_ASSERT_THROW_MES(!!extra_point == !!extra_scalar, "only one of extra point/scalar present"); std::vector multiexp_data; - multiexp_data.reserve(a.size()*2); - for (size_t i = 0; i < a.size(); ++i) + multiexp_data.resize(size*2 + (!!extra_point)); + for (size_t i = 0; i < size; ++i) { - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = a[i]; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, A[i].bytes) == 0, "ge_frombytes_vartime failed"); - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = b[i]; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, B[i].bytes) == 0, "ge_frombytes_vartime failed"); + sc_mul(multiexp_data[i*2].scalar.bytes, a[ao+i].bytes, INV_EIGHT.bytes);; + multiexp_data[i*2].point = A[Ao+i]; + sc_mul(multiexp_data[i*2+1].scalar.bytes, b[bo+i].bytes, INV_EIGHT.bytes); + multiexp_data[i*2+1].point = B[Bo+i]; + } + if (extra_point) + { + sc_mul(multiexp_data.back().scalar.bytes, extra_scalar->bytes, INV_EIGHT.bytes); + multiexp_data.back().point = *extra_point; } return multiexp(multiexp_data, false); } @@ -273,16 +269,19 @@ static rct::keyV hadamard(const rct::keyV &a, const rct::keyV &b) return res; } -/* Given two curvepoint arrays, construct the Hadamard product */ -static rct::keyV hadamard2(const rct::keyV &a, const rct::keyV &b) +/* folds a curvepoint array using a two way scaled Hadamard product */ +static void hadamard_fold(std::vector &v, const rct::key &a, const rct::key &b) { - CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); - rct::keyV res(a.size()); - for (size_t i = 0; i < a.size(); ++i) + CHECK_AND_ASSERT_THROW_MES((v.size() & 1) == 0, "Vector size should be even"); + const size_t sz = v.size() / 2; + for (size_t n = 0; n < sz; ++n) { - rct::addKeys(res[i], a[i], b[i]); + ge_dsmp c[2]; + ge_dsm_precomp(c[0], &v[n]); + ge_dsm_precomp(c[1], &v[sz + n]); + ge_double_scalarmult_precomp_vartime2_p3(&v[n], a.bytes, c[0], b.bytes, c[1]); } - return res; + v.resize(sz); } /* Add two vectors */ @@ -326,17 +325,6 @@ static rct::keyV vector_dup(const rct::key &x, size_t N) return rct::keyV(N, x); } -/* Exponentiate a curve vector by a scalar */ -static rct::keyV vector_scalar2(const rct::keyV &a, const rct::key &x) -{ - rct::keyV res(a.size()); - for (size_t i = 0; i < a.size(); ++i) - { - rct::scalarmultKey(res[i], a[i], x); - } - return res; -} - /* Get the sum of a vector's elements */ static rct::key vector_sum(const rct::keyV &a) { @@ -620,16 +608,16 @@ try_again: // These are used in the inner product rounds size_t nprime = N; - rct::keyV Gprime(N); - rct::keyV Hprime(N); + std::vector Gprime(N); + std::vector Hprime(N); rct::keyV aprime(N); rct::keyV bprime(N); const rct::key yinv = invert(y); rct::key yinvpow = rct::identity(); for (size_t i = 0; i < N; ++i) { - Gprime[i] = Gi[i]; - Hprime[i] = scalarmultKey(Hi_p3[i], yinvpow); + Gprime[i] = Gi_p3[i]; + ge_scalarmult_p3(&Hprime[i], yinvpow.bytes, &Hi_p3[i]); sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); aprime[i] = l[i]; bprime[i] = r[i]; @@ -652,14 +640,10 @@ try_again: rct::key cR = inner_product(slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); // PAPER LINES 18-19 - L[round] = vector_exponent_custom(slice(Gprime, nprime, Gprime.size()), slice(Hprime, 0, nprime), slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); sc_mul(tmp.bytes, cL.bytes, x_ip.bytes); - rct::addKeys(L[round], L[round], rct::scalarmultH(tmp)); - L[round] = rct::scalarmultKey(L[round], INV_EIGHT); - R[round] = vector_exponent_custom(slice(Gprime, 0, nprime), slice(Hprime, nprime, Hprime.size()), slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); + L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, &ge_p3_H, &tmp); sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); - rct::addKeys(R[round], R[round], rct::scalarmultH(tmp)); - R[round] = rct::scalarmultKey(R[round], INV_EIGHT); + R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, &ge_p3_H, &tmp); // PAPER LINES 21-22 w[round] = hash_cache_mash(hash_cache, L[round], R[round]); @@ -672,8 +656,11 @@ try_again: // PAPER LINES 24-25 const rct::key winv = invert(w[round]); - Gprime = hadamard2(vector_scalar2(slice(Gprime, 0, nprime), winv), vector_scalar2(slice(Gprime, nprime, Gprime.size()), w[round])); - Hprime = hadamard2(vector_scalar2(slice(Hprime, 0, nprime), w[round]), vector_scalar2(slice(Hprime, nprime, Hprime.size()), winv)); + if (nprime > 1) + { + hadamard_fold(Gprime, winv, w[round]); + hadamard_fold(Hprime, w[round], winv); + } // PAPER LINES 28-29 aprime = vector_add(vector_scalar(slice(aprime, 0, nprime), w[round]), vector_scalar(slice(aprime, nprime, aprime.size()), winv)); @@ -914,16 +901,16 @@ try_again: // These are used in the inner product rounds size_t nprime = MN; - rct::keyV Gprime(MN); - rct::keyV Hprime(MN); + std::vector Gprime(MN); + std::vector Hprime(MN); rct::keyV aprime(MN); rct::keyV bprime(MN); const rct::key yinv = invert(y); rct::key yinvpow = rct::identity(); for (size_t i = 0; i < MN; ++i) { - Gprime[i] = Gi[i]; - Hprime[i] = scalarmultKey(Hi_p3[i], yinvpow); + Gprime[i] = Gi_p3[i]; + ge_scalarmult_p3(&Hprime[i], yinvpow.bytes, &Hi_p3[i]); sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); aprime[i] = l[i]; bprime[i] = r[i]; @@ -942,18 +929,18 @@ try_again: nprime /= 2; // PAPER LINES 16-17 + PERF_TIMER_START_BP(PROVE_inner_product); rct::key cL = inner_product(slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); rct::key cR = inner_product(slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); + PERF_TIMER_STOP(PROVE_inner_product); // PAPER LINES 18-19 - L[round] = vector_exponent_custom(slice(Gprime, nprime, Gprime.size()), slice(Hprime, 0, nprime), slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); + PERF_TIMER_START_BP(PROVE_LR); sc_mul(tmp.bytes, cL.bytes, x_ip.bytes); - rct::addKeys(L[round], L[round], rct::scalarmultH(tmp)); - L[round] = rct::scalarmultKey(L[round], INV_EIGHT); - R[round] = vector_exponent_custom(slice(Gprime, 0, nprime), slice(Hprime, nprime, Hprime.size()), slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); + L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, &ge_p3_H, &tmp); sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); - rct::addKeys(R[round], R[round], rct::scalarmultH(tmp)); - R[round] = rct::scalarmultKey(R[round], INV_EIGHT); + R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, &ge_p3_H, &tmp); + PERF_TIMER_STOP(PROVE_LR); // PAPER LINES 21-22 w[round] = hash_cache_mash(hash_cache, L[round], R[round]); @@ -966,12 +953,19 @@ try_again: // PAPER LINES 24-25 const rct::key winv = invert(w[round]); - Gprime = hadamard2(vector_scalar2(slice(Gprime, 0, nprime), winv), vector_scalar2(slice(Gprime, nprime, Gprime.size()), w[round])); - Hprime = hadamard2(vector_scalar2(slice(Hprime, 0, nprime), w[round]), vector_scalar2(slice(Hprime, nprime, Hprime.size()), winv)); + if (nprime > 1) + { + PERF_TIMER_START_BP(PROVE_hadamard2); + hadamard_fold(Gprime, winv, w[round]); + hadamard_fold(Hprime, w[round], winv); + PERF_TIMER_STOP(PROVE_hadamard2); + } // PAPER LINES 28-29 + PERF_TIMER_START_BP(PROVE_prime); aprime = vector_add(vector_scalar(slice(aprime, 0, nprime), w[round]), vector_scalar(slice(aprime, nprime, aprime.size()), winv)); bprime = vector_add(vector_scalar(slice(bprime, 0, nprime), winv), vector_scalar(slice(bprime, nprime, bprime.size()), w[round])); + PERF_TIMER_STOP(PROVE_prime); ++round; } From a49a17618fcb4c734248dcc06954c86dcc3fc5cd Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Tue, 7 Aug 2018 09:59:14 +0000 Subject: [PATCH 02/16] bulletproofs: shave off a lot of scalar muls from the g/h construction --- src/ringct/bulletproofs.cc | 59 +++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 0e5b3b55f..8a6ea2dce 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -1138,41 +1138,60 @@ bool bulletproof_VERIFY(const std::vector &proofs) winv[i] = invert(w[i]); PERF_TIMER_STOP(VERIFY_line_24_25_invert); + // precalc + PERF_TIMER_START_BP(VERIFY_line_24_25_precalc); + rct::keyV w_cache(1< 0; --s) + { + sc_mul(w_cache[s].bytes, w_cache[s/2].bytes, w[j].bytes); + sc_mul(w_cache[s-1].bytes, w_cache[s/2].bytes, winv[j].bytes); + } + } + PERF_TIMER_STOP(VERIFY_line_24_25_precalc); + for (size_t i = 0; i < MN; ++i) { - // Convert the index to binary IN REVERSE and construct the scalar exponent rct::key g_scalar = proof.a; rct::key h_scalar; - sc_mul(h_scalar.bytes, proof.b.bytes, yinvpow.bytes); + if (i == 0) + h_scalar = proof.b; + else + sc_mul(h_scalar.bytes, proof.b.bytes, yinvpow.bytes); - for (size_t j = rounds; j-- > 0; ) - { - size_t J = w.size() - j - 1; - - if ((i & (((size_t)1)< Date: Wed, 8 Aug 2018 12:14:13 +0000 Subject: [PATCH 03/16] bulletproofs: random minor speedups --- src/ringct/bulletproofs.cc | 78 +++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 8a6ea2dce..2d2e6af36 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -93,18 +93,6 @@ static bool is_reduced(const rct::key &scalar) return scalar == reduced; } -static void addKeys_acc_p3(ge_p3 *acc_p3, const rct::key &a, const rct::key &point) -{ - ge_p3 p3; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&p3, point.bytes) == 0, "ge_frombytes_vartime failed"); - ge_scalarmult_p3(&p3, a.bytes, &p3); - ge_cached cached; - ge_p3_to_cached(&cached, acc_p3); - ge_p1p1 p1; - ge_add(&p1, &p3, &cached); - ge_p1p1_to_p3(acc_p3, &p1); -} - static void add_acc_p3(ge_p3 *acc_p3, const rct::key &point) { ge_p3 p3; @@ -435,10 +423,13 @@ Bulletproof bulletproof_PROVE(const rct::key &sv, const rct::key &gamma) rct::key V; rct::keyV aL(N), aR(N); + rct::key tmp, tmp2; PERF_TIMER_START_BP(PROVE_v); - rct::addKeys2(V, gamma, sv, rct::H); - V = rct::scalarmultKey(V, INV_EIGHT); + rct::key gamma8, sv8; + sc_mul(gamma8.bytes, gamma.bytes, INV_EIGHT.bytes); + sc_mul(sv8.bytes, sv.bytes, INV_EIGHT.bytes); + rct::addKeys2(V, gamma8, sv8, rct::H); PERF_TIMER_STOP(PROVE_v); PERF_TIMER_START_BP(PROVE_aLaR); @@ -515,7 +506,6 @@ try_again: const auto yN = vector_powers(y, N); rct::key ip1y = vector_sum(yN); - rct::key tmp; sc_muladd(t0.bytes, z.bytes, ip1y.bytes, t0.bytes); rct::key zsq; @@ -563,10 +553,16 @@ try_again: // PAPER LINES 47-48 rct::key tau1 = rct::skGen(), tau2 = rct::skGen(); - rct::key T1 = rct::addKeys(rct::scalarmultH(t1), rct::scalarmultBase(tau1)); - T1 = rct::scalarmultKey(T1, INV_EIGHT); - rct::key T2 = rct::addKeys(rct::scalarmultH(t2), rct::scalarmultBase(tau2)); - T2 = rct::scalarmultKey(T2, INV_EIGHT); + rct::key T1, T2; + ge_p3 p3; + sc_mul(tmp.bytes, t1.bytes, INV_EIGHT.bytes); + sc_mul(tmp2.bytes, tau1.bytes, INV_EIGHT.bytes); + ge_double_scalarmult_base_vartime_p3(&p3, tmp.bytes, &ge_p3_H, tmp2.bytes); + ge_p3_tobytes(T1.bytes, &p3); + sc_mul(tmp.bytes, t2.bytes, INV_EIGHT.bytes); + sc_mul(tmp2.bytes, tau2.bytes, INV_EIGHT.bytes); + ge_double_scalarmult_base_vartime_p3(&p3, tmp.bytes, &ge_p3_H, tmp2.bytes); + ge_p3_tobytes(T2.bytes, &p3); // PAPER LINES 49-51 rct::key x = hash_cache_mash(hash_cache, z, T1, T2); @@ -671,7 +667,7 @@ try_again: PERF_TIMER_STOP(PROVE_step4); // PAPER LINE 58 (with inclusions from PAPER LINE 8 and PAPER LINE 20) - return Bulletproof(V, A, S, T1, T2, taux, mu, L, R, aprime[0], bprime[0], t); + return Bulletproof(V, A, S, T1, T2, taux, mu, std::move(L), std::move(R), aprime[0], bprime[0], t); } Bulletproof bulletproof_PROVE(uint64_t v, const rct::key &gamma) @@ -715,13 +711,15 @@ Bulletproof bulletproof_PROVE(const rct::keyV &sv, const rct::keyV &gamma) rct::keyV V(sv.size()); rct::keyV aL(MN), aR(MN); - rct::key tmp; + rct::key tmp, tmp2; PERF_TIMER_START_BP(PROVE_v); for (size_t i = 0; i < sv.size(); ++i) { - rct::addKeys2(V[i], gamma[i], sv[i], rct::H); - V[i] = rct::scalarmultKey(V[i], INV_EIGHT); + rct::key gamma8, sv8; + sc_mul(gamma8.bytes, gamma[i].bytes, INV_EIGHT.bytes); + sc_mul(sv8.bytes, sv[i].bytes, INV_EIGHT.bytes); + rct::addKeys2(V[i], gamma8, sv8, rct::H); } PERF_TIMER_STOP(PROVE_v); @@ -843,10 +841,16 @@ try_again: // PAPER LINES 47-48 rct::key tau1 = rct::skGen(), tau2 = rct::skGen(); - rct::key T1 = rct::addKeys(rct::scalarmultH(t1), rct::scalarmultBase(tau1)); - T1 = rct::scalarmultKey(T1, INV_EIGHT); - rct::key T2 = rct::addKeys(rct::scalarmultH(t2), rct::scalarmultBase(tau2)); - T2 = rct::scalarmultKey(T2, INV_EIGHT); + rct::key T1, T2; + ge_p3 p3; + sc_mul(tmp.bytes, t1.bytes, INV_EIGHT.bytes); + sc_mul(tmp2.bytes, tau1.bytes, INV_EIGHT.bytes); + ge_double_scalarmult_base_vartime_p3(&p3, tmp.bytes, &ge_p3_H, tmp2.bytes); + ge_p3_tobytes(T1.bytes, &p3); + sc_mul(tmp.bytes, t2.bytes, INV_EIGHT.bytes); + sc_mul(tmp2.bytes, tau2.bytes, INV_EIGHT.bytes); + ge_double_scalarmult_base_vartime_p3(&p3, tmp.bytes, &ge_p3_H, tmp2.bytes); + ge_p3_tobytes(T2.bytes, &p3); // PAPER LINES 49-51 rct::key x = hash_cache_mash(hash_cache, z, T1, T2); @@ -972,7 +976,7 @@ try_again: PERF_TIMER_STOP(PROVE_step4); // PAPER LINE 58 (with inclusions from PAPER LINE 8 and PAPER LINE 20) - return Bulletproof(V, A, S, T1, T2, taux, mu, L, R, aprime[0], bprime[0], t); + return Bulletproof(std::move(V), A, S, T1, T2, taux, mu, std::move(L), std::move(R), aprime[0], bprime[0], t); } Bulletproof bulletproof_PROVE(const std::vector &v, const rct::keyV &gamma) @@ -1034,10 +1038,10 @@ bool bulletproof_VERIFY(const std::vector &proofs) // setup weighted aggregates rct::key Z0 = rct::identity(); rct::key z1 = rct::zero(); - rct::key Z2 = rct::identity(); + rct::key &Z2 = Z0; rct::key z3 = rct::zero(); rct::keyV z4(maxMN, rct::zero()), z5(maxMN, rct::zero()); - rct::key Y2 = rct::identity(), Y3 = rct::identity(), Y4 = rct::identity(); + rct::key Y2 = rct::identity(), &Y3 = Y2, &Y4 = Y2; rct::key y0 = rct::zero(), y1 = rct::zero(); for (const Bulletproof *p: proofs) { @@ -1224,11 +1228,8 @@ bool bulletproof_VERIFY(const std::vector &proofs) // now check all proofs at once PERF_TIMER_START_BP(VERIFY_step2_check); ge_p3 check1; - ge_scalarmult_base(&check1, y0.bytes); - addKeys_acc_p3(&check1, y1, rct::H); + ge_double_scalarmult_base_vartime_p3(&check1, y1.bytes, &ge_p3_H, y0.bytes); sub_acc_p3(&check1, Y2); - sub_acc_p3(&check1, Y3); - sub_acc_p3(&check1, Y4); if (!ge_p3_is_point_at_infinity(&check1)) { MERROR("Verification failure at step 1"); @@ -1238,18 +1239,15 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_sub(tmp.bytes, rct::zero().bytes, z1.bytes); ge_double_scalarmult_base_vartime_p3(&check2, z3.bytes, &ge_p3_H, tmp.bytes); add_acc_p3(&check2, Z0); - add_acc_p3(&check2, Z2); std::vector multiexp_data; multiexp_data.reserve(2 * maxMN); for (size_t i = 0; i < maxMN; ++i) { - sc_sub(tmp.bytes, rct::zero().bytes, z4[i].bytes); - multiexp_data.emplace_back(tmp, Gi_p3[i]); - sc_sub(tmp.bytes, rct::zero().bytes, z5[i].bytes); - multiexp_data.emplace_back(tmp, Hi_p3[i]); + multiexp_data.emplace_back(z4[i], Gi_p3[i]); + multiexp_data.emplace_back(z5[i], Hi_p3[i]); } - add_acc_p3(&check2, multiexp(multiexp_data, true)); + sub_acc_p3(&check2, multiexp(multiexp_data, true)); PERF_TIMER_STOP(VERIFY_step2_check); if (!ge_p3_is_point_at_infinity(&check2)) From 484155d043d661ae1a3d7a196b2354719b90485a Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Wed, 8 Aug 2018 15:01:41 +0000 Subject: [PATCH 04/16] bulletproofs: some more speedup --- src/ringct/bulletproofs.cc | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 2d2e6af36..5c75e6418 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -423,6 +423,7 @@ Bulletproof bulletproof_PROVE(const rct::key &sv, const rct::key &gamma) rct::key V; rct::keyV aL(N), aR(N); + rct::keyV aL8(N), aR8(N); rct::key tmp, tmp2; PERF_TIMER_START_BP(PROVE_v); @@ -438,12 +439,15 @@ Bulletproof bulletproof_PROVE(const rct::key &sv, const rct::key &gamma) if (sv[i/8] & (((uint64_t)1)<<(i%8))) { aL[i] = rct::identity(); + aL8[i] = INV_EIGHT; + aR[i] = aR8[i] = rct::zero(); } else { - aL[i] = rct::zero(); + aL[i] = aL8[i] = rct::zero(); + aR[i] = MINUS_ONE; + aR8[i] = MINUS_INV_EIGHT; } - sc_sub(aR[i].bytes, aL[i].bytes, rct::identity().bytes); } PERF_TIMER_STOP(PROVE_aLaR); @@ -469,10 +473,10 @@ try_again: PERF_TIMER_START_BP(PROVE_step1); // PAPER LINES 38-39 rct::key alpha = rct::skGen(); - rct::key ve = vector_exponent(aL, aR); + rct::key ve = vector_exponent(aL8, aR8); rct::key A; - rct::addKeys(A, ve, rct::scalarmultBase(alpha)); - A = rct::scalarmultKey(A, INV_EIGHT); + sc_mul(tmp.bytes, alpha.bytes, INV_EIGHT.bytes); + rct::addKeys(A, ve, rct::scalarmultBase(tmp)); // PAPER LINES 40-42 rct::keyV sL = rct::skvGen(N), sR = rct::skvGen(N); @@ -711,6 +715,7 @@ Bulletproof bulletproof_PROVE(const rct::keyV &sv, const rct::keyV &gamma) rct::keyV V(sv.size()); rct::keyV aL(MN), aR(MN); + rct::keyV aL8(MN), aR8(MN); rct::key tmp, tmp2; PERF_TIMER_START_BP(PROVE_v); @@ -728,19 +733,18 @@ Bulletproof bulletproof_PROVE(const rct::keyV &sv, const rct::keyV &gamma) { for (size_t i = N; i-- > 0; ) { - if (j >= sv.size()) - { - aL[j*N+i] = rct::zero(); - } - else if (sv[j][i/8] & (((uint64_t)1)<<(i%8))) + if (j < sv.size() && (sv[j][i/8] & (((uint64_t)1)<<(i%8)))) { aL[j*N+i] = rct::identity(); + aL8[j*N+i] = INV_EIGHT; + aR[j*N+i] = aR8[j*N+i] = rct::zero(); } else { - aL[j*N+i] = rct::zero(); + aL[j*N+i] = aL8[j*N+i] = rct::zero(); + aR[j*N+i] = MINUS_ONE; + aR8[j*N+i] = MINUS_INV_EIGHT; } - sc_sub(aR[j*N+i].bytes, aL[j*N+i].bytes, rct::identity().bytes); } } PERF_TIMER_STOP(PROVE_aLaR); @@ -771,10 +775,10 @@ try_again: PERF_TIMER_START_BP(PROVE_step1); // PAPER LINES 38-39 rct::key alpha = rct::skGen(); - rct::key ve = vector_exponent(aL, aR); + rct::key ve = vector_exponent(aL8, aR8); rct::key A; - rct::addKeys(A, ve, rct::scalarmultBase(alpha)); - A = rct::scalarmultKey(A, INV_EIGHT); + sc_mul(tmp.bytes, alpha.bytes, INV_EIGHT.bytes); + rct::addKeys(A, ve, rct::scalarmultBase(tmp)); // PAPER LINES 40-42 rct::keyV sL = rct::skvGen(MN), sR = rct::skvGen(MN); From a281b950bff73fc715554d00ec32292ef97b56ec Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Wed, 8 Aug 2018 18:39:31 +0000 Subject: [PATCH 05/16] bulletproofs: remove single value prover It is now expressed in terms of the array prover --- src/ringct/bulletproofs.cc | 284 +------------------------------------ src/ringct/rctSigs.cpp | 9 -- 2 files changed, 2 insertions(+), 291 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 5c75e6418..09d22c6d1 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -313,17 +313,6 @@ static rct::keyV vector_dup(const rct::key &x, size_t N) return rct::keyV(N, x); } -/* Get the sum of a vector's elements */ -static rct::key vector_sum(const rct::keyV &a) -{ - rct::key res = rct::zero(); - for (size_t i = 0; i < a.size(); ++i) - { - sc_add(res.bytes, res.bytes, a[i].bytes); - } - return res; -} - static rct::key switch_endianness(rct::key k) { std::reverse(k.bytes, k.bytes + sizeof(k)); @@ -414,281 +403,12 @@ static rct::key hash_cache_mash(rct::key &hash_cache, const rct::key &mash0, con /* Given a value v (0..2^N-1) and a mask gamma, construct a range proof */ Bulletproof bulletproof_PROVE(const rct::key &sv, const rct::key &gamma) { - init_exponents(); - - PERF_TIMER_UNIT(PROVE, 1000000); - - constexpr size_t logN = 6; // log2(64) - constexpr size_t N = 1< 0; ) - { - if (sv[i/8] & (((uint64_t)1)<<(i%8))) - { - aL[i] = rct::identity(); - aL8[i] = INV_EIGHT; - aR[i] = aR8[i] = rct::zero(); - } - else - { - aL[i] = aL8[i] = rct::zero(); - aR[i] = MINUS_ONE; - aR8[i] = MINUS_INV_EIGHT; - } - } - PERF_TIMER_STOP(PROVE_aLaR); - - rct::key hash_cache = rct::hash_to_scalar(V); - - // DEBUG: Test to ensure this recovers the value -#ifdef DEBUG_BP - uint64_t test_aL = 0, test_aR = 0; - for (size_t i = 0; i < N; ++i) - { - if (aL[i] == rct::identity()) - test_aL += ((uint64_t)1)< Gprime(N); - std::vector Hprime(N); - rct::keyV aprime(N); - rct::keyV bprime(N); - const rct::key yinv = invert(y); - rct::key yinvpow = rct::identity(); - for (size_t i = 0; i < N; ++i) - { - Gprime[i] = Gi_p3[i]; - ge_scalarmult_p3(&Hprime[i], yinvpow.bytes, &Hi_p3[i]); - sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); - aprime[i] = l[i]; - bprime[i] = r[i]; - } - rct::keyV L(logN); - rct::keyV R(logN); - int round = 0; - rct::keyV w(logN); // this is the challenge x in the inner product protocol - PERF_TIMER_STOP(PROVE_step3); - - PERF_TIMER_START_BP(PROVE_step4); - // PAPER LINE 13 - while (nprime > 1) - { - // PAPER LINE 15 - nprime /= 2; - - // PAPER LINES 16-17 - rct::key cL = inner_product(slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); - rct::key cR = inner_product(slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); - - // PAPER LINES 18-19 - sc_mul(tmp.bytes, cL.bytes, x_ip.bytes); - L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, &ge_p3_H, &tmp); - sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); - R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, &ge_p3_H, &tmp); - - // PAPER LINES 21-22 - w[round] = hash_cache_mash(hash_cache, L[round], R[round]); - if (w[round] == rct::zero()) - { - PERF_TIMER_STOP(PROVE_step4); - MINFO("w[round] is 0, trying again"); - goto try_again; - } - - // PAPER LINES 24-25 - const rct::key winv = invert(w[round]); - if (nprime > 1) - { - hadamard_fold(Gprime, winv, w[round]); - hadamard_fold(Hprime, w[round], winv); - } - - // PAPER LINES 28-29 - aprime = vector_add(vector_scalar(slice(aprime, 0, nprime), w[round]), vector_scalar(slice(aprime, nprime, aprime.size()), winv)); - bprime = vector_add(vector_scalar(slice(bprime, 0, nprime), winv), vector_scalar(slice(bprime, nprime, bprime.size()), w[round])); - - ++round; - } - PERF_TIMER_STOP(PROVE_step4); - - // PAPER LINE 58 (with inclusions from PAPER LINE 8 and PAPER LINE 20) - return Bulletproof(V, A, S, T1, T2, taux, mu, std::move(L), std::move(R), aprime[0], bprime[0], t); + return bulletproof_PROVE(rct::keyV(1, sv), rct::keyV(1, gamma)); } Bulletproof bulletproof_PROVE(uint64_t v, const rct::key &gamma) { - // vG + gammaH - PERF_TIMER_START_BP(PROVE_v); - rct::key sv = rct::zero(); - sv.bytes[0] = v & 255; - sv.bytes[1] = (v >> 8) & 255; - sv.bytes[2] = (v >> 16) & 255; - sv.bytes[3] = (v >> 24) & 255; - sv.bytes[4] = (v >> 32) & 255; - sv.bytes[5] = (v >> 40) & 255; - sv.bytes[6] = (v >> 48) & 255; - sv.bytes[7] = (v >> 56) & 255; - PERF_TIMER_STOP(PROVE_v); - return bulletproof_PROVE(sv, gamma); + return bulletproof_PROVE(std::vector(1, v), rct::keyV(1, gamma)); } /* Given a set of values v (0..2^N-1) and masks gamma, construct a range proof */ diff --git a/src/ringct/rctSigs.cpp b/src/ringct/rctSigs.cpp index 0d1789a38..5ec9a1750 100644 --- a/src/ringct/rctSigs.cpp +++ b/src/ringct/rctSigs.cpp @@ -45,15 +45,6 @@ using namespace std; #define CHECK_AND_ASSERT_MES_L1(expr, ret, message) {if(!(expr)) {MCERROR("verify", message); return ret;}} namespace rct { - Bulletproof proveRangeBulletproof(key &C, key &mask, uint64_t amount) - { - mask = rct::skGen(); - Bulletproof proof = bulletproof_PROVE(amount, mask); - CHECK_AND_ASSERT_THROW_MES(proof.V.size() == 1, "V has not exactly one element"); - C = proof.V[0]; - return proof; - } - Bulletproof proveRangeBulletproof(keyV &C, keyV &masks, const std::vector &amounts) { masks = rct::skvGen(amounts.size()); From c415df97bd20b2bcc999c0c0056bb32e9b102a5b Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Wed, 8 Aug 2018 21:19:57 +0000 Subject: [PATCH 06/16] performance_tests: sc_check and ge_dsm_precomp --- tests/performance_tests/CMakeLists.txt | 2 + tests/performance_tests/crypto_ops.h | 3 ++ tests/performance_tests/main.cpp | 3 ++ tests/performance_tests/sc_check.h | 52 ++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 tests/performance_tests/sc_check.h diff --git a/tests/performance_tests/CMakeLists.txt b/tests/performance_tests/CMakeLists.txt index 837d39bd3..5cd054d86 100644 --- a/tests/performance_tests/CMakeLists.txt +++ b/tests/performance_tests/CMakeLists.txt @@ -46,6 +46,8 @@ set(performance_tests_headers range_proof.h bulletproof.h crypto_ops.h + sc_reduce32.h + sc_check.h multiexp.h multi_tx_test_base.h performance_tests.h diff --git a/tests/performance_tests/crypto_ops.h b/tests/performance_tests/crypto_ops.h index 3c68583c5..4766a1205 100644 --- a/tests/performance_tests/crypto_ops.h +++ b/tests/performance_tests/crypto_ops.h @@ -47,6 +47,7 @@ enum test_op op_scalarmultKey, op_scalarmultH, op_scalarmult8, + op_ge_dsm_precomp, op_ge_double_scalarmult_base_vartime, op_ge_double_scalarmult_precomp_vartime, op_ge_double_scalarmult_precomp_vartime2, @@ -84,6 +85,7 @@ public: ge_cached tmp_cached; ge_p1p1 tmp_p1p1; ge_p2 tmp_p2; + ge_dsmp dsmp; switch (op) { case op_sc_add: sc_add(key.bytes, scalar0.bytes, scalar1.bytes); break; @@ -101,6 +103,7 @@ public: case op_scalarmultKey: rct::scalarmultKey(point0, scalar0); break; case op_scalarmultH: rct::scalarmultH(scalar0); break; case op_scalarmult8: rct::scalarmult8(point0); break; + case op_ge_dsm_precomp: ge_dsm_precomp(dsmp, &p3_0); break; case op_ge_double_scalarmult_base_vartime: ge_double_scalarmult_base_vartime(&tmp_p2, scalar0.bytes, &p3_0, scalar1.bytes); break; case op_ge_double_scalarmult_precomp_vartime: ge_double_scalarmult_precomp_vartime(&tmp_p2, scalar0.bytes, &p3_0, scalar1.bytes, precomp0); break; case op_ge_double_scalarmult_precomp_vartime2: ge_double_scalarmult_precomp_vartime2(&tmp_p2, scalar0.bytes, precomp0, scalar1.bytes, precomp1); break; diff --git a/tests/performance_tests/main.cpp b/tests/performance_tests/main.cpp index 87a1573c2..3765d1249 100644 --- a/tests/performance_tests/main.cpp +++ b/tests/performance_tests/main.cpp @@ -50,6 +50,7 @@ #include "is_out_to_acc.h" #include "subaddress_expand.h" #include "sc_reduce32.h" +#include "sc_check.h" #include "cn_fast_hash.h" #include "rct_mlsag.h" #include "equality.h" @@ -184,6 +185,7 @@ int main(int argc, char** argv) TEST_PERFORMANCE0(filter, p, test_ge_frombytes_vartime); TEST_PERFORMANCE0(filter, p, test_generate_keypair); TEST_PERFORMANCE0(filter, p, test_sc_reduce32); + TEST_PERFORMANCE0(filter, p, test_sc_check); TEST_PERFORMANCE1(filter, p, test_signature, false); TEST_PERFORMANCE1(filter, p, test_signature, true); @@ -249,6 +251,7 @@ int main(int argc, char** argv) TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_scalarmultKey); TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_scalarmultH); TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_scalarmult8); + TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_dsm_precomp); TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_double_scalarmult_base_vartime); TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_double_scalarmult_precomp_vartime); TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_double_scalarmult_precomp_vartime2); diff --git a/tests/performance_tests/sc_check.h b/tests/performance_tests/sc_check.h new file mode 100644 index 000000000..036abf12d --- /dev/null +++ b/tests/performance_tests/sc_check.h @@ -0,0 +1,52 @@ +// Copyright (c) 2018, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "crypto/crypto.h" + +class test_sc_check +{ +public: + static const size_t loop_count = 10000000; + + bool init() + { + m_scalar = crypto::rand(); + return true; + } + + bool test() + { + sc_check((unsigned char*)m_scalar.data); + return true; + } + +private: + crypto::ec_scalar m_scalar; +}; From bf8e4b98709ffcf133ea875f3fbf7f99c28e52db Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Wed, 8 Aug 2018 21:20:50 +0000 Subject: [PATCH 07/16] bulletproofs: some more minor speedup --- src/ringct/bulletproofs.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 09d22c6d1..f22a109e9 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -86,11 +86,9 @@ static inline rct::key multiexp(const std::vector &data, bool HiGi return data.size() <= 64 ? straus(data, NULL, 0) : pippenger(data, NULL, get_pippenger_c(data.size())); } -static bool is_reduced(const rct::key &scalar) +static inline bool is_reduced(const rct::key &scalar) { - rct::key reduced = scalar; - sc_reduce32(reduced.bytes); - return scalar == reduced; + return sc_check(scalar.bytes) == 0; } static void add_acc_p3(ge_p3 *acc_p3, const rct::key &point) @@ -139,8 +137,8 @@ static void init_exponents() Gi[i] = get_exponent(rct::H, i * 2 + 1); CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&Gi_p3[i], Gi[i].bytes) == 0, "ge_frombytes_vartime failed"); - data.push_back({rct::zero(), Gi[i]}); - data.push_back({rct::zero(), Hi[i]}); + data.push_back({rct::zero(), Gi_p3[i]}); + data.push_back({rct::zero(), Hi_p3[i]}); } straus_HiGi_cache = straus_init_cache(data, STRAUS_SIZE_LIMIT); From 4061960a16d27f803ca54b8887f46185c29c336e Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Mon, 13 Aug 2018 11:18:54 +0000 Subject: [PATCH 08/16] multiexp: pack the digits table when STRAUS_C is 4 Spotted by stoffu --- src/ringct/multiexp.cc | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc index 21957b94c..fb2f18551 100644 --- a/src/ringct/multiexp.cc +++ b/src/ringct/multiexp.cc @@ -320,7 +320,7 @@ rct::key bos_coster_heap_conv_robust(std::vector data) return res; } -static constexpr unsigned int STRAUS_C = 4; +#define STRAUS_C 4 struct straus_cached_data { @@ -447,26 +447,23 @@ rct::key straus(const std::vector &data, const std::shared_ptr digits{new uint8_t[64 * data.size()]}; +#else std::unique_ptr digits{new uint8_t[256 * data.size()]}; +#endif for (size_t j = 0; j < data.size(); ++j) { unsigned char bytes33[33]; memcpy(bytes33, data[j].scalar.bytes, 32); bytes33[32] = 0; const unsigned char *bytes = bytes33; -#if 1 - static_assert(STRAUS_C == 4, "optimized version needs STRAUS_C == 4"); +#if STRAUS_C==4 unsigned int i; - for (i = 0; i < 256; i += 8, bytes++) + for (i = 0; i < 64; i += 2, bytes++) { - digits[j*256+i] = bytes[0] & 0xf; - digits[j*256+i+1] = (bytes[0] >> 1) & 0xf; - digits[j*256+i+2] = (bytes[0] >> 2) & 0xf; - digits[j*256+i+3] = (bytes[0] >> 3) & 0xf; - digits[j*256+i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf; - digits[j*256+i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf; - digits[j*256+i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf; - digits[j*256+i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf; + digits[j*64+i] = bytes[0] & 0xf; + digits[j*64+i+1] = bytes[0] >> 4; } #elif 1 for (size_t i = 0; i < 256; ++i) @@ -521,7 +518,11 @@ skipfirst: if (skip[j]) continue; #endif +#if STRAUS_C==4 + const uint8_t digit = digits[j*64+i/4]; +#else const uint8_t digit = digits[j*256+i]; +#endif if (digit) { ge_add(&p1, &band_p3, &CACHE_OFFSET(local_cache, j, digit)); From fc9f7d9c81acfe19fdf1285e690d4dfe5ee9d172 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Thu, 23 Aug 2018 18:52:05 +0000 Subject: [PATCH 09/16] bulletproofs: merge multiexps as per sarang's new python code --- src/ringct/bulletproofs.cc | 106 +++++++++++++------------------------ 1 file changed, 38 insertions(+), 68 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index f22a109e9..549e52296 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -91,28 +91,6 @@ static inline bool is_reduced(const rct::key &scalar) return sc_check(scalar.bytes) == 0; } -static void add_acc_p3(ge_p3 *acc_p3, const rct::key &point) -{ - ge_p3 p3; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&p3, point.bytes) == 0, "ge_frombytes_vartime failed"); - ge_cached cached; - ge_p3_to_cached(&cached, &p3); - ge_p1p1 p1; - ge_add(&p1, acc_p3, &cached); - ge_p1p1_to_p3(acc_p3, &p1); -} - -static void sub_acc_p3(ge_p3 *acc_p3, const rct::key &point) -{ - ge_p3 p3; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&p3, point.bytes) == 0, "ge_frombytes_vartime failed"); - ge_cached cached; - ge_p3_to_cached(&cached, &p3); - ge_p1p1 p1; - ge_sub(&p1, acc_p3, &cached); - ge_p1p1_to_p3(acc_p3, &p1); -} - static rct::key get_exponent(const rct::key &base, size_t idx) { static const std::string salt("bulletproof"); @@ -733,6 +711,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) // sanity and figure out which proof is longest size_t max_length = 0; + size_t nV = 0; for (const Bulletproof *p: proofs) { const Bulletproof &proof = *p; @@ -749,6 +728,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) CHECK_AND_ASSERT_MES(proof.L.size() > 0, false, "Empty proof"); max_length = std::max(max_length, proof.L.size()); + nV += proof.V.size(); } CHECK_AND_ASSERT_MES(max_length < 32, false, "At least one proof is too large"); size_t maxMN = 1u << max_length; @@ -757,13 +737,13 @@ bool bulletproof_VERIFY(const std::vector &proofs) const size_t N = 1 << logN; rct::key tmp; + std::vector multiexp_data; + multiexp_data.reserve(nV + (2 * (10/*logM*/ + logN) + 4) * proofs.size() + 2 * maxMN); + // setup weighted aggregates - rct::key Z0 = rct::identity(); rct::key z1 = rct::zero(); - rct::key &Z2 = Z0; rct::key z3 = rct::zero(); rct::keyV z4(maxMN, rct::zero()), z5(maxMN, rct::zero()); - rct::key Y2 = rct::identity(), &Y3 = Y2, &Y4 = Y2; rct::key y0 = rct::zero(), y1 = rct::zero(); for (const Bulletproof *p: proofs) { @@ -773,7 +753,8 @@ bool bulletproof_VERIFY(const std::vector &proofs) for (logM = 0; (M = 1< &proofs) rct::key proof8_T1 = rct::scalarmult8(proof.T1); rct::key proof8_T2 = rct::scalarmult8(proof.T2); rct::key proof8_S = rct::scalarmult8(proof.S); + rct::key proof8_A = rct::scalarmult8(proof.A); PERF_TIMER_START_BP(VERIFY_line_61); // PAPER LINE 61 - sc_muladd(y0.bytes, proof.taux.bytes, weight.bytes, y0.bytes); + sc_muladd(y0.bytes, proof.taux.bytes, weight_y.bytes, y0.bytes); const rct::keyV zpow = vector_powers(z, M+3); @@ -814,26 +796,26 @@ bool bulletproof_VERIFY(const std::vector &proofs) PERF_TIMER_START_BP(VERIFY_line_61rl_new); sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes); - std::vector multiexp_data; - multiexp_data.reserve(proof.V.size()); sc_sub(tmp.bytes, proof.t.bytes, tmp.bytes); - sc_muladd(y1.bytes, tmp.bytes, weight.bytes, y1.bytes); + sc_muladd(y1.bytes, tmp.bytes, weight_y.bytes, y1.bytes); for (size_t j = 0; j < proof8_V.size(); j++) { - multiexp_data.emplace_back(zpow[j+2], proof8_V[j]); + sc_mul(tmp.bytes, zpow[j+2].bytes, weight_y.bytes); + multiexp_data.emplace_back(tmp, proof8_V[j]); } - rct::addKeys(Y2, Y2, rct::scalarmultKey(multiexp(multiexp_data, false), weight)); - sc_mul(tmp.bytes, x.bytes, weight.bytes); - rct::addKeys(Y3, Y3, rct::scalarmultKey(proof8_T1, tmp)); + sc_mul(tmp.bytes, x.bytes, weight_y.bytes); + multiexp_data.emplace_back(tmp, proof8_T1); rct::key xsq; sc_mul(xsq.bytes, x.bytes, x.bytes); - sc_mul(tmp.bytes, xsq.bytes, weight.bytes); - rct::addKeys(Y4, Y4, rct::scalarmultKey(proof8_T2, tmp)); + sc_mul(tmp.bytes, xsq.bytes, weight_y.bytes); + multiexp_data.emplace_back(tmp, proof8_T2); PERF_TIMER_STOP(VERIFY_line_61rl_new); PERF_TIMER_START_BP(VERIFY_line_62); // PAPER LINE 62 - rct::addKeys(Z0, Z0, rct::scalarmultKey(rct::addKeys(rct::scalarmult8(proof.A), rct::scalarmultKey(proof8_S, x)), weight)); + multiexp_data.emplace_back(weight_z, proof8_A); + sc_mul(tmp.bytes, x.bytes, weight_z.bytes); + multiexp_data.emplace_back(tmp, proof8_S); PERF_TIMER_STOP(VERIFY_line_62); // Compute the number of rounds for the inner product @@ -909,8 +891,8 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes); } - sc_muladd(z4[i].bytes, g_scalar.bytes, weight.bytes, z4[i].bytes); - sc_muladd(z5[i].bytes, h_scalar.bytes, weight.bytes, z5[i].bytes); + sc_muladd(z4[i].bytes, g_scalar.bytes, weight_z.bytes, z4[i].bytes); + sc_muladd(z5[i].bytes, h_scalar.bytes, weight_z.bytes, z5[i].bytes); if (i == 0) { @@ -928,55 +910,43 @@ bool bulletproof_VERIFY(const std::vector &proofs) // PAPER LINE 26 PERF_TIMER_START_BP(VERIFY_line_26_new); - multiexp_data.clear(); - multiexp_data.reserve(2*rounds); - - sc_muladd(z1.bytes, proof.mu.bytes, weight.bytes, z1.bytes); + sc_muladd(z1.bytes, proof.mu.bytes, weight_z.bytes, z1.bytes); for (size_t i = 0; i < rounds; ++i) { sc_mul(tmp.bytes, w[i].bytes, w[i].bytes); + sc_mul(tmp.bytes, tmp.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_L[i]); sc_mul(tmp.bytes, winv[i].bytes, winv[i].bytes); + sc_mul(tmp.bytes, tmp.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_R[i]); } - rct::key acc = multiexp(multiexp_data, false); - rct::addKeys(Z2, Z2, rct::scalarmultKey(acc, weight)); sc_mulsub(tmp.bytes, proof.a.bytes, proof.b.bytes, proof.t.bytes); sc_mul(tmp.bytes, tmp.bytes, x_ip.bytes); - sc_muladd(z3.bytes, tmp.bytes, weight.bytes, z3.bytes); + sc_muladd(z3.bytes, tmp.bytes, weight_z.bytes, z3.bytes); PERF_TIMER_STOP(VERIFY_line_26_new); } // now check all proofs at once PERF_TIMER_START_BP(VERIFY_step2_check); - ge_p3 check1; - ge_double_scalarmult_base_vartime_p3(&check1, y1.bytes, &ge_p3_H, y0.bytes); - sub_acc_p3(&check1, Y2); - if (!ge_p3_is_point_at_infinity(&check1)) - { - MERROR("Verification failure at step 1"); - return false; - } - ge_p3 check2; - sc_sub(tmp.bytes, rct::zero().bytes, z1.bytes); - ge_double_scalarmult_base_vartime_p3(&check2, z3.bytes, &ge_p3_H, tmp.bytes); - add_acc_p3(&check2, Z0); - - std::vector multiexp_data; - multiexp_data.reserve(2 * maxMN); + sc_sub(tmp.bytes, rct::zero().bytes, y0.bytes); + sc_sub(tmp.bytes, tmp.bytes, z1.bytes); + multiexp_data.emplace_back(tmp, rct::G); + sc_sub(tmp.bytes, z3.bytes, y1.bytes); + multiexp_data.emplace_back(tmp, rct::H); for (size_t i = 0; i < maxMN; ++i) { - multiexp_data.emplace_back(z4[i], Gi_p3[i]); - multiexp_data.emplace_back(z5[i], Hi_p3[i]); + sc_sub(tmp.bytes, rct::zero().bytes, z4[i].bytes); + multiexp_data.emplace_back(tmp, Gi_p3[i]); + sc_sub(tmp.bytes, rct::zero().bytes, z5[i].bytes); + multiexp_data.emplace_back(tmp, Hi_p3[i]); } - sub_acc_p3(&check2, multiexp(multiexp_data, true)); - PERF_TIMER_STOP(VERIFY_step2_check); - - if (!ge_p3_is_point_at_infinity(&check2)) + if (!(multiexp(multiexp_data, false) == rct::identity())) { - MERROR("Verification failure at step 2"); + PERF_TIMER_STOP(VERIFY_step2_check); + MERROR("Verification failure"); return false; } + PERF_TIMER_STOP(VERIFY_step2_check); PERF_TIMER_STOP(VERIFY); return true; From 8629a42cf6e4650b552925f7637761b8e7ee66e3 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Wed, 22 Aug 2018 22:30:14 +0000 Subject: [PATCH 10/16] bulletproofs: rework flow to use sarang's fast batch inversion code --- src/ringct/bulletproofs.cc | 231 ++++++++++++++++++++++++------------- 1 file changed, 154 insertions(+), 77 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 549e52296..d9961cb20 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -29,8 +29,6 @@ // Adapted from Java code by Sarang Noether #include -#include -#include #include #include "misc_log_ex.h" #include "common/perf_timer.h" @@ -289,37 +287,59 @@ static rct::keyV vector_dup(const rct::key &x, size_t N) return rct::keyV(N, x); } -static rct::key switch_endianness(rct::key k) +static rct::key sm(rct::key y, int n, const rct::key &x) { - std::reverse(k.bytes, k.bytes + sizeof(k)); - return k; + while (n--) + sc_mul(y.bytes, y.bytes, y.bytes); + sc_mul(y.bytes, y.bytes, x.bytes); + return y; } -/* Compute the inverse of a scalar, the stupid way */ +/* Compute the inverse of a scalar, the clever way */ static rct::key invert(const rct::key &x) { + rct::key _1, _10, _100, _11, _101, _111, _1001, _1011, _1111; + + _1 = x; + sc_mul(_10.bytes, _1.bytes, _1.bytes); + sc_mul(_100.bytes, _10.bytes, _10.bytes); + sc_mul(_11.bytes, _10.bytes, _1.bytes); + sc_mul(_101.bytes, _10.bytes, _11.bytes); + sc_mul(_111.bytes, _10.bytes, _101.bytes); + sc_mul(_1001.bytes, _10.bytes, _111.bytes); + sc_mul(_1011.bytes, _10.bytes, _1001.bytes); + sc_mul(_1111.bytes, _100.bytes, _1011.bytes); + rct::key inv; + sc_mul(inv.bytes, _1111.bytes, _1.bytes); - BN_CTX *ctx = BN_CTX_new(); - BIGNUM *X = BN_new(); - BIGNUM *L = BN_new(); - BIGNUM *I = BN_new(); - - BN_bin2bn(switch_endianness(x).bytes, sizeof(rct::key), X); - BN_bin2bn(switch_endianness(rct::curveOrder()).bytes, sizeof(rct::key), L); - - CHECK_AND_ASSERT_THROW_MES(BN_mod_inverse(I, X, L, ctx), "Failed to invert"); - - const int len = BN_num_bytes(I); - CHECK_AND_ASSERT_THROW_MES((size_t)len <= sizeof(rct::key), "Invalid number length"); - inv = rct::zero(); - BN_bn2bin(I, inv.bytes); - std::reverse(inv.bytes, inv.bytes + len); - - BN_free(I); - BN_free(L); - BN_free(X); - BN_CTX_free(ctx); + inv = sm(inv, 123 + 3, _101); + inv = sm(inv, 2 + 2, _11); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 4, _1001); + inv = sm(inv, 2, _11); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 1 + 3, _101); + inv = sm(inv, 3 + 3, _101); + inv = sm(inv, 3, _111); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 2 + 3, _111); + inv = sm(inv, 2 + 2, _11); + inv = sm(inv, 1 + 4, _1011); + inv = sm(inv, 2 + 4, _1011); + inv = sm(inv, 6 + 4, _1001); + inv = sm(inv, 2 + 2, _11); + inv = sm(inv, 3 + 2, _11); + inv = sm(inv, 3 + 2, _11); + inv = sm(inv, 1 + 4, _1001); + inv = sm(inv, 1 + 3, _111); + inv = sm(inv, 2 + 4, _1111); + inv = sm(inv, 1 + 4, _1011); + inv = sm(inv, 3, _101); + inv = sm(inv, 2 + 4, _1111); + inv = sm(inv, 3, _101); + inv = sm(inv, 1 + 2, _11); #ifdef DEBUG_BP rct::key tmp; @@ -329,6 +349,34 @@ static rct::key invert(const rct::key &x) return inv; } +static rct::keyV invert(rct::keyV x) +{ + rct::keyV scratch; + scratch.reserve(x.size()); + + rct::key acc = rct::identity(); + for (size_t n = 0; n < x.size(); ++n) + { + scratch.push_back(acc); + if (n == 0) + acc = x[0]; + else + sc_mul(acc.bytes, acc.bytes, x[n].bytes); + } + + acc = invert(acc); + + rct::key tmp; + for (int i = x.size(); i-- > 0; ) + { + sc_mul(tmp.bytes, acc.bytes, x[i].bytes); + sc_mul(x[i].bytes, acc.bytes, scratch[i].bytes); + acc = tmp; + } + + return x; +} + /* Compute the slice of a vector */ static rct::keyV slice(const rct::keyV &a, size_t start, size_t stop) { @@ -702,6 +750,13 @@ Bulletproof bulletproof_PROVE(const std::vector &v, const rct::keyV &g return bulletproof_PROVE(sv, gamma); } +struct proof_data_t +{ + rct::key x, y, z, x_ip; + std::vector w; + size_t logM, inv_offset; +}; + /* Given a range proof, determine if it is valid */ bool bulletproof_VERIFY(const std::vector &proofs) { @@ -709,9 +764,17 @@ bool bulletproof_VERIFY(const std::vector &proofs) PERF_TIMER_START_BP(VERIFY); + const size_t logN = 6; + const size_t N = 1 << logN; + // sanity and figure out which proof is longest size_t max_length = 0; size_t nV = 0; + std::vector proof_data; + proof_data.reserve(proofs.size()); + size_t inv_offset = 0; + std::vector to_invert; + to_invert.reserve(11 * sizeof(proofs)); for (const Bulletproof *p: proofs) { const Bulletproof &proof = *p; @@ -729,46 +792,75 @@ bool bulletproof_VERIFY(const std::vector &proofs) max_length = std::max(max_length, proof.L.size()); nV += proof.V.size(); + + // Reconstruct the challenges + PERF_TIMER_START_BP(VERIFY_start); + proof_data.resize(proof_data.size() + 1); + proof_data_t &pd = proof_data.back(); + rct::key hash_cache = rct::hash_to_scalar(proof.V); + pd.y = hash_cache_mash(hash_cache, proof.A, proof.S); + CHECK_AND_ASSERT_MES(!(pd.y == rct::zero()), false, "y == 0"); + pd.z = hash_cache = rct::hash_to_scalar(pd.y); + CHECK_AND_ASSERT_MES(!(pd.z == rct::zero()), false, "z == 0"); + pd.x = hash_cache_mash(hash_cache, pd.z, proof.T1, proof.T2); + CHECK_AND_ASSERT_MES(!(pd.x == rct::zero()), false, "x == 0"); + pd.x_ip = hash_cache_mash(hash_cache, pd.x, proof.taux, proof.mu, proof.t); + CHECK_AND_ASSERT_MES(!(pd.x_ip == rct::zero()), false, "x_ip == 0"); + PERF_TIMER_STOP(VERIFY_start); + + size_t M; + for (pd.logM = 0; (M = 1< 0, false, "Zero rounds"); + + PERF_TIMER_START_BP(VERIFY_line_21_22); + // PAPER LINES 21-22 + // The inner product challenges are computed per round + pd.w.resize(rounds); + for (size_t i = 0; i < rounds; ++i) + { + pd.w[i] = hash_cache_mash(hash_cache, proof.L[i], proof.R[i]); + CHECK_AND_ASSERT_MES(!(pd.w[i] == rct::zero()), false, "w[i] == 0"); + } + PERF_TIMER_STOP(VERIFY_line_21_22); + + pd.inv_offset = inv_offset; + for (size_t i = 0; i < rounds; ++i) + to_invert.push_back(pd.w[i]); + to_invert.push_back(pd.y); + inv_offset += rounds + 1; } CHECK_AND_ASSERT_MES(max_length < 32, false, "At least one proof is too large"); size_t maxMN = 1u << max_length; - const size_t logN = 6; - const size_t N = 1 << logN; rct::key tmp; std::vector multiexp_data; multiexp_data.reserve(nV + (2 * (10/*logM*/ + logN) + 4) * proofs.size() + 2 * maxMN); + PERF_TIMER_START_BP(VERIFY_line_24_25_invert); + const std::vector inverses = invert(to_invert); + PERF_TIMER_STOP(VERIFY_line_24_25_invert); + // setup weighted aggregates rct::key z1 = rct::zero(); rct::key z3 = rct::zero(); rct::keyV z4(maxMN, rct::zero()), z5(maxMN, rct::zero()); rct::key y0 = rct::zero(), y1 = rct::zero(); + int proof_data_index = 0; for (const Bulletproof *p: proofs) { const Bulletproof &proof = *p; + const proof_data_t &pd = proof_data[proof_data_index++]; - size_t M, logM; - for (logM = 0; (M = 1< &proofs) // PAPER LINE 61 sc_muladd(y0.bytes, proof.taux.bytes, weight_y.bytes, y0.bytes); - const rct::keyV zpow = vector_powers(z, M+3); + const rct::keyV zpow = vector_powers(pd.z, M+3); rct::key k; - const rct::key ip1y = vector_power_sum(y, MN); + const rct::key ip1y = vector_power_sum(pd.y, MN); sc_mulsub(k.bytes, zpow[2].bytes, ip1y.bytes, rct::zero().bytes); for (size_t j = 1; j <= M; ++j) { @@ -795,7 +887,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) PERF_TIMER_STOP(VERIFY_line_61); PERF_TIMER_START_BP(VERIFY_line_61rl_new); - sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes); + sc_muladd(tmp.bytes, pd.z.bytes, ip1y.bytes, k.bytes); sc_sub(tmp.bytes, proof.t.bytes, tmp.bytes); sc_muladd(y1.bytes, tmp.bytes, weight_y.bytes, y1.bytes); for (size_t j = 0; j < proof8_V.size(); j++) @@ -803,10 +895,10 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mul(tmp.bytes, zpow[j+2].bytes, weight_y.bytes); multiexp_data.emplace_back(tmp, proof8_V[j]); } - sc_mul(tmp.bytes, x.bytes, weight_y.bytes); + sc_mul(tmp.bytes, pd.x.bytes, weight_y.bytes); multiexp_data.emplace_back(tmp, proof8_T1); rct::key xsq; - sc_mul(xsq.bytes, x.bytes, x.bytes); + sc_mul(xsq.bytes, pd.x.bytes, pd.x.bytes); sc_mul(tmp.bytes, xsq.bytes, weight_y.bytes); multiexp_data.emplace_back(tmp, proof8_T2); PERF_TIMER_STOP(VERIFY_line_61rl_new); @@ -814,49 +906,34 @@ bool bulletproof_VERIFY(const std::vector &proofs) PERF_TIMER_START_BP(VERIFY_line_62); // PAPER LINE 62 multiexp_data.emplace_back(weight_z, proof8_A); - sc_mul(tmp.bytes, x.bytes, weight_z.bytes); + sc_mul(tmp.bytes, pd.x.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_S); PERF_TIMER_STOP(VERIFY_line_62); // Compute the number of rounds for the inner product - const size_t rounds = logM+logN; + const size_t rounds = pd.logM+logN; CHECK_AND_ASSERT_MES(rounds > 0, false, "Zero rounds"); - PERF_TIMER_START_BP(VERIFY_line_21_22); - // PAPER LINES 21-22 - // The inner product challenges are computed per round - rct::keyV w(rounds); - for (size_t i = 0; i < rounds; ++i) - { - w[i] = hash_cache_mash(hash_cache, proof.L[i], proof.R[i]); - CHECK_AND_ASSERT_MES(!(w[i] == rct::zero()), false, "w[i] == 0"); - } - PERF_TIMER_STOP(VERIFY_line_21_22); - PERF_TIMER_START_BP(VERIFY_line_24_25); // Basically PAPER LINES 24-25 // Compute the curvepoints from G[i] and H[i] rct::key yinvpow = rct::identity(); rct::key ypow = rct::identity(); - PERF_TIMER_START_BP(VERIFY_line_24_25_invert); - const rct::key yinv = invert(y); - rct::keyV winv(rounds); - for (size_t i = 0; i < rounds; ++i) - winv[i] = invert(w[i]); - PERF_TIMER_STOP(VERIFY_line_24_25_invert); + const rct::key *winv = &inverses[pd.inv_offset]; + const rct::key yinv = inverses[pd.inv_offset + rounds]; // precalc PERF_TIMER_START_BP(VERIFY_line_24_25_precalc); rct::keyV w_cache(1< 0; --s) { - sc_mul(w_cache[s].bytes, w_cache[s/2].bytes, w[j].bytes); + sc_mul(w_cache[s].bytes, w_cache[s/2].bytes, pd.w[j].bytes); sc_mul(w_cache[s-1].bytes, w_cache[s/2].bytes, winv[j].bytes); } } @@ -876,18 +953,18 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mul(h_scalar.bytes, h_scalar.bytes, w_cache[(~i) & (MN-1)].bytes); // Adjust the scalars using the exponents from PAPER LINE 62 - sc_add(g_scalar.bytes, g_scalar.bytes, z.bytes); + sc_add(g_scalar.bytes, g_scalar.bytes, pd.z.bytes); CHECK_AND_ASSERT_MES(2+i/N < zpow.size(), false, "invalid zpow index"); CHECK_AND_ASSERT_MES(i%N < twoN.size(), false, "invalid twoN index"); sc_mul(tmp.bytes, zpow[2+i/N].bytes, twoN[i%N].bytes); if (i == 0) { - sc_add(tmp.bytes, tmp.bytes, z.bytes); + sc_add(tmp.bytes, tmp.bytes, pd.z.bytes); sc_sub(h_scalar.bytes, h_scalar.bytes, tmp.bytes); } else { - sc_muladd(tmp.bytes, z.bytes, ypow.bytes, tmp.bytes); + sc_muladd(tmp.bytes, pd.z.bytes, ypow.bytes, tmp.bytes); sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes); } @@ -897,12 +974,12 @@ bool bulletproof_VERIFY(const std::vector &proofs) if (i == 0) { yinvpow = yinv; - ypow = y; + ypow = pd.y; } else if (i != MN-1) { sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); - sc_mul(ypow.bytes, ypow.bytes, y.bytes); + sc_mul(ypow.bytes, ypow.bytes, pd.y.bytes); } } @@ -913,7 +990,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_muladd(z1.bytes, proof.mu.bytes, weight_z.bytes, z1.bytes); for (size_t i = 0; i < rounds; ++i) { - sc_mul(tmp.bytes, w[i].bytes, w[i].bytes); + sc_mul(tmp.bytes, pd.w[i].bytes, pd.w[i].bytes); sc_mul(tmp.bytes, tmp.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_L[i]); sc_mul(tmp.bytes, winv[i].bytes, winv[i].bytes); @@ -921,7 +998,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) multiexp_data.emplace_back(tmp, proof8_R[i]); } sc_mulsub(tmp.bytes, proof.a.bytes, proof.b.bytes, proof.t.bytes); - sc_mul(tmp.bytes, tmp.bytes, x_ip.bytes); + sc_mul(tmp.bytes, tmp.bytes, pd.x_ip.bytes); sc_muladd(z3.bytes, tmp.bytes, weight_z.bytes, z3.bytes); PERF_TIMER_STOP(VERIFY_line_26_new); } From 10e5a9276953ece1f96d6801fe6d91d550c5dfae Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Fri, 24 Aug 2018 18:51:14 +0000 Subject: [PATCH 11/16] bulletproofs: maintain -z4, -z5, and -y0 to avoid subtractions --- src/ringct/bulletproofs.cc | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index d9961cb20..9e4d85534 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -847,8 +847,8 @@ bool bulletproof_VERIFY(const std::vector &proofs) // setup weighted aggregates rct::key z1 = rct::zero(); rct::key z3 = rct::zero(); - rct::keyV z4(maxMN, rct::zero()), z5(maxMN, rct::zero()); - rct::key y0 = rct::zero(), y1 = rct::zero(); + rct::keyV m_z4(maxMN, rct::zero()), m_z5(maxMN, rct::zero()); + rct::key m_y0 = rct::zero(), y1 = rct::zero(); int proof_data_index = 0; for (const Bulletproof *p: proofs) { @@ -872,7 +872,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) PERF_TIMER_START_BP(VERIFY_line_61); // PAPER LINE 61 - sc_muladd(y0.bytes, proof.taux.bytes, weight_y.bytes, y0.bytes); + sc_mulsub(m_y0.bytes, proof.taux.bytes, weight_y.bytes, m_y0.bytes); const rct::keyV zpow = vector_powers(pd.z, M+3); @@ -968,8 +968,8 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes); } - sc_muladd(z4[i].bytes, g_scalar.bytes, weight_z.bytes, z4[i].bytes); - sc_muladd(z5[i].bytes, h_scalar.bytes, weight_z.bytes, z5[i].bytes); + sc_mulsub(m_z4[i].bytes, g_scalar.bytes, weight_z.bytes, m_z4[i].bytes); + sc_mulsub(m_z5[i].bytes, h_scalar.bytes, weight_z.bytes, m_z5[i].bytes); if (i == 0) { @@ -1005,17 +1005,14 @@ bool bulletproof_VERIFY(const std::vector &proofs) // now check all proofs at once PERF_TIMER_START_BP(VERIFY_step2_check); - sc_sub(tmp.bytes, rct::zero().bytes, y0.bytes); - sc_sub(tmp.bytes, tmp.bytes, z1.bytes); + sc_sub(tmp.bytes, m_y0.bytes, z1.bytes); multiexp_data.emplace_back(tmp, rct::G); sc_sub(tmp.bytes, z3.bytes, y1.bytes); multiexp_data.emplace_back(tmp, rct::H); for (size_t i = 0; i < maxMN; ++i) { - sc_sub(tmp.bytes, rct::zero().bytes, z4[i].bytes); - multiexp_data.emplace_back(tmp, Gi_p3[i]); - sc_sub(tmp.bytes, rct::zero().bytes, z5[i].bytes); - multiexp_data.emplace_back(tmp, Hi_p3[i]); + multiexp_data.emplace_back(m_z4[i], Gi_p3[i]); + multiexp_data.emplace_back(m_z5[i], Hi_p3[i]); } if (!(multiexp(multiexp_data, false) == rct::identity())) { From 6f9ae5b6eb535fb748722ed1c81f981474422ead Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Fri, 24 Aug 2018 21:20:56 +0000 Subject: [PATCH 12/16] multiexp: handle pippenger multiexps with part precalc --- src/ringct/bulletproofs.cc | 21 +++++++++++---------- src/ringct/multiexp.cc | 21 ++++++++++++++------- src/ringct/multiexp.h | 4 ++-- tests/performance_tests/multiexp.h | 4 ++-- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 9e4d85534..a3af9264e 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -73,15 +73,15 @@ static const rct::keyV twoN = vector_powers(TWO, maxN); static const rct::key ip12 = inner_product(oneN, twoN); static boost::mutex init_mutex; -static inline rct::key multiexp(const std::vector &data, bool HiGi) +static inline rct::key multiexp(const std::vector &data, size_t HiGi_size) { - if (HiGi) + if (HiGi_size > 0) { static_assert(128 <= STRAUS_SIZE_LIMIT, "Straus in precalc mode can only be calculated till STRAUS_SIZE_LIMIT"); - return data.size() <= 128 ? straus(data, straus_HiGi_cache, 0) : pippenger(data, pippenger_HiGi_cache, get_pippenger_c(data.size())); + return HiGi_size <= 128 && data.size() == HiGi_size ? straus(data, straus_HiGi_cache, 0) : pippenger(data, pippenger_HiGi_cache, HiGi_size, get_pippenger_c(data.size())); } else - return data.size() <= 64 ? straus(data, NULL, 0) : pippenger(data, NULL, get_pippenger_c(data.size())); + return data.size() <= 64 ? straus(data, NULL, 0) : pippenger(data, NULL, 0, get_pippenger_c(data.size())); } static inline bool is_reduced(const rct::key &scalar) @@ -118,7 +118,7 @@ static void init_exponents() } straus_HiGi_cache = straus_init_cache(data, STRAUS_SIZE_LIMIT); - pippenger_HiGi_cache = pippenger_init_cache(data, PIPPENGER_SIZE_LIMIT); + pippenger_HiGi_cache = pippenger_init_cache(data, 0, PIPPENGER_SIZE_LIMIT); MINFO("Hi/Gi cache size: " << (sizeof(Hi)+sizeof(Gi))/1024 << " kB"); MINFO("Hi_p3/Gi_p3 cache size: " << (sizeof(Hi_p3)+sizeof(Gi_p3))/1024 << " kB"); @@ -142,7 +142,7 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b) multiexp_data.emplace_back(a[i], Gi_p3[i]); multiexp_data.emplace_back(b[i], Hi_p3[i]); } - return multiexp(multiexp_data, true); + return multiexp(multiexp_data, 2 * a.size()); } /* Compute a custom vector-scalar commitment */ @@ -169,7 +169,7 @@ static rct::key cross_vector_exponent8(size_t size, const std::vector &A, sc_mul(multiexp_data.back().scalar.bytes, extra_scalar->bytes, INV_EIGHT.bytes); multiexp_data.back().point = *extra_point; } - return multiexp(multiexp_data, false); + return multiexp(multiexp_data, 0); } /* Given a scalar, construct a vector of powers */ @@ -839,6 +839,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) std::vector multiexp_data; multiexp_data.reserve(nV + (2 * (10/*logM*/ + logN) + 4) * proofs.size() + 2 * maxMN); + multiexp_data.resize(2 * maxMN); PERF_TIMER_START_BP(VERIFY_line_24_25_invert); const std::vector inverses = invert(to_invert); @@ -1011,10 +1012,10 @@ bool bulletproof_VERIFY(const std::vector &proofs) multiexp_data.emplace_back(tmp, rct::H); for (size_t i = 0; i < maxMN; ++i) { - multiexp_data.emplace_back(m_z4[i], Gi_p3[i]); - multiexp_data.emplace_back(m_z5[i], Hi_p3[i]); + multiexp_data[i * 2] = {m_z4[i], Gi_p3[i]}; + multiexp_data[i * 2 + 1] = {m_z5[i], Hi_p3[i]}; } - if (!(multiexp(multiexp_data, false) == rct::identity())) + if (!(multiexp(multiexp_data, 2 * maxMN) == rct::identity())) { PERF_TIMER_STOP(VERIFY_step2_check); MERROR("Verification failure"); diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc index fb2f18551..178f92267 100644 --- a/src/ringct/multiexp.cc +++ b/src/ringct/multiexp.cc @@ -564,12 +564,13 @@ struct pippenger_cached_data ~pippenger_cached_data() { aligned_free(cached); } }; -std::shared_ptr pippenger_init_cache(const std::vector &data, size_t N) +std::shared_ptr pippenger_init_cache(const std::vector &data, size_t start_offset, size_t N) { MULTIEXP_PERF(PERF_TIMER_START_UNIT(pippenger_init_cache, 1000000)); + CHECK_AND_ASSERT_THROW_MES(start_offset <= data.size(), "Bad cache base data"); if (N == 0) - N = data.size(); - CHECK_AND_ASSERT_THROW_MES(N <= data.size(), "Bad cache base data"); + N = data.size() - start_offset; + CHECK_AND_ASSERT_THROW_MES(N <= data.size() - start_offset, "Bad cache base data"); ge_cached cached; std::shared_ptr cache(new pippenger_cached_data()); @@ -577,7 +578,7 @@ std::shared_ptr pippenger_init_cache(const std::vectorcached = (ge_cached*)aligned_realloc(cache->cached, N * sizeof(ge_cached), 4096); CHECK_AND_ASSERT_THROW_MES(cache->cached, "Out of memory"); for (size_t i = 0; i < N; ++i) - ge_p3_to_cached(&cache->cached[i], &data[i].point); + ge_p3_to_cached(&cache->cached[i], &data[i+start_offset].point); MULTIEXP_PERF(PERF_TIMER_STOP(pippenger_init_cache)); return cache; @@ -588,9 +589,11 @@ size_t pippenger_get_cache_size(const std::shared_ptr &ca return cache->size * sizeof(*cache->cached); } -rct::key pippenger(const std::vector &data, const std::shared_ptr &cache, size_t c) +rct::key pippenger(const std::vector &data, const std::shared_ptr &cache, size_t cache_size, size_t c) { - CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache->size >= data.size(), "Cache is too small"); + if (cache != NULL && cache_size == 0) + cache_size = cache->size; + CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache_size <= cache->size, "Cache is too small"); if (c == 0) c = get_pippenger_c(data.size()); CHECK_AND_ASSERT_THROW_MES(c <= 9, "c is too large"); @@ -598,6 +601,7 @@ rct::key pippenger(const std::vector &data, const std::shared_ptr< ge_p3 result = ge_p3_identity; std::unique_ptr buckets{new ge_p3[1< local_cache = cache == NULL ? pippenger_init_cache(data) : cache; + std::shared_ptr local_cache_2 = data.size() > cache_size ? pippenger_init_cache(data, cache_size) : NULL; rct::key maxscalar = rct::zero(); for (size_t i = 0; i < data.size(); ++i) @@ -641,7 +645,10 @@ rct::key pippenger(const std::vector &data, const std::shared_ptr< CHECK_AND_ASSERT_THROW_MES(bucket < (1u<cached[i]); + if (i < cache_size) + add(buckets[bucket], local_cache->cached[i]); + else + add(buckets[bucket], local_cache_2->cached[i - cache_size]); } else buckets[bucket] = data[i].point; diff --git a/src/ringct/multiexp.h b/src/ringct/multiexp.h index 559ab664a..b52707933 100644 --- a/src/ringct/multiexp.h +++ b/src/ringct/multiexp.h @@ -61,10 +61,10 @@ rct::key bos_coster_heap_conv_robust(std::vector data); std::shared_ptr straus_init_cache(const std::vector &data, size_t N =0); size_t straus_get_cache_size(const std::shared_ptr &cache); rct::key straus(const std::vector &data, const std::shared_ptr &cache = NULL, size_t STEP = 0); -std::shared_ptr pippenger_init_cache(const std::vector &data, size_t N =0); +std::shared_ptr pippenger_init_cache(const std::vector &data, size_t start_offset = 0, size_t N =0); size_t pippenger_get_cache_size(const std::shared_ptr &cache); size_t get_pippenger_c(size_t N); -rct::key pippenger(const std::vector &data, const std::shared_ptr &cache = NULL, size_t c = 0); +rct::key pippenger(const std::vector &data, const std::shared_ptr &cache = NULL, size_t cache_size = 0, size_t c = 0); } diff --git a/tests/performance_tests/multiexp.h b/tests/performance_tests/multiexp.h index b8b87b3a6..b6e86ebd4 100644 --- a/tests/performance_tests/multiexp.h +++ b/tests/performance_tests/multiexp.h @@ -78,9 +78,9 @@ public: case multiexp_straus_cached: return res == straus(data, straus_cache); case multiexp_pippenger: - return res == pippenger(data, NULL, c); + return res == pippenger(data, NULL, 0, c); case multiexp_pippenger_cached: - return res == pippenger(data, pippenger_cache, c); + return res == pippenger(data, pippenger_cache, 0, c); default: return false; } From 8b4767221c9b0ff3015229b167e5be1331a16c12 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Sat, 25 Aug 2018 18:37:21 +0000 Subject: [PATCH 13/16] bulletproofs: speedup prover --- src/ringct/bulletproofs.cc | 54 ++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index a3af9264e..f1b821978 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -146,13 +146,14 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b) } /* Compute a custom vector-scalar commitment */ -static rct::key cross_vector_exponent8(size_t size, const std::vector &A, size_t Ao, const std::vector &B, size_t Bo, const rct::keyV &a, size_t ao, const rct::keyV &b, size_t bo, const ge_p3 *extra_point, const rct::key *extra_scalar) +static rct::key cross_vector_exponent8(size_t size, const std::vector &A, size_t Ao, const std::vector &B, size_t Bo, const rct::keyV &a, size_t ao, const rct::keyV &b, size_t bo, const rct::keyV *scale, const ge_p3 *extra_point, const rct::key *extra_scalar) { CHECK_AND_ASSERT_THROW_MES(size + Ao <= A.size(), "Incompatible size for A"); CHECK_AND_ASSERT_THROW_MES(size + Bo <= B.size(), "Incompatible size for B"); CHECK_AND_ASSERT_THROW_MES(size + ao <= a.size(), "Incompatible size for a"); CHECK_AND_ASSERT_THROW_MES(size + bo <= b.size(), "Incompatible size for b"); CHECK_AND_ASSERT_THROW_MES(size <= maxN*maxM, "size is too large"); + CHECK_AND_ASSERT_THROW_MES(!scale || size == scale->size() / 2, "Incompatible size for scale"); CHECK_AND_ASSERT_THROW_MES(!!extra_point == !!extra_scalar, "only one of extra point/scalar present"); std::vector multiexp_data; @@ -162,6 +163,8 @@ static rct::key cross_vector_exponent8(size_t size, const std::vector &A, sc_mul(multiexp_data[i*2].scalar.bytes, a[ao+i].bytes, INV_EIGHT.bytes);; multiexp_data[i*2].point = A[Ao+i]; sc_mul(multiexp_data[i*2+1].scalar.bytes, b[bo+i].bytes, INV_EIGHT.bytes); + if (scale) + sc_mul(multiexp_data[i*2+1].scalar.bytes, multiexp_data[i*2+1].scalar.bytes, (*scale)[Bo+i].bytes); multiexp_data[i*2+1].point = B[Bo+i]; } if (extra_point) @@ -232,7 +235,7 @@ static rct::keyV hadamard(const rct::keyV &a, const rct::keyV &b) } /* folds a curvepoint array using a two way scaled Hadamard product */ -static void hadamard_fold(std::vector &v, const rct::key &a, const rct::key &b) +static void hadamard_fold(std::vector &v, const rct::keyV *scale, const rct::key &a, const rct::key &b) { CHECK_AND_ASSERT_THROW_MES((v.size() & 1) == 0, "Vector size should be even"); const size_t sz = v.size() / 2; @@ -241,7 +244,10 @@ static void hadamard_fold(std::vector &v, const rct::key &a, const rct::k ge_dsmp c[2]; ge_dsm_precomp(c[0], &v[n]); ge_dsm_precomp(c[1], &v[sz + n]); - ge_double_scalarmult_precomp_vartime2_p3(&v[n], a.bytes, c[0], b.bytes, c[1]); + rct::key sa, sb; + if (scale) sc_mul(sa.bytes, a.bytes, (*scale)[n].bytes); else sa = a; + if (scale) sc_mul(sb.bytes, b.bytes, (*scale)[sz + n].bytes); else sb = b; + ge_double_scalarmult_precomp_vartime2_p3(&v[n], sa.bytes, c[0], sb.bytes, c[1]); } v.resize(sz); } @@ -258,14 +264,24 @@ static rct::keyV vector_add(const rct::keyV &a, const rct::keyV &b) return res; } -/* Subtract two vectors */ -static rct::keyV vector_subtract(const rct::keyV &a, const rct::keyV &b) +/* Add a scalar to all elements of a vector */ +static rct::keyV vector_add(const rct::keyV &a, const rct::key &b) { - CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); rct::keyV res(a.size()); for (size_t i = 0; i < a.size(); ++i) { - sc_sub(res[i].bytes, a[i].bytes, b[i].bytes); + sc_add(res[i].bytes, a[i].bytes, b.bytes); + } + return res; +} + +/* Subtract a scalar from all elements of a vector */ +static rct::keyV vector_subtract(const rct::keyV &a, const rct::key &b) +{ + rct::keyV res(a.size()); + for (size_t i = 0; i < a.size(); ++i) + { + sc_sub(res[i].bytes, a[i].bytes, b.bytes); } return res; } @@ -549,8 +565,7 @@ try_again: } // Polynomial construction by coefficients - const auto zMN = vector_dup(z, MN); - rct::keyV l0 = vector_subtract(aL, zMN); + rct::keyV l0 = vector_subtract(aL, z); const rct::keyV &l1 = sL; // This computes the ugly sum/concatenation from PAPER LINE 65 @@ -570,7 +585,7 @@ try_again: } } - rct::keyV r0 = vector_add(aR, zMN); + rct::keyV r0 = vector_add(aR, z); const auto yMN = vector_powers(y, MN); r0 = hadamard(r0, yMN); r0 = vector_add(r0, zero_twos); @@ -658,12 +673,15 @@ try_again: rct::keyV aprime(MN); rct::keyV bprime(MN); const rct::key yinv = invert(y); - rct::key yinvpow = rct::identity(); + rct::keyV yinvpow(MN); + yinvpow[0] = rct::identity(); + yinvpow[1] = yinv; for (size_t i = 0; i < MN; ++i) { Gprime[i] = Gi_p3[i]; - ge_scalarmult_p3(&Hprime[i], yinvpow.bytes, &Hi_p3[i]); - sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); + Hprime[i] = Hi_p3[i]; + if (i > 1) + sc_mul(yinvpow[i].bytes, yinvpow[i-1].bytes, yinv.bytes); aprime[i] = l[i]; bprime[i] = r[i]; } @@ -675,6 +693,7 @@ try_again: PERF_TIMER_START_BP(PROVE_step4); // PAPER LINE 13 + const rct::keyV *scale = &yinvpow; while (nprime > 1) { // PAPER LINE 15 @@ -689,9 +708,9 @@ try_again: // PAPER LINES 18-19 PERF_TIMER_START_BP(PROVE_LR); sc_mul(tmp.bytes, cL.bytes, x_ip.bytes); - L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, &ge_p3_H, &tmp); + L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, scale, &ge_p3_H, &tmp); sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); - R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, &ge_p3_H, &tmp); + R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, scale, &ge_p3_H, &tmp); PERF_TIMER_STOP(PROVE_LR); // PAPER LINES 21-22 @@ -708,8 +727,8 @@ try_again: if (nprime > 1) { PERF_TIMER_START_BP(PROVE_hadamard2); - hadamard_fold(Gprime, winv, w[round]); - hadamard_fold(Hprime, w[round], winv); + hadamard_fold(Gprime, NULL, winv, w[round]); + hadamard_fold(Hprime, scale, w[round], winv); PERF_TIMER_STOP(PROVE_hadamard2); } @@ -719,6 +738,7 @@ try_again: bprime = vector_add(vector_scalar(slice(bprime, 0, nprime), winv), vector_scalar(slice(bprime, nprime, bprime.size()), w[round])); PERF_TIMER_STOP(PROVE_prime); + scale = NULL; ++round; } PERF_TIMER_STOP(PROVE_step4); From a110e6aa18928fb0bc13d0e39c854aea9e99e96d Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Mon, 27 Aug 2018 13:23:57 +0000 Subject: [PATCH 14/16] multiexp: tune which variants to use for which number of points --- src/ringct/bulletproofs.cc | 8 ++++---- src/ringct/multiexp.cc | 36 ++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index f1b821978..4e1c940d1 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -48,7 +48,7 @@ extern "C" #define PERF_TIMER_START_BP(x) PERF_TIMER_START_UNIT(x, 1000000) -#define STRAUS_SIZE_LIMIT 128 +#define STRAUS_SIZE_LIMIT 232 #define PIPPENGER_SIZE_LIMIT 0 namespace rct @@ -77,11 +77,11 @@ static inline rct::key multiexp(const std::vector &data, size_t Hi { if (HiGi_size > 0) { - static_assert(128 <= STRAUS_SIZE_LIMIT, "Straus in precalc mode can only be calculated till STRAUS_SIZE_LIMIT"); - return HiGi_size <= 128 && data.size() == HiGi_size ? straus(data, straus_HiGi_cache, 0) : pippenger(data, pippenger_HiGi_cache, HiGi_size, get_pippenger_c(data.size())); + static_assert(232 <= STRAUS_SIZE_LIMIT, "Straus in precalc mode can only be calculated till STRAUS_SIZE_LIMIT"); + return HiGi_size <= 232 && data.size() == HiGi_size ? straus(data, straus_HiGi_cache, 0) : pippenger(data, pippenger_HiGi_cache, HiGi_size, get_pippenger_c(data.size())); } else - return data.size() <= 64 ? straus(data, NULL, 0) : pippenger(data, NULL, 0, get_pippenger_c(data.size())); + return data.size() <= 95 ? straus(data, NULL, 0) : pippenger(data, NULL, 0, get_pippenger_c(data.size())); } static inline bool is_reduced(const rct::key &scalar) diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc index 178f92267..85b1dfed4 100644 --- a/src/ringct/multiexp.cc +++ b/src/ringct/multiexp.cc @@ -79,6 +79,25 @@ extern "C" // Best/cached Straus Straus Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip // Best/uncached Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip Pip Pip +// New timings: +// Pippenger: +// 2/1 always +// 3/2 at ~13 +// 4/3 at ~29 +// 5/4 at ~83 +// 6/5 < 200 +// 7/6 at ~470 +// 8/7 at ~1180 +// 9/8 at ~2290 +// Cached Pippenger: +// 6/5 < 200 +// 7/6 at 460 +// 8/7 at 1180 +// 9/8 at 2300 +// +// Cached Straus/Pippenger cross at 232 +// + namespace rct { @@ -543,16 +562,13 @@ skipfirst: size_t get_pippenger_c(size_t N) { -// uncached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9 -// cached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9 - if (N <= 2) return 1; - if (N <= 8) return 2; - if (N <= 16) return 3; - if (N <= 64) return 4; - if (N <= 128) return 5; - if (N <= 256) return 6; - if (N <= 1024) return 7; - if (N <= 2048) return 8; + if (N <= 13) return 2; + if (N <= 29) return 3; + if (N <= 83) return 4; + if (N <= 185) return 5; + if (N <= 465) return 6; + if (N <= 1180) return 7; + if (N <= 2295) return 8; return 9; } From a6d2e246f37d5a91a33defb6f156b229f6274531 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Sat, 1 Sep 2018 17:46:34 +0000 Subject: [PATCH 15/16] bulletproofs: only enable profiling on request --- src/ringct/bulletproofs.cc | 62 +++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 4e1c940d1..bed48769a 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -46,7 +46,13 @@ extern "C" //#define DEBUG_BP +#if 1 #define PERF_TIMER_START_BP(x) PERF_TIMER_START_UNIT(x, 1000000) +#define PERF_TIMER_STOP_BP(x) PERF_TIMER_STOP(x) +#else +#define PERF_TIMER_START_BP(x) ((void*)0) +#define PERF_TIMER_STOP_BP(x) ((void*)0) +#endif #define STRAUS_SIZE_LIMIT 232 #define PIPPENGER_SIZE_LIMIT 0 @@ -486,7 +492,7 @@ Bulletproof bulletproof_PROVE(const rct::keyV &sv, const rct::keyV &gamma) sc_mul(sv8.bytes, sv[i].bytes, INV_EIGHT.bytes); rct::addKeys2(V[i], gamma8, sv8, rct::H); } - PERF_TIMER_STOP(PROVE_v); + PERF_TIMER_STOP_BP(PROVE_v); PERF_TIMER_START_BP(PROVE_aLaR); for (size_t j = 0; j < M; ++j) @@ -507,7 +513,7 @@ Bulletproof bulletproof_PROVE(const rct::keyV &sv, const rct::keyV &gamma) } } } - PERF_TIMER_STOP(PROVE_aLaR); + PERF_TIMER_STOP_BP(PROVE_aLaR); // DEBUG: Test to ensure this recovers the value #ifdef DEBUG_BP @@ -552,14 +558,14 @@ try_again: rct::key y = hash_cache_mash(hash_cache, A, S); if (y == rct::zero()) { - PERF_TIMER_STOP(PROVE_step1); + PERF_TIMER_STOP_BP(PROVE_step1); MINFO("y is 0, trying again"); goto try_again; } rct::key z = hash_cache = rct::hash_to_scalar(y); if (z == rct::zero()) { - PERF_TIMER_STOP(PROVE_step1); + PERF_TIMER_STOP_BP(PROVE_step1); MINFO("z is 0, trying again"); goto try_again; } @@ -598,7 +604,7 @@ try_again: sc_add(t1.bytes, t1_1.bytes, t1_2.bytes); rct::key t2 = inner_product(l1, r1); - PERF_TIMER_STOP(PROVE_step1); + PERF_TIMER_STOP_BP(PROVE_step1); PERF_TIMER_START_BP(PROVE_step2); // PAPER LINES 47-48 @@ -619,7 +625,7 @@ try_again: rct::key x = hash_cache_mash(hash_cache, z, T1, T2); if (x == rct::zero()) { - PERF_TIMER_STOP(PROVE_step2); + PERF_TIMER_STOP_BP(PROVE_step2); MINFO("x is 0, trying again"); goto try_again; } @@ -643,7 +649,7 @@ try_again: l = vector_add(l, vector_scalar(l1, x)); rct::keyV r = r0; r = vector_add(r, vector_scalar(r1, x)); - PERF_TIMER_STOP(PROVE_step2); + PERF_TIMER_STOP_BP(PROVE_step2); PERF_TIMER_START_BP(PROVE_step3); rct::key t = inner_product(l, r); @@ -661,7 +667,7 @@ try_again: rct::key x_ip = hash_cache_mash(hash_cache, x, taux, mu, t); if (x_ip == rct::zero()) { - PERF_TIMER_STOP(PROVE_step3); + PERF_TIMER_STOP_BP(PROVE_step3); MINFO("x_ip is 0, trying again"); goto try_again; } @@ -689,7 +695,7 @@ try_again: rct::keyV R(logMN); int round = 0; rct::keyV w(logMN); // this is the challenge x in the inner product protocol - PERF_TIMER_STOP(PROVE_step3); + PERF_TIMER_STOP_BP(PROVE_step3); PERF_TIMER_START_BP(PROVE_step4); // PAPER LINE 13 @@ -703,7 +709,7 @@ try_again: PERF_TIMER_START_BP(PROVE_inner_product); rct::key cL = inner_product(slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); rct::key cR = inner_product(slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); - PERF_TIMER_STOP(PROVE_inner_product); + PERF_TIMER_STOP_BP(PROVE_inner_product); // PAPER LINES 18-19 PERF_TIMER_START_BP(PROVE_LR); @@ -711,13 +717,13 @@ try_again: L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, scale, &ge_p3_H, &tmp); sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, scale, &ge_p3_H, &tmp); - PERF_TIMER_STOP(PROVE_LR); + PERF_TIMER_STOP_BP(PROVE_LR); // PAPER LINES 21-22 w[round] = hash_cache_mash(hash_cache, L[round], R[round]); if (w[round] == rct::zero()) { - PERF_TIMER_STOP(PROVE_step4); + PERF_TIMER_STOP_BP(PROVE_step4); MINFO("w[round] is 0, trying again"); goto try_again; } @@ -729,19 +735,19 @@ try_again: PERF_TIMER_START_BP(PROVE_hadamard2); hadamard_fold(Gprime, NULL, winv, w[round]); hadamard_fold(Hprime, scale, w[round], winv); - PERF_TIMER_STOP(PROVE_hadamard2); + PERF_TIMER_STOP_BP(PROVE_hadamard2); } // PAPER LINES 28-29 PERF_TIMER_START_BP(PROVE_prime); aprime = vector_add(vector_scalar(slice(aprime, 0, nprime), w[round]), vector_scalar(slice(aprime, nprime, aprime.size()), winv)); bprime = vector_add(vector_scalar(slice(bprime, 0, nprime), winv), vector_scalar(slice(bprime, nprime, bprime.size()), w[round])); - PERF_TIMER_STOP(PROVE_prime); + PERF_TIMER_STOP_BP(PROVE_prime); scale = NULL; ++round; } - PERF_TIMER_STOP(PROVE_step4); + PERF_TIMER_STOP_BP(PROVE_step4); // PAPER LINE 58 (with inclusions from PAPER LINE 8 and PAPER LINE 20) return Bulletproof(std::move(V), A, S, T1, T2, taux, mu, std::move(L), std::move(R), aprime[0], bprime[0], t); @@ -766,7 +772,7 @@ Bulletproof bulletproof_PROVE(const std::vector &v, const rct::keyV &g sv[i].bytes[6] = (v[i] >> 48) & 255; sv[i].bytes[7] = (v[i] >> 56) & 255; } - PERF_TIMER_STOP(PROVE_v); + PERF_TIMER_STOP_BP(PROVE_v); return bulletproof_PROVE(sv, gamma); } @@ -826,7 +832,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) CHECK_AND_ASSERT_MES(!(pd.x == rct::zero()), false, "x == 0"); pd.x_ip = hash_cache_mash(hash_cache, pd.x, proof.taux, proof.mu, proof.t); CHECK_AND_ASSERT_MES(!(pd.x_ip == rct::zero()), false, "x_ip == 0"); - PERF_TIMER_STOP(VERIFY_start); + PERF_TIMER_STOP_BP(VERIFY_start); size_t M; for (pd.logM = 0; (M = 1< &proofs) pd.w[i] = hash_cache_mash(hash_cache, proof.L[i], proof.R[i]); CHECK_AND_ASSERT_MES(!(pd.w[i] == rct::zero()), false, "w[i] == 0"); } - PERF_TIMER_STOP(VERIFY_line_21_22); + PERF_TIMER_STOP_BP(VERIFY_line_21_22); pd.inv_offset = inv_offset; for (size_t i = 0; i < rounds; ++i) @@ -863,7 +869,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) PERF_TIMER_START_BP(VERIFY_line_24_25_invert); const std::vector inverses = invert(to_invert); - PERF_TIMER_STOP(VERIFY_line_24_25_invert); + PERF_TIMER_STOP_BP(VERIFY_line_24_25_invert); // setup weighted aggregates rct::key z1 = rct::zero(); @@ -905,7 +911,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) CHECK_AND_ASSERT_MES(j+2 < zpow.size(), false, "invalid zpow index"); sc_mulsub(k.bytes, zpow[j+2].bytes, ip12.bytes, k.bytes); } - PERF_TIMER_STOP(VERIFY_line_61); + PERF_TIMER_STOP_BP(VERIFY_line_61); PERF_TIMER_START_BP(VERIFY_line_61rl_new); sc_muladd(tmp.bytes, pd.z.bytes, ip1y.bytes, k.bytes); @@ -922,14 +928,14 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mul(xsq.bytes, pd.x.bytes, pd.x.bytes); sc_mul(tmp.bytes, xsq.bytes, weight_y.bytes); multiexp_data.emplace_back(tmp, proof8_T2); - PERF_TIMER_STOP(VERIFY_line_61rl_new); + PERF_TIMER_STOP_BP(VERIFY_line_61rl_new); PERF_TIMER_START_BP(VERIFY_line_62); // PAPER LINE 62 multiexp_data.emplace_back(weight_z, proof8_A); sc_mul(tmp.bytes, pd.x.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_S); - PERF_TIMER_STOP(VERIFY_line_62); + PERF_TIMER_STOP_BP(VERIFY_line_62); // Compute the number of rounds for the inner product const size_t rounds = pd.logM+logN; @@ -958,7 +964,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mul(w_cache[s-1].bytes, w_cache[s/2].bytes, winv[j].bytes); } } - PERF_TIMER_STOP(VERIFY_line_24_25_precalc); + PERF_TIMER_STOP_BP(VERIFY_line_24_25_precalc); for (size_t i = 0; i < MN; ++i) { @@ -1004,7 +1010,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) } } - PERF_TIMER_STOP(VERIFY_line_24_25); + PERF_TIMER_STOP_BP(VERIFY_line_24_25); // PAPER LINE 26 PERF_TIMER_START_BP(VERIFY_line_26_new); @@ -1021,7 +1027,7 @@ bool bulletproof_VERIFY(const std::vector &proofs) sc_mulsub(tmp.bytes, proof.a.bytes, proof.b.bytes, proof.t.bytes); sc_mul(tmp.bytes, tmp.bytes, pd.x_ip.bytes); sc_muladd(z3.bytes, tmp.bytes, weight_z.bytes, z3.bytes); - PERF_TIMER_STOP(VERIFY_line_26_new); + PERF_TIMER_STOP_BP(VERIFY_line_26_new); } // now check all proofs at once @@ -1037,13 +1043,13 @@ bool bulletproof_VERIFY(const std::vector &proofs) } if (!(multiexp(multiexp_data, 2 * maxMN) == rct::identity())) { - PERF_TIMER_STOP(VERIFY_step2_check); + PERF_TIMER_STOP_BP(VERIFY_step2_check); MERROR("Verification failure"); return false; } - PERF_TIMER_STOP(VERIFY_step2_check); + PERF_TIMER_STOP_BP(VERIFY_step2_check); - PERF_TIMER_STOP(VERIFY); + PERF_TIMER_STOP_BP(VERIFY); return true; } From 74fb3d882c9a50798795367ca8cd5dac85138c78 Mon Sep 17 00:00:00 2001 From: moneromooo-monero Date: Mon, 22 Oct 2018 11:56:45 +0000 Subject: [PATCH 16/16] multiexp: some minor speedups --- src/ringct/multiexp.cc | 48 ++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc index 85b1dfed4..6f77fed34 100644 --- a/src/ringct/multiexp.cc +++ b/src/ringct/multiexp.cc @@ -473,10 +473,7 @@ rct::key straus(const std::vector &data, const std::shared_ptr &data, const std::shared_ptr> 4; } #elif 1 + unsigned char bytes33[33]; + memcpy(bytes33, data[j].scalar.bytes, 32); + bytes33[32] = 0; + bytes = bytes33; for (size_t i = 0; i < 256; ++i) digits[j*256+i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask; #else @@ -615,7 +616,9 @@ rct::key pippenger(const std::vector &data, const std::shared_ptr< CHECK_AND_ASSERT_THROW_MES(c <= 9, "c is too large"); ge_p3 result = ge_p3_identity; + bool result_init = false; std::unique_ptr buckets{new ge_p3[1< local_cache = cache == NULL ? pippenger_init_cache(data) : cache; std::shared_ptr local_cache_2 = data.size() > cache_size ? pippenger_init_cache(data, cache_size) : NULL; @@ -632,7 +635,7 @@ rct::key pippenger(const std::vector &data, const std::shared_ptr< for (size_t k = groups; k-- > 0; ) { - if (!ge_p3_is_point_at_infinity(&result)) + if (result_init) { ge_p2 p2; ge_p3_to_p2(&p2, &result); @@ -646,8 +649,7 @@ rct::key pippenger(const std::vector &data, const std::shared_ptr< ge_p1p1_to_p2(&p2, &p1); } } - for (size_t i = 0; i < (1u< &data, const std::shared_ptr< if (bucket == 0) continue; CHECK_AND_ASSERT_THROW_MES(bucket < (1u<cached[i]); @@ -667,17 +669,37 @@ rct::key pippenger(const std::vector &data, const std::shared_ptr< add(buckets[bucket], local_cache_2->cached[i - cache_size]); } else + { buckets[bucket] = data[i].point; + buckets_init[bucket] = true; + } } // sum the buckets - ge_p3 pail = ge_p3_identity; + ge_p3 pail; + bool pail_init = false; for (size_t i = (1< 0; --i) { - if (!ge_p3_is_point_at_infinity(&buckets[i])) - add(pail, buckets[i]); - if (!ge_p3_is_point_at_infinity(&pail)) - add(result, pail); + if (buckets_init[i]) + { + if (pail_init) + add(pail, buckets[i]); + else + { + pail = buckets[i]; + pail_init = true; + } + } + if (pail_init) + { + if (result_init) + add(result, pail); + else + { + result = pail; + result_init = true; + } + } } }