mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-30 15:43:32 +01:00
equix: Build cleanly with -Wall -Werror
Fixes some type nitpicks that show up in Tor development builds, which usually run with -Wall -Werror. Tested on x86_64 and aarch64: both build cleanly and pass equix-tests and hashx-tests.

Signed-off-by: Micah Elizabeth Scott <beth@torproject.org>
This commit is contained in:
parent
246ced3a8c
commit
daa08557ad
@ -239,11 +239,6 @@ static FORCE_INLINE void blake2b_increment_counter(blake2b_state* S,
|
|||||||
S->t[1] += (S->t[0] < inc);
|
S->t[1] += (S->t[0] < inc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static FORCE_INLINE void blake2b_invalidate_state(blake2b_state* S) {
|
|
||||||
//clear_internal_memory(S, sizeof(*S)); /* wipe */
|
|
||||||
blake2b_set_lastblock(S); /* invalidate for further use */
|
|
||||||
}
|
|
||||||
|
|
||||||
static FORCE_INLINE void blake2b_init0(blake2b_state* S) {
|
static FORCE_INLINE void blake2b_init0(blake2b_state* S) {
|
||||||
memset(S, 0, sizeof(*S));
|
memset(S, 0, sizeof(*S));
|
||||||
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
||||||
|
@ -17,14 +17,14 @@ HASHX_PRIVATE void hashx_compile_a64(const hashx_program* program, uint8_t* code
|
|||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
#define HASHX_COMPILER 1
|
#define HASHX_COMPILER 1
|
||||||
#define HASHX_COMPILER_X86
|
#define HASHX_COMPILER_X86
|
||||||
#define hashx_compile hashx_compile_x86
|
#define hashx_compile(p,c) hashx_compile_x86(p,c)
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
#define HASHX_COMPILER 1
|
#define HASHX_COMPILER 1
|
||||||
#define HASHX_COMPILER_A64
|
#define HASHX_COMPILER_A64
|
||||||
#define hashx_compile hashx_compile_a64
|
#define hashx_compile(p,c) hashx_compile_a64(p,c)
|
||||||
#else
|
#else
|
||||||
#define HASHX_COMPILER 0
|
#define HASHX_COMPILER 0
|
||||||
#define hashx_compile
|
#define hashx_compile(p,c)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
HASHX_PRIVATE bool hashx_compiler_init(hashx_ctx* compiler);
|
HASHX_PRIVATE bool hashx_compiler_init(hashx_ctx* compiler);
|
||||||
|
@ -86,7 +86,7 @@ void hashx_compile_x86(const hashx_program* program, uint8_t* code) {
|
|||||||
uint8_t* pos = code;
|
uint8_t* pos = code;
|
||||||
uint8_t* target = NULL;
|
uint8_t* target = NULL;
|
||||||
EMIT(pos, x86_prologue);
|
EMIT(pos, x86_prologue);
|
||||||
for (int i = 0; i < program->code_size; ++i) {
|
for (size_t i = 0; i < program->code_size; ++i) {
|
||||||
const instruction* instr = &program->code[i];
|
const instruction* instr = &program->code[i];
|
||||||
switch (instr->opcode)
|
switch (instr->opcode)
|
||||||
{
|
{
|
||||||
|
@ -37,10 +37,12 @@ static inline bool is_mul(instr_type type) {
|
|||||||
return type <= INSTR_MUL_R;
|
return type <= INSTR_MUL_R;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HASHX_PROGRAM_STATS
|
||||||
/* If the instruction is a 64x64->128 bit multiplication. */
|
/* If the instruction is a 64x64->128 bit multiplication. */
|
||||||
static inline bool is_wide_mul(instr_type type) {
|
static inline bool is_wide_mul(instr_type type) {
|
||||||
return type < INSTR_MUL_R;
|
return type < INSTR_MUL_R;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Ivy Bridge integer execution ports: P0, P1, P5 */
|
/* Ivy Bridge integer execution ports: P0, P1, P5 */
|
||||||
typedef enum execution_port {
|
typedef enum execution_port {
|
||||||
@ -76,7 +78,7 @@ typedef struct instr_template {
|
|||||||
typedef struct register_info {
|
typedef struct register_info {
|
||||||
int latency; /* cycle when the register value will be ready */
|
int latency; /* cycle when the register value will be ready */
|
||||||
instr_type last_op; /* last op applied to the register */
|
instr_type last_op; /* last op applied to the register */
|
||||||
int last_op_par; /* parameter of the last op (-1 = constant) */
|
uint32_t last_op_par; /* parameter of the last op (~0 = constant) */
|
||||||
} register_info;
|
} register_info;
|
||||||
|
|
||||||
typedef struct program_item {
|
typedef struct program_item {
|
||||||
@ -97,7 +99,7 @@ typedef struct generator_ctx {
|
|||||||
execution_port ports[PORT_MAP_SIZE][NUM_PORTS];
|
execution_port ports[PORT_MAP_SIZE][NUM_PORTS];
|
||||||
} generator_ctx;
|
} generator_ctx;
|
||||||
|
|
||||||
const static instr_template tpl_umulh_r = {
|
static const instr_template tpl_umulh_r = {
|
||||||
.type = INSTR_UMULH_R,
|
.type = INSTR_UMULH_R,
|
||||||
.x86_asm = "mul r",
|
.x86_asm = "mul r",
|
||||||
.x86_size = 9, /* mov, mul, mov */
|
.x86_size = 9, /* mov, mul, mov */
|
||||||
@ -113,7 +115,7 @@ const static instr_template tpl_umulh_r = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_smulh_r = {
|
static const instr_template tpl_smulh_r = {
|
||||||
.type = INSTR_SMULH_R,
|
.type = INSTR_SMULH_R,
|
||||||
.x86_asm = "imul r",
|
.x86_asm = "imul r",
|
||||||
.x86_size = 9, /* mov, mul, mov */
|
.x86_size = 9, /* mov, mul, mov */
|
||||||
@ -129,7 +131,7 @@ const static instr_template tpl_smulh_r = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_mul_r = {
|
static const instr_template tpl_mul_r = {
|
||||||
.type = INSTR_MUL_R,
|
.type = INSTR_MUL_R,
|
||||||
.x86_asm = "imul r,r",
|
.x86_asm = "imul r,r",
|
||||||
.x86_size = 4,
|
.x86_size = 4,
|
||||||
@ -145,7 +147,7 @@ const static instr_template tpl_mul_r = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_sub_r = {
|
static const instr_template tpl_sub_r = {
|
||||||
.type = INSTR_SUB_R,
|
.type = INSTR_SUB_R,
|
||||||
.x86_asm = "sub r,r",
|
.x86_asm = "sub r,r",
|
||||||
.x86_size = 3,
|
.x86_size = 3,
|
||||||
@ -161,7 +163,7 @@ const static instr_template tpl_sub_r = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_xor_r = {
|
static const instr_template tpl_xor_r = {
|
||||||
.type = INSTR_XOR_R,
|
.type = INSTR_XOR_R,
|
||||||
.x86_asm = "xor r,r",
|
.x86_asm = "xor r,r",
|
||||||
.x86_size = 3,
|
.x86_size = 3,
|
||||||
@ -177,7 +179,7 @@ const static instr_template tpl_xor_r = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_add_rs = {
|
static const instr_template tpl_add_rs = {
|
||||||
.type = INSTR_ADD_RS,
|
.type = INSTR_ADD_RS,
|
||||||
.x86_asm = "lea r,r+r*s",
|
.x86_asm = "lea r,r+r*s",
|
||||||
.x86_size = 4,
|
.x86_size = 4,
|
||||||
@ -193,7 +195,7 @@ const static instr_template tpl_add_rs = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_ror_c = {
|
static const instr_template tpl_ror_c = {
|
||||||
.type = INSTR_ROR_C,
|
.type = INSTR_ROR_C,
|
||||||
.x86_asm = "ror r,i",
|
.x86_asm = "ror r,i",
|
||||||
.x86_size = 4,
|
.x86_size = 4,
|
||||||
@ -209,7 +211,7 @@ const static instr_template tpl_ror_c = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_add_c = {
|
static const instr_template tpl_add_c = {
|
||||||
.type = INSTR_ADD_C,
|
.type = INSTR_ADD_C,
|
||||||
.x86_asm = "add r,i",
|
.x86_asm = "add r,i",
|
||||||
.x86_size = 7,
|
.x86_size = 7,
|
||||||
@ -225,7 +227,7 @@ const static instr_template tpl_add_c = {
|
|||||||
.has_dst = true,
|
.has_dst = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_xor_c = {
|
static const instr_template tpl_xor_c = {
|
||||||
.type = INSTR_XOR_C,
|
.type = INSTR_XOR_C,
|
||||||
.x86_asm = "xor r,i",
|
.x86_asm = "xor r,i",
|
||||||
.x86_size = 7,
|
.x86_size = 7,
|
||||||
@ -242,7 +244,7 @@ const static instr_template tpl_xor_c = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const static instr_template tpl_target = {
|
static const instr_template tpl_target = {
|
||||||
.type = INSTR_TARGET,
|
.type = INSTR_TARGET,
|
||||||
.x86_asm = "cmovz esi, edi",
|
.x86_asm = "cmovz esi, edi",
|
||||||
.x86_size = 5, /* test, cmovz */
|
.x86_size = 5, /* test, cmovz */
|
||||||
@ -258,7 +260,7 @@ const static instr_template tpl_target = {
|
|||||||
.has_dst = false,
|
.has_dst = false,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template tpl_branch = {
|
static const instr_template tpl_branch = {
|
||||||
.type = INSTR_BRANCH,
|
.type = INSTR_BRANCH,
|
||||||
.x86_asm = "jz target",
|
.x86_asm = "jz target",
|
||||||
.x86_size = 10, /* or, test, jz */
|
.x86_size = 10, /* or, test, jz */
|
||||||
@ -274,7 +276,7 @@ const static instr_template tpl_branch = {
|
|||||||
.has_dst = false,
|
.has_dst = false,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template* instr_lookup[] = {
|
static const instr_template* instr_lookup[] = {
|
||||||
&tpl_ror_c,
|
&tpl_ror_c,
|
||||||
&tpl_xor_c,
|
&tpl_xor_c,
|
||||||
&tpl_add_c,
|
&tpl_add_c,
|
||||||
@ -285,51 +287,51 @@ const static instr_template* instr_lookup[] = {
|
|||||||
&tpl_add_rs,
|
&tpl_add_rs,
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template* wide_mul_lookup[] = {
|
static const instr_template* wide_mul_lookup[] = {
|
||||||
&tpl_smulh_r,
|
&tpl_smulh_r,
|
||||||
&tpl_umulh_r
|
&tpl_umulh_r
|
||||||
};
|
};
|
||||||
|
|
||||||
const static instr_template* mul_lookup = &tpl_mul_r;
|
static const instr_template* mul_lookup = &tpl_mul_r;
|
||||||
const static instr_template* target_lookup = &tpl_target;
|
static const instr_template* target_lookup = &tpl_target;
|
||||||
const static instr_template* branch_lookup = &tpl_branch;
|
static const instr_template* branch_lookup = &tpl_branch;
|
||||||
|
|
||||||
const static program_item item_mul = {
|
static const program_item item_mul = {
|
||||||
.templates = &mul_lookup,
|
.templates = &mul_lookup,
|
||||||
.mask0 = 0,
|
.mask0 = 0,
|
||||||
.mask1 = 0,
|
.mask1 = 0,
|
||||||
.duplicates = true
|
.duplicates = true
|
||||||
};
|
};
|
||||||
|
|
||||||
const static program_item item_target = {
|
static const program_item item_target = {
|
||||||
.templates = &target_lookup,
|
.templates = &target_lookup,
|
||||||
.mask0 = 0,
|
.mask0 = 0,
|
||||||
.mask1 = 0,
|
.mask1 = 0,
|
||||||
.duplicates = true
|
.duplicates = true
|
||||||
};
|
};
|
||||||
|
|
||||||
const static program_item item_branch = {
|
static const program_item item_branch = {
|
||||||
.templates = &branch_lookup,
|
.templates = &branch_lookup,
|
||||||
.mask0 = 0,
|
.mask0 = 0,
|
||||||
.mask1 = 0,
|
.mask1 = 0,
|
||||||
.duplicates = true
|
.duplicates = true
|
||||||
};
|
};
|
||||||
|
|
||||||
const static program_item item_wide_mul = {
|
static const program_item item_wide_mul = {
|
||||||
.templates = wide_mul_lookup,
|
.templates = wide_mul_lookup,
|
||||||
.mask0 = 1,
|
.mask0 = 1,
|
||||||
.mask1 = 1,
|
.mask1 = 1,
|
||||||
.duplicates = true
|
.duplicates = true
|
||||||
};
|
};
|
||||||
|
|
||||||
const static program_item item_any = {
|
static const program_item item_any = {
|
||||||
.templates = instr_lookup,
|
.templates = instr_lookup,
|
||||||
.mask0 = 7,
|
.mask0 = 7,
|
||||||
.mask1 = 3, /* instructions that don't need a src register */
|
.mask1 = 3, /* instructions that don't need a src register */
|
||||||
.duplicates = false
|
.duplicates = false
|
||||||
};
|
};
|
||||||
|
|
||||||
const static program_item* program_layout[] = {
|
static const program_item* program_layout[] = {
|
||||||
&item_mul,
|
&item_mul,
|
||||||
&item_target,
|
&item_target,
|
||||||
&item_any,
|
&item_any,
|
||||||
@ -549,13 +551,13 @@ bool hashx_program_generate(const siphash_state* key, hashx_program* program) {
|
|||||||
.mul_count = 0,
|
.mul_count = 0,
|
||||||
.chain_mul = false,
|
.chain_mul = false,
|
||||||
.latency = 0,
|
.latency = 0,
|
||||||
.ports = { 0 }
|
.ports = {{ 0 }}
|
||||||
};
|
};
|
||||||
hashx_siphash_rng_init(&ctx.gen, key);
|
hashx_siphash_rng_init(&ctx.gen, key);
|
||||||
for (int i = 0; i < 8; ++i) {
|
for (int i = 0; i < 8; ++i) {
|
||||||
ctx.registers[i].last_op = -1;
|
ctx.registers[i].last_op = -1;
|
||||||
ctx.registers[i].latency = 0;
|
ctx.registers[i].latency = 0;
|
||||||
ctx.registers[i].last_op_par = -1;
|
ctx.registers[i].last_op_par = (uint32_t)-1;
|
||||||
}
|
}
|
||||||
program->code_size = 0;
|
program->code_size = 0;
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ static FORCE_INLINE uint64_t rotr64(uint64_t a, unsigned int b) {
|
|||||||
#ifndef HAVE_UMULH
|
#ifndef HAVE_UMULH
|
||||||
#define LO(x) ((x)&0xffffffff)
|
#define LO(x) ((x)&0xffffffff)
|
||||||
#define HI(x) ((x)>>32)
|
#define HI(x) ((x)>>32)
|
||||||
uint64_t umulh(uint64_t a, uint64_t b) {
|
static uint64_t umulh(uint64_t a, uint64_t b) {
|
||||||
uint64_t ah = HI(a), al = LO(a);
|
uint64_t ah = HI(a), al = LO(a);
|
||||||
uint64_t bh = HI(b), bl = LO(b);
|
uint64_t bh = HI(b), bl = LO(b);
|
||||||
uint64_t x00 = al * bl;
|
uint64_t x00 = al * bl;
|
||||||
@ -80,7 +80,7 @@ uint64_t umulh(uint64_t a, uint64_t b) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_SMULH
|
#ifndef HAVE_SMULH
|
||||||
int64_t smulh(int64_t a, int64_t b) {
|
static int64_t smulh(int64_t a, int64_t b) {
|
||||||
int64_t hi = umulh(a, b);
|
int64_t hi = umulh(a, b);
|
||||||
if (a < 0LL) hi -= b;
|
if (a < 0LL) hi -= b;
|
||||||
if (b < 0LL) hi -= a;
|
if (b < 0LL) hi -= a;
|
||||||
@ -91,24 +91,28 @@ int64_t smulh(int64_t a, int64_t b) {
|
|||||||
|
|
||||||
static FORCE_INLINE uint64_t sign_extend_2s_compl(uint32_t x) {
|
static FORCE_INLINE uint64_t sign_extend_2s_compl(uint32_t x) {
|
||||||
return (-1 == ~0) ?
|
return (-1 == ~0) ?
|
||||||
(int64_t)(int32_t)(x) :
|
(uint64_t)(int64_t)(int32_t)(x) :
|
||||||
(x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
|
(x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void hashx_program_execute(const hashx_program* program, uint64_t r[8]) {
|
void hashx_program_execute(const hashx_program* program, uint64_t r[8]) {
|
||||||
int target = 0;
|
size_t target = 0;
|
||||||
bool branch_enable = true;
|
bool branch_enable = true;
|
||||||
uint32_t result = 0;
|
uint32_t result = 0;
|
||||||
|
#ifdef HASHX_PROGRAM_STATS
|
||||||
int branch_idx = 0;
|
int branch_idx = 0;
|
||||||
for (int i = 0; i < program->code_size; ++i) {
|
#endif
|
||||||
|
for (size_t i = 0; i < program->code_size; ++i) {
|
||||||
const instruction* instr = &program->code[i];
|
const instruction* instr = &program->code[i];
|
||||||
switch (instr->opcode)
|
switch (instr->opcode)
|
||||||
{
|
{
|
||||||
case INSTR_UMULH_R:
|
case INSTR_UMULH_R:
|
||||||
result = r[instr->dst] = umulh(r[instr->dst], r[instr->src]);
|
result = (uint32_t) (r[instr->dst] = umulh(r[instr->dst],
|
||||||
|
r[instr->src]));
|
||||||
break;
|
break;
|
||||||
case INSTR_SMULH_R:
|
case INSTR_SMULH_R:
|
||||||
result = r[instr->dst] = smulh(r[instr->dst], r[instr->src]);
|
result = (uint32_t) (r[instr->dst] = smulh(r[instr->dst],
|
||||||
|
r[instr->src]));
|
||||||
break;
|
break;
|
||||||
case INSTR_MUL_R:
|
case INSTR_MUL_R:
|
||||||
r[instr->dst] *= r[instr->src];
|
r[instr->dst] *= r[instr->src];
|
||||||
@ -143,7 +147,9 @@ void hashx_program_execute(const hashx_program* program, uint64_t r[8]) {
|
|||||||
((hashx_program*)program)->branches[branch_idx]++;
|
((hashx_program*)program)->branches[branch_idx]++;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef HASHX_PROGRAM_STATS
|
||||||
branch_idx++;
|
branch_idx++;
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE;
|
UNREACHABLE;
|
||||||
|
@ -27,5 +27,5 @@ uint32_t hashx_siphash_rng_u32(siphash_rng* gen) {
|
|||||||
gen->count32 = sizeof(gen->buffer32) / sizeof(uint32_t);
|
gen->count32 = sizeof(gen->buffer32) / sizeof(uint32_t);
|
||||||
}
|
}
|
||||||
gen->count32--;
|
gen->count32--;
|
||||||
return gen->buffer32 >> (gen->count32 * 32);
|
return (uint32_t)(gen->buffer32 >> (gen->count32 * 32));
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ static const uint64_t counter1 = 0;
|
|||||||
static const uint64_t counter2 = 123456;
|
static const uint64_t counter2 = 123456;
|
||||||
static const uint64_t counter3 = 987654321123456789;
|
static const uint64_t counter3 = 987654321123456789;
|
||||||
|
|
||||||
|
#ifdef HASHX_BLOCK_MODE
|
||||||
static const unsigned char long_input[] = {
|
static const unsigned char long_input[] = {
|
||||||
0x0b, 0x0b, 0x98, 0xbe, 0xa7, 0xe8, 0x05, 0xe0, 0x01, 0x0a, 0x21, 0x26,
|
0x0b, 0x0b, 0x98, 0xbe, 0xa7, 0xe8, 0x05, 0xe0, 0x01, 0x0a, 0x21, 0x26,
|
||||||
0xd2, 0x87, 0xa2, 0xa0, 0xcc, 0x83, 0x3d, 0x31, 0x2c, 0xb7, 0x86, 0x38,
|
0xd2, 0x87, 0xa2, 0xa0, 0xcc, 0x83, 0x3d, 0x31, 0x2c, 0xb7, 0x86, 0x38,
|
||||||
@ -31,6 +32,7 @@ static const unsigned char long_input[] = {
|
|||||||
0x4e, 0xca, 0x62, 0x92, 0x76, 0x81, 0x7b, 0x56, 0xf3, 0x2e, 0x9b, 0x68,
|
0x4e, 0xca, 0x62, 0x92, 0x76, 0x81, 0x7b, 0x56, 0xf3, 0x2e, 0x9b, 0x68,
|
||||||
0xbd, 0x82, 0xf4, 0x16
|
0xbd, 0x82, 0xf4, 0x16
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
#define RUN_TEST(x) run_test(#x, &x)
|
#define RUN_TEST(x) run_test(#x, &x)
|
||||||
|
|
||||||
|
@ -119,6 +119,7 @@ void* hashx_vm_alloc_huge(size_t bytes) {
|
|||||||
|
|
||||||
void hashx_vm_free(void* ptr, size_t bytes) {
|
void hashx_vm_free(void* ptr, size_t bytes) {
|
||||||
#ifdef HASHX_WIN
|
#ifdef HASHX_WIN
|
||||||
|
(void)bytes;
|
||||||
VirtualFree(ptr, 0, MEM_RELEASE);
|
VirtualFree(ptr, 0, MEM_RELEASE);
|
||||||
#else
|
#else
|
||||||
munmap(ptr, bytes);
|
munmap(ptr, bytes);
|
||||||
|
@ -13,12 +13,12 @@
|
|||||||
|
|
||||||
static bool verify_order(const equix_solution* solution) {
|
static bool verify_order(const equix_solution* solution) {
|
||||||
return
|
return
|
||||||
tree_cmp4(&solution->idx[0], &solution->idx[4]) &
|
tree_cmp4(&solution->idx[0], &solution->idx[4]) &&
|
||||||
tree_cmp2(&solution->idx[0], &solution->idx[2]) &
|
tree_cmp2(&solution->idx[0], &solution->idx[2]) &&
|
||||||
tree_cmp2(&solution->idx[4], &solution->idx[6]) &
|
tree_cmp2(&solution->idx[4], &solution->idx[6]) &&
|
||||||
tree_cmp1(&solution->idx[0], &solution->idx[1]) &
|
tree_cmp1(&solution->idx[0], &solution->idx[1]) &&
|
||||||
tree_cmp1(&solution->idx[2], &solution->idx[3]) &
|
tree_cmp1(&solution->idx[2], &solution->idx[3]) &&
|
||||||
tree_cmp1(&solution->idx[4], &solution->idx[5]) &
|
tree_cmp1(&solution->idx[4], &solution->idx[5]) &&
|
||||||
tree_cmp1(&solution->idx[6], &solution->idx[7]);
|
tree_cmp1(&solution->idx[6], &solution->idx[7]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,6 +136,7 @@ static void solve_stage1(solver_heap* heap) {
|
|||||||
CLEAR(heap->scratch_ht.counts);
|
CLEAR(heap->scratch_ht.counts);
|
||||||
u32 cpl_buck_size = STAGE1_SIZE(cpl_bucket);
|
u32 cpl_buck_size = STAGE1_SIZE(cpl_bucket);
|
||||||
for (u32 item_idx = 0; item_idx < cpl_buck_size; ++item_idx) {
|
for (u32 item_idx = 0; item_idx < cpl_buck_size; ++item_idx) {
|
||||||
|
{
|
||||||
stage1_data_item value = STAGE1_DATA(cpl_bucket, item_idx);
|
stage1_data_item value = STAGE1_DATA(cpl_bucket, item_idx);
|
||||||
u32 fine_buck_idx = value % NUM_FINE_BUCKETS;
|
u32 fine_buck_idx = value % NUM_FINE_BUCKETS;
|
||||||
u32 fine_item_idx = SCRATCH_SIZE(fine_buck_idx);
|
u32 fine_item_idx = SCRATCH_SIZE(fine_buck_idx);
|
||||||
@ -143,6 +144,7 @@ static void solve_stage1(solver_heap* heap) {
|
|||||||
continue;
|
continue;
|
||||||
SCRATCH_SIZE(fine_buck_idx) = fine_item_idx + 1;
|
SCRATCH_SIZE(fine_buck_idx) = fine_item_idx + 1;
|
||||||
SCRATCH(fine_buck_idx, fine_item_idx) = item_idx;
|
SCRATCH(fine_buck_idx, fine_item_idx) = item_idx;
|
||||||
|
}
|
||||||
if (cpl_bucket == bucket_idx) {
|
if (cpl_bucket == bucket_idx) {
|
||||||
MAKE_PAIRS1
|
MAKE_PAIRS1
|
||||||
}
|
}
|
||||||
@ -175,7 +177,7 @@ static void solve_stage1(solver_heap* heap) {
|
|||||||
STAGE3_IDX(s3_buck_id, s3_item_id) = \
|
STAGE3_IDX(s3_buck_id, s3_item_id) = \
|
||||||
MAKE_ITEM(bucket_idx, item_idx, cpl_index); \
|
MAKE_ITEM(bucket_idx, item_idx, cpl_index); \
|
||||||
STAGE3_DATA(s3_buck_id, s3_item_id) = \
|
STAGE3_DATA(s3_buck_id, s3_item_id) = \
|
||||||
sum / NUM_COARSE_BUCKETS; /* 22 bits */ \
|
(stage3_data_item)(sum / NUM_COARSE_BUCKETS); /* 22 bits */ \
|
||||||
} \
|
} \
|
||||||
|
|
||||||
static void solve_stage2(solver_heap* heap) {
|
static void solve_stage2(solver_heap* heap) {
|
||||||
@ -185,6 +187,7 @@ static void solve_stage2(solver_heap* heap) {
|
|||||||
CLEAR(heap->scratch_ht.counts);
|
CLEAR(heap->scratch_ht.counts);
|
||||||
u32 cpl_buck_size = STAGE2_SIZE(cpl_bucket);
|
u32 cpl_buck_size = STAGE2_SIZE(cpl_bucket);
|
||||||
for (u32 item_idx = 0; item_idx < cpl_buck_size; ++item_idx) {
|
for (u32 item_idx = 0; item_idx < cpl_buck_size; ++item_idx) {
|
||||||
|
{
|
||||||
stage2_data_item value = STAGE2_DATA(cpl_bucket, item_idx);
|
stage2_data_item value = STAGE2_DATA(cpl_bucket, item_idx);
|
||||||
u32 fine_buck_idx = value % NUM_FINE_BUCKETS;
|
u32 fine_buck_idx = value % NUM_FINE_BUCKETS;
|
||||||
u32 fine_item_idx = SCRATCH_SIZE(fine_buck_idx);
|
u32 fine_item_idx = SCRATCH_SIZE(fine_buck_idx);
|
||||||
@ -192,6 +195,7 @@ static void solve_stage2(solver_heap* heap) {
|
|||||||
continue;
|
continue;
|
||||||
SCRATCH_SIZE(fine_buck_idx) = fine_item_idx + 1;
|
SCRATCH_SIZE(fine_buck_idx) = fine_item_idx + 1;
|
||||||
SCRATCH(fine_buck_idx, fine_item_idx) = item_idx;
|
SCRATCH(fine_buck_idx, fine_item_idx) = item_idx;
|
||||||
|
}
|
||||||
if (cpl_bucket == bucket_idx) {
|
if (cpl_bucket == bucket_idx) {
|
||||||
MAKE_PAIRS2
|
MAKE_PAIRS2
|
||||||
}
|
}
|
||||||
@ -232,10 +236,10 @@ static int solve_stage3(solver_heap* heap, equix_solution output[EQUIX_MAX_SOLS]
|
|||||||
|
|
||||||
for (u32 bucket_idx = BUCK_START; bucket_idx < BUCK_END; ++bucket_idx) {
|
for (u32 bucket_idx = BUCK_START; bucket_idx < BUCK_END; ++bucket_idx) {
|
||||||
u32 cpl_bucket = -bucket_idx & (NUM_COARSE_BUCKETS - 1);
|
u32 cpl_bucket = -bucket_idx & (NUM_COARSE_BUCKETS - 1);
|
||||||
bool nodup = cpl_bucket == bucket_idx;
|
|
||||||
CLEAR(heap->scratch_ht.counts);
|
CLEAR(heap->scratch_ht.counts);
|
||||||
u32 cpl_buck_size = STAGE3_SIZE(cpl_bucket);
|
u32 cpl_buck_size = STAGE3_SIZE(cpl_bucket);
|
||||||
for (u32 item_idx = 0; item_idx < cpl_buck_size; ++item_idx) {
|
for (u32 item_idx = 0; item_idx < cpl_buck_size; ++item_idx) {
|
||||||
|
{
|
||||||
stage3_data_item value = STAGE3_DATA(cpl_bucket, item_idx);
|
stage3_data_item value = STAGE3_DATA(cpl_bucket, item_idx);
|
||||||
u32 fine_buck_idx = value % NUM_FINE_BUCKETS;
|
u32 fine_buck_idx = value % NUM_FINE_BUCKETS;
|
||||||
u32 fine_item_idx = SCRATCH_SIZE(fine_buck_idx);
|
u32 fine_item_idx = SCRATCH_SIZE(fine_buck_idx);
|
||||||
@ -243,6 +247,7 @@ static int solve_stage3(solver_heap* heap, equix_solution output[EQUIX_MAX_SOLS]
|
|||||||
continue;
|
continue;
|
||||||
SCRATCH_SIZE(fine_buck_idx) = fine_item_idx + 1;
|
SCRATCH_SIZE(fine_buck_idx) = fine_item_idx + 1;
|
||||||
SCRATCH(fine_buck_idx, fine_item_idx) = item_idx;
|
SCRATCH(fine_buck_idx, fine_item_idx) = item_idx;
|
||||||
|
}
|
||||||
if (cpl_bucket == bucket_idx) {
|
if (cpl_bucket == bucket_idx) {
|
||||||
MAKE_PAIRS3
|
MAKE_PAIRS3
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user