diff --git a/changes/bug23318 b/changes/bug23318 new file mode 100644 index 0000000000..7fcb8d4487 --- /dev/null +++ b/changes/bug23318 @@ -0,0 +1,11 @@ + o Minor bugfixes (path selection): + - When selecting relays by bandwidth, avoid a rounding error that + could sometimes cause load to be imbalanced incorrectly. Previously, + we would always round upwards; now, we round towards the nearest + integer. This had the biggest effect when a relay's weight adjustments + should have given it weight 0, but it got weight 1 instead. + Fixes bug 23318; bugfix on 0.2.4.3-alpha. + - When calculating the fraction of nodes that have descriptors, and all + all nodes in the network have zero bandwidths, count the number of nodes + instead. + Fixes bug 23318; bugfix on 0.2.4.10-alpha. diff --git a/changes/bug23751 b/changes/bug23751 new file mode 100644 index 0000000000..2fd7021664 --- /dev/null +++ b/changes/bug23751 @@ -0,0 +1,6 @@ + o Minor bugfixes (scheduler, channel): + - Ignore channels that have been closed while flushing cells. This can + happen if the write on the connection fails leading to the channel being + closed while in the scheduler loop. This is not a complete fix, it is a + bandaid until we are able to refactor those interactions. Fixes bug + 23751; bugfix on 0.3.2.1-alpha. diff --git a/changes/bug24170 b/changes/bug24170 new file mode 100644 index 0000000000..d3d7347693 --- /dev/null +++ b/changes/bug24170 @@ -0,0 +1,3 @@ + o Minor bugfixes (path selection): + - Actually log the total bandwidth in compute_weighted_bandwidths(). + Fixes bug 24170; bugfix on 0.2.4.3-alpha. diff --git a/src/or/routerlist.c b/src/or/routerlist.c index c7c1092539..51ad551c0d 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -150,7 +150,8 @@ typedef struct cert_list_t cert_list_t; /* static function prototypes */ static int compute_weighted_bandwidths(const smartlist_t *sl, bandwidth_weight_rule_t rule, - double **bandwidths_out); + double **bandwidths_out, + double *total_bandwidth_out); static const routerstatus_t *router_pick_trusteddirserver_impl( const smartlist_t *sourcelist, dirinfo_type_t auth, int flags, int *n_busy_out); @@ -2506,7 +2507,7 @@ smartlist_choose_node_by_bandwidth_weights(const smartlist_t *sl, double *bandwidths_dbl=NULL; uint64_t *bandwidths_u64=NULL; - if (compute_weighted_bandwidths(sl, rule, &bandwidths_dbl) < 0) + if (compute_weighted_bandwidths(sl, rule, &bandwidths_dbl, NULL) < 0) return NULL; bandwidths_u64 = tor_calloc(smartlist_len(sl), sizeof(uint64_t)); @@ -2526,18 +2527,24 @@ smartlist_choose_node_by_bandwidth_weights(const smartlist_t *sl, * smartlist_choose_node_by_bandwidth_weights, compute weighted bandwidth * values for each node and store them in a freshly allocated * *bandwidths_out of the same length as sl, and holding results - * as doubles. Return 0 on success, -1 on failure. */ + * as doubles. If total_bandwidth_out is non-NULL, set it to the total + * of all the bandwidths. + * Return 0 on success, -1 on failure. */ static int compute_weighted_bandwidths(const smartlist_t *sl, bandwidth_weight_rule_t rule, - double **bandwidths_out) + double **bandwidths_out, + double *total_bandwidth_out) { int64_t weight_scale; double Wg = -1, Wm = -1, We = -1, Wd = -1; double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1; - uint64_t weighted_bw = 0; guardfraction_bandwidth_t guardfraction_bw; - double *bandwidths; + double *bandwidths = NULL; + double total_bandwidth = 0.0; + + tor_assert(sl); + tor_assert(bandwidths_out); /* Can't choose exit and guard at same time */ tor_assert(rule == NO_WEIGHTING || @@ -2546,6 +2553,12 @@ compute_weighted_bandwidths(const smartlist_t *sl, rule == WEIGHT_FOR_MID || rule == WEIGHT_FOR_DIR); + *bandwidths_out = NULL; + + if (total_bandwidth_out) { + *total_bandwidth_out = 0.0; + } + if (smartlist_len(sl) == 0) { log_info(LD_CIRC, "Empty routerlist passed in to consensus weight node " @@ -2706,17 +2719,22 @@ compute_weighted_bandwidths(const smartlist_t *sl, final_weight = weight*this_bw; } - bandwidths[node_sl_idx] = final_weight + 0.5; + bandwidths[node_sl_idx] = final_weight; + total_bandwidth += final_weight; } SMARTLIST_FOREACH_END(node); log_debug(LD_CIRC, "Generated weighted bandwidths for rule %s based " "on weights " - "Wg=%f Wm=%f We=%f Wd=%f with total bw "U64_FORMAT, + "Wg=%f Wm=%f We=%f Wd=%f with total bw %f", bandwidth_weight_rule_to_string(rule), - Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw)); + Wg, Wm, We, Wd, total_bandwidth); *bandwidths_out = bandwidths; + if (total_bandwidth_out) { + *total_bandwidth_out = total_bandwidth; + } + return 0; } @@ -2733,7 +2751,8 @@ frac_nodes_with_descriptors(const smartlist_t *sl, if (smartlist_len(sl) == 0) return 0.0; - if (compute_weighted_bandwidths(sl, rule, &bandwidths) < 0) { + if (compute_weighted_bandwidths(sl, rule, &bandwidths, &total) < 0 || + total <= 0.0) { int n_with_descs = 0; SMARTLIST_FOREACH(sl, const node_t *, node, { if (node_has_descriptor(node)) @@ -2742,19 +2761,14 @@ frac_nodes_with_descriptors(const smartlist_t *sl, return ((double)n_with_descs) / (double)smartlist_len(sl); } - total = present = 0.0; + present = 0.0; SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) { - const double bw = bandwidths[node_sl_idx]; - total += bw; if (node_has_descriptor(node)) - present += bw; + present += bandwidths[node_sl_idx]; } SMARTLIST_FOREACH_END(node); tor_free(bandwidths); - if (total < 1.0) - return 0; - return present / total; } diff --git a/src/or/scheduler_kist.c b/src/or/scheduler_kist.c index a3b74e8cc9..d1726ba345 100644 --- a/src/or/scheduler_kist.c +++ b/src/or/scheduler_kist.c @@ -598,6 +598,15 @@ kist_scheduler_run(void) if (socket_can_write(&socket_table, chan)) { /* flush to channel queue/outbuf */ flush_result = (int)channel_flush_some_cells(chan, 1); // 1 for num cells + /* XXX: While flushing cells, it is possible that the connection write + * fails leading to the channel to be closed which triggers a release + * and free its entry in the socket table. And because of a engineering + * design issue, the error is not propagated back so we don't get an + * error at this poin. So before we continue, make sure the channel is + * open and if not just ignore it. See #23751. */ + if (!CHANNEL_IS_OPEN(chan)) { + continue; + } /* flush_result has the # cells flushed */ if (flush_result > 0) { update_socket_written(&socket_table, chan, flush_result *