diff --git a/changes/bug23318 b/changes/bug23318
new file mode 100644
index 0000000000..7fcb8d4487
--- /dev/null
+++ b/changes/bug23318
@@ -0,0 +1,11 @@
+ o Minor bugfixes (path selection):
+ - When selecting relays by bandwidth, avoid a rounding error that
+ could sometimes cause load to be imbalanced incorrectly. Previously,
+ we would always round upwards; now, we round towards the nearest
+ integer. This had the biggest effect when a relay's weight adjustments
+ should have given it weight 0, but it got weight 1 instead.
+ Fixes bug 23318; bugfix on 0.2.4.3-alpha.
+ - When calculating the fraction of nodes that have descriptors, and all
+ all nodes in the network have zero bandwidths, count the number of nodes
+ instead.
+ Fixes bug 23318; bugfix on 0.2.4.10-alpha.
diff --git a/changes/bug23751 b/changes/bug23751
new file mode 100644
index 0000000000..2fd7021664
--- /dev/null
+++ b/changes/bug23751
@@ -0,0 +1,6 @@
+ o Minor bugfixes (scheduler, channel):
+ - Ignore channels that have been closed while flushing cells. This can
+ happen if the write on the connection fails leading to the channel being
+ closed while in the scheduler loop. This is not a complete fix, it is a
+ bandaid until we are able to refactor those interactions. Fixes bug
+ 23751; bugfix on 0.3.2.1-alpha.
diff --git a/changes/bug24170 b/changes/bug24170
new file mode 100644
index 0000000000..d3d7347693
--- /dev/null
+++ b/changes/bug24170
@@ -0,0 +1,3 @@
+ o Minor bugfixes (path selection):
+ - Actually log the total bandwidth in compute_weighted_bandwidths().
+ Fixes bug 24170; bugfix on 0.2.4.3-alpha.
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index c7c1092539..51ad551c0d 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -150,7 +150,8 @@ typedef struct cert_list_t cert_list_t;
/* static function prototypes */
static int compute_weighted_bandwidths(const smartlist_t *sl,
bandwidth_weight_rule_t rule,
- double **bandwidths_out);
+ double **bandwidths_out,
+ double *total_bandwidth_out);
static const routerstatus_t *router_pick_trusteddirserver_impl(
const smartlist_t *sourcelist, dirinfo_type_t auth,
int flags, int *n_busy_out);
@@ -2506,7 +2507,7 @@ smartlist_choose_node_by_bandwidth_weights(const smartlist_t *sl,
double *bandwidths_dbl=NULL;
uint64_t *bandwidths_u64=NULL;
- if (compute_weighted_bandwidths(sl, rule, &bandwidths_dbl) < 0)
+ if (compute_weighted_bandwidths(sl, rule, &bandwidths_dbl, NULL) < 0)
return NULL;
bandwidths_u64 = tor_calloc(smartlist_len(sl), sizeof(uint64_t));
@@ -2526,18 +2527,24 @@ smartlist_choose_node_by_bandwidth_weights(const smartlist_t *sl,
* smartlist_choose_node_by_bandwidth_weights, compute weighted bandwidth
* values for each node and store them in a freshly allocated
* *bandwidths_out of the same length as sl, and holding results
- * as doubles. Return 0 on success, -1 on failure. */
+ * as doubles. If total_bandwidth_out is non-NULL, set it to the total
+ * of all the bandwidths.
+ * Return 0 on success, -1 on failure. */
static int
compute_weighted_bandwidths(const smartlist_t *sl,
bandwidth_weight_rule_t rule,
- double **bandwidths_out)
+ double **bandwidths_out,
+ double *total_bandwidth_out)
{
int64_t weight_scale;
double Wg = -1, Wm = -1, We = -1, Wd = -1;
double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1;
- uint64_t weighted_bw = 0;
guardfraction_bandwidth_t guardfraction_bw;
- double *bandwidths;
+ double *bandwidths = NULL;
+ double total_bandwidth = 0.0;
+
+ tor_assert(sl);
+ tor_assert(bandwidths_out);
/* Can't choose exit and guard at same time */
tor_assert(rule == NO_WEIGHTING ||
@@ -2546,6 +2553,12 @@ compute_weighted_bandwidths(const smartlist_t *sl,
rule == WEIGHT_FOR_MID ||
rule == WEIGHT_FOR_DIR);
+ *bandwidths_out = NULL;
+
+ if (total_bandwidth_out) {
+ *total_bandwidth_out = 0.0;
+ }
+
if (smartlist_len(sl) == 0) {
log_info(LD_CIRC,
"Empty routerlist passed in to consensus weight node "
@@ -2706,17 +2719,22 @@ compute_weighted_bandwidths(const smartlist_t *sl,
final_weight = weight*this_bw;
}
- bandwidths[node_sl_idx] = final_weight + 0.5;
+ bandwidths[node_sl_idx] = final_weight;
+ total_bandwidth += final_weight;
} SMARTLIST_FOREACH_END(node);
log_debug(LD_CIRC, "Generated weighted bandwidths for rule %s based "
"on weights "
- "Wg=%f Wm=%f We=%f Wd=%f with total bw "U64_FORMAT,
+ "Wg=%f Wm=%f We=%f Wd=%f with total bw %f",
bandwidth_weight_rule_to_string(rule),
- Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw));
+ Wg, Wm, We, Wd, total_bandwidth);
*bandwidths_out = bandwidths;
+ if (total_bandwidth_out) {
+ *total_bandwidth_out = total_bandwidth;
+ }
+
return 0;
}
@@ -2733,7 +2751,8 @@ frac_nodes_with_descriptors(const smartlist_t *sl,
if (smartlist_len(sl) == 0)
return 0.0;
- if (compute_weighted_bandwidths(sl, rule, &bandwidths) < 0) {
+ if (compute_weighted_bandwidths(sl, rule, &bandwidths, &total) < 0 ||
+ total <= 0.0) {
int n_with_descs = 0;
SMARTLIST_FOREACH(sl, const node_t *, node, {
if (node_has_descriptor(node))
@@ -2742,19 +2761,14 @@ frac_nodes_with_descriptors(const smartlist_t *sl,
return ((double)n_with_descs) / (double)smartlist_len(sl);
}
- total = present = 0.0;
+ present = 0.0;
SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) {
- const double bw = bandwidths[node_sl_idx];
- total += bw;
if (node_has_descriptor(node))
- present += bw;
+ present += bandwidths[node_sl_idx];
} SMARTLIST_FOREACH_END(node);
tor_free(bandwidths);
- if (total < 1.0)
- return 0;
-
return present / total;
}
diff --git a/src/or/scheduler_kist.c b/src/or/scheduler_kist.c
index a3b74e8cc9..d1726ba345 100644
--- a/src/or/scheduler_kist.c
+++ b/src/or/scheduler_kist.c
@@ -598,6 +598,15 @@ kist_scheduler_run(void)
if (socket_can_write(&socket_table, chan)) {
/* flush to channel queue/outbuf */
flush_result = (int)channel_flush_some_cells(chan, 1); // 1 for num cells
+ /* XXX: While flushing cells, it is possible that the connection write
+ * fails leading to the channel to be closed which triggers a release
+ * and free its entry in the socket table. And because of a engineering
+ * design issue, the error is not propagated back so we don't get an
+ * error at this poin. So before we continue, make sure the channel is
+ * open and if not just ignore it. See #23751. */
+ if (!CHANNEL_IS_OPEN(chan)) {
+ continue;
+ }
/* flush_result has the # cells flushed */
if (flush_result > 0) {
update_socket_written(&socket_table, chan, flush_result *