Rewrite the core of parse_short_policy() to be faster.

The old implementation did some funky out-of-order lexing, and
tended to parse every port twice if the %d-%d pattern didn't match.

Closes ticket 28853.
This commit is contained in:
Nick Mathewson 2018-12-14 16:07:10 -05:00
parent f8dac5c900
commit 3dd1f064a7
2 changed files with 45 additions and 38 deletions

3
changes/ticket28853 Normal file
View File

@ -0,0 +1,3 @@
o Minor features (performance):
- Replace parse_short_policy() with a faster implementation, to improve
microdescriptor parsing time. Closes ticket 28853.

View File

@ -2720,7 +2720,7 @@ parse_short_policy(const char *summary)
int is_accept;
int n_entries;
short_policy_entry_t entries[MAX_EXITPOLICY_SUMMARY_LEN]; /* overkill */
const char *next;
char *next;
if (!strcmpstart(summary, "accept ")) {
is_accept = 1;
@ -2735,57 +2735,56 @@ parse_short_policy(const char *summary)
n_entries = 0;
for ( ; *summary; summary = next) {
const char *comma = strchr(summary, ',');
unsigned low, high;
char dummy;
char ent_buf[32];
size_t len;
next = comma ? comma+1 : strchr(summary, '\0');
len = comma ? (size_t)(comma - summary) : strlen(summary);
if (n_entries == MAX_EXITPOLICY_SUMMARY_LEN) {
log_fn(LOG_PROTOCOL_WARN, LD_DIR, "Impossibly long policy summary %s",
escaped(orig_summary));
return NULL;
}
if (! TOR_ISDIGIT(*summary) || len > (sizeof(ent_buf)-1)) {
/* unrecognized entry format. skip it. */
continue;
}
if (len < 1) {
/* empty; skip it. */
/* XXX This happens to be unreachable, since if len==0, then *summary is
* ',' or '\0', and the TOR_ISDIGIT test above would have failed. */
continue;
}
memcpy(ent_buf, summary, len);
ent_buf[len] = '\0';
if (tor_sscanf(ent_buf, "%u-%u%c", &low, &high, &dummy) == 2) {
if (low<1 || low>65535 || high<1 || high>65535 || low>high) {
log_fn(LOG_PROTOCOL_WARN, LD_DIR,
"Found bad entry in policy summary %s", escaped(orig_summary));
return NULL;
}
} else if (tor_sscanf(ent_buf, "%u%c", &low, &dummy) == 1) {
if (low<1 || low>65535) {
log_fn(LOG_PROTOCOL_WARN, LD_DIR,
"Found bad entry in policy summary %s", escaped(orig_summary));
return NULL;
}
high = low;
unsigned low, high;
int ok;
low = (unsigned) tor_parse_ulong(summary, 10, 1, 65535, &ok, &next);
if (!ok) {
if (! TOR_ISDIGIT(*summary) || *summary == ',') {
/* Unrecognized format: skip it. */
goto skip_ent;
} else {
log_fn(LOG_PROTOCOL_WARN, LD_DIR,"Found bad entry in policy summary %s",
escaped(orig_summary));
return NULL;
goto bad_ent;
}
}
switch (*next) {
case ',':
++next;
/* fall through */
case '\0':
high = low;
break;
case '-':
high = (unsigned) tor_parse_ulong(next+1, 10, low, 65535, &ok, &next);
if (!ok)
goto bad_ent;
if (*next == ',')
++next;
else if (*next != '\0')
goto bad_ent;
break;
default:
goto bad_ent;
}
entries[n_entries].min_port = low;
entries[n_entries].max_port = high;
n_entries++;
continue;
skip_ent:
next = strchr(next, ',');
if (!next)
break;
++next;
}
if (n_entries == 0) {
@ -2806,6 +2805,11 @@ parse_short_policy(const char *summary)
result->n_entries = n_entries;
memcpy(result->entries, entries, sizeof(short_policy_entry_t)*n_entries);
return result;
bad_ent:
log_fn(LOG_PROTOCOL_WARN, LD_DIR,"Found bad entry in policy summary %s",
escaped(orig_summary));
return NULL;
}
/** Write <b>policy</b> back out into a string. */