diff --git a/changes/feature27367 b/changes/feature27367 new file mode 100644 index 0000000000..99c0839621 --- /dev/null +++ b/changes/feature27367 @@ -0,0 +1,4 @@ + o Minor features (parsing): + - Directory authorities now validate that router descriptors and ExtraInfo + documents are in a valid subset of UTF-8, and reject them if not. + Closes ticket 27367. diff --git a/src/feature/dirauth/process_descs.c b/src/feature/dirauth/process_descs.c index c379f25bdd..dca87b3eaf 100644 --- a/src/feature/dirauth/process_descs.c +++ b/src/feature/dirauth/process_descs.c @@ -519,7 +519,8 @@ WRA_MORE_SEVERE(was_router_added_t a, was_router_added_t b) /** As for dirserv_add_descriptor(), but accepts multiple documents, and * returns the most severe error that occurred for any one of them. */ was_router_added_t -dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, +dirserv_add_multiple_descriptors(const char *desc, size_t desclen, + uint8_t purpose, const char *source, const char **msg) { @@ -536,6 +537,11 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, r=ROUTER_ADDED_SUCCESSFULLY; /*Least severe return value. */ + if (!string_is_utf8_no_bom(desc, desclen)) { + *msg = "descriptor(s) or extrainfo(s) not valid UTF-8 or had BOM."; + return ROUTER_AUTHDIR_REJECTS; + } + format_iso_time(time_buf, now); if (tor_snprintf(annotation_buf, sizeof(annotation_buf), "@uploaded-at %s\n" @@ -552,7 +558,7 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, s = desc; list = smartlist_new(); - if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 0, 0, + if (!router_parse_list_from_string(&s, s+desclen, list, SAVED_NOWHERE, 0, 0, annotation_buf, NULL)) { SMARTLIST_FOREACH(list, routerinfo_t *, ri, { msg_out = NULL; @@ -568,7 +574,7 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, smartlist_clear(list); s = desc; - if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 1, 0, + if (!router_parse_list_from_string(&s, s+desclen, list, SAVED_NOWHERE, 1, 0, NULL, NULL)) { SMARTLIST_FOREACH(list, extrainfo_t *, ei, { msg_out = NULL; diff --git a/src/feature/dirauth/process_descs.h b/src/feature/dirauth/process_descs.h index ad9d5c3d4c..5a0914acd8 100644 --- a/src/feature/dirauth/process_descs.h +++ b/src/feature/dirauth/process_descs.h @@ -17,7 +17,8 @@ void dirserv_free_fingerprint_list(void); int dirserv_add_own_fingerprint(crypto_pk_t *pk); enum was_router_added_t dirserv_add_multiple_descriptors( - const char *desc, uint8_t purpose, + const char *desc, size_t desclen, + uint8_t purpose, const char *source, const char **msg); enum was_router_added_t dirserv_add_descriptor(routerinfo_t *ri, diff --git a/src/feature/dircache/dircache.c b/src/feature/dircache/dircache.c index dff4b85caa..4032223db4 100644 --- a/src/feature/dircache/dircache.c +++ b/src/feature/dircache/dircache.c @@ -1602,8 +1602,8 @@ directory_handle_command_post,(dir_connection_t *conn, const char *headers, const char *msg = "[None]"; uint8_t purpose = authdir_mode_bridge(options) ? ROUTER_PURPOSE_BRIDGE : ROUTER_PURPOSE_GENERAL; - was_router_added_t r = dirserv_add_multiple_descriptors(body, purpose, - conn->base_.address, &msg); + was_router_added_t r = dirserv_add_multiple_descriptors(body, body_len, + purpose, conn->base_.address, &msg); tor_assert(msg); if (r == ROUTER_ADDED_SUCCESSFULLY) { diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c index b2b85d151d..e76e73046f 100644 --- a/src/lib/string/util_string.c +++ b/src/lib/string/util_string.c @@ -541,3 +541,16 @@ string_is_utf8(const char *str, size_t len) } return true; } + +/** As string_is_utf8(), but returns false if the string begins with a UTF-8 + * byte order mark (BOM). + */ +int +string_is_utf8_no_bom(const char *str, size_t len) +{ + if (len >= 3 && (!strcmpstart(str, "\uFEFF") || + !strcmpstart(str, "\uFFFE"))) { + return false; + } + return string_is_utf8(str, len); +} diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h index 746ece0d33..99467a27c3 100644 --- a/src/lib/string/util_string.h +++ b/src/lib/string/util_string.h @@ -53,5 +53,6 @@ const char *find_str_at_start_of_line(const char *haystack, int string_is_C_identifier(const char *string); int string_is_utf8(const char *str, size_t len); +int string_is_utf8_no_bom(const char *str, size_t len); #endif /* !defined(TOR_UTIL_STRING_H) */ diff --git a/src/test/test_util.c b/src/test/test_util.c index 1a71da2794..bcface64fd 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -4024,6 +4024,13 @@ test_util_string_is_utf8(void *ptr) tt_int_op(1, OP_EQ, string_is_utf8("ascii\x7f\n", 7)); tt_int_op(1, OP_EQ, string_is_utf8("Risqu\u00e9=1", 9)); + /* Test the utf8_no_bom function */ + tt_int_op(0, OP_EQ, string_is_utf8_no_bom("\uFEFF", 3)); + tt_int_op(0, OP_EQ, string_is_utf8_no_bom("\uFFFE", 3)); + tt_int_op(0, OP_EQ, string_is_utf8_no_bom("\uFEFFlove", 7)); + tt_int_op(1, OP_EQ, string_is_utf8_no_bom("loveandrespect", + strlen("loveandrespect"))); + // Validate exactly 'len' bytes. tt_int_op(0, OP_EQ, string_is_utf8("\0\x80", 2)); tt_int_op(0, OP_EQ, string_is_utf8("Risqu\u00e9=1", 6));