From 3704c4a01219cca7273fa5fa0bc6751d98ad05ac Mon Sep 17 00:00:00 2001 From: cypherpunks Date: Wed, 29 Aug 2018 13:32:52 +0000 Subject: [PATCH] string: add BOM helper --- src/lib/string/util_string.c | 13 +++++++++++++ src/lib/string/util_string.h | 1 + 2 files changed, 14 insertions(+) diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c index b2b85d151d..e76e73046f 100644 --- a/src/lib/string/util_string.c +++ b/src/lib/string/util_string.c @@ -541,3 +541,16 @@ string_is_utf8(const char *str, size_t len) } return true; } + +/** As string_is_utf8(), but returns false if the string begins with a UTF-8 + * byte order mark (BOM). + */ +int +string_is_utf8_no_bom(const char *str, size_t len) +{ + if (len >= 3 && (!strcmpstart(str, "\uFEFF") || + !strcmpstart(str, "\uFFFE"))) { + return false; + } + return string_is_utf8(str, len); +} diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h index 746ece0d33..99467a27c3 100644 --- a/src/lib/string/util_string.h +++ b/src/lib/string/util_string.h @@ -53,5 +53,6 @@ const char *find_str_at_start_of_line(const char *haystack, int string_is_C_identifier(const char *string); int string_is_utf8(const char *str, size_t len); +int string_is_utf8_no_bom(const char *str, size_t len); #endif /* !defined(TOR_UTIL_STRING_H) */