diff --git a/.gitignore b/.gitignore index 343517fdf4..d03e1136d4 100644 --- a/.gitignore +++ b/.gitignore @@ -175,6 +175,8 @@ uptime-*.json /src/lib/libtor-err-testing.a /src/lib/libtor-malloc.a /src/lib/libtor-malloc-testing.a +/src/lib/libtor-string.a +/src/lib/libtor-string-testing.a /src/lib/libtor-tls.a /src/lib/libtor-tls-testing.a /src/lib/libtor-trace.a diff --git a/Makefile.am b/Makefile.am index a83041fbe0..50d1408fe3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -41,6 +41,7 @@ endif TOR_UTIL_LIBS = \ src/common/libor.a \ src/lib/libtor-container.a \ + src/lib/libtor-string.a \ src/lib/libtor-malloc.a \ src/lib/libtor-wallclock.a \ src/lib/libtor-err.a \ @@ -51,6 +52,7 @@ TOR_UTIL_LIBS = \ TOR_UTIL_TESTING_LIBS = \ src/common/libor-testing.a \ src/lib/libtor-container-testing.a \ + src/lib/libtor-string-testing.a \ src/lib/libtor-malloc-testing.a \ src/lib/libtor-wallclock-testing.a \ src/lib/libtor-err-testing.a \ diff --git a/src/common/compat.c b/src/common/compat.c index 9462195ba2..8929f2f3d3 100644 --- a/src/common/compat.c +++ b/src/common/compat.c @@ -404,147 +404,6 @@ tor_munmap_file(tor_mmap_t *handle) #error "cannot implement tor_mmap_file" #endif /* defined(HAVE_MMAP) || ... || ... */ -/** Replacement for snprintf. Differs from platform snprintf in two - * ways: First, always NUL-terminates its output. Second, always - * returns -1 if the result is truncated. (Note that this return - * behavior does not conform to C99; it just happens to be - * easier to emulate "return -1" with conformant implementations than - * it is to emulate "return number that would be written" with - * non-conformant implementations.) */ -int -tor_snprintf(char *str, size_t size, const char *format, ...) -{ - va_list ap; - int r; - va_start(ap,format); - r = tor_vsnprintf(str,size,format,ap); - va_end(ap); - return r; -} - -/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from - * snprintf. - */ -int -tor_vsnprintf(char *str, size_t size, const char *format, va_list args) -{ - int r; - if (size == 0) - return -1; /* no place for the NUL */ - if (size > SIZE_T_CEILING) - return -1; -#ifdef _WIN32 - r = _vsnprintf(str, size, format, args); -#else - r = vsnprintf(str, size, format, args); -#endif - str[size-1] = '\0'; - if (r < 0 || r >= (ssize_t)size) - return -1; - return r; -} - -/** - * Portable asprintf implementation. Does a printf() into a newly malloc'd - * string. Sets *strp to this string, and returns its length (not - * including the terminating NUL character). - * - * You can treat this function as if its implementation were something like -
-     char buf[_INFINITY_];
-     tor_snprintf(buf, sizeof(buf), fmt, args);
-     *strp = tor_strdup(buf);
-     return strlen(*strp):
-   
- * Where _INFINITY_ is an imaginary constant so big that any string can fit - * into it. - */ -int -tor_asprintf(char **strp, const char *fmt, ...) -{ - int r; - va_list args; - va_start(args, fmt); - r = tor_vasprintf(strp, fmt, args); - va_end(args); - if (!*strp || r < 0) { - /* LCOV_EXCL_START */ - log_err(LD_BUG, "Internal error in asprintf"); - tor_assert(0); - /* LCOV_EXCL_STOP */ - } - return r; -} - -/** - * Portable vasprintf implementation. Does a printf() into a newly malloc'd - * string. Differs from regular vasprintf in the same ways that - * tor_asprintf() differs from regular asprintf. - */ -int -tor_vasprintf(char **strp, const char *fmt, va_list args) -{ - /* use a temporary variable in case *strp is in args. */ - char *strp_tmp=NULL; -#ifdef HAVE_VASPRINTF - /* If the platform gives us one, use it. */ - int r = vasprintf(&strp_tmp, fmt, args); - if (r < 0) - *strp = NULL; - else - *strp = strp_tmp; - return r; -#elif defined(HAVE__VSCPRINTF) - /* On Windows, _vsnprintf won't tell us the length of the string if it - * overflows, so we need to use _vcsprintf to tell how much to allocate */ - int len, r; - va_list tmp_args; - va_copy(tmp_args, args); - len = _vscprintf(fmt, tmp_args); - va_end(tmp_args); - if (len < 0) { - *strp = NULL; - return -1; - } - strp_tmp = tor_malloc(len + 1); - r = _vsnprintf(strp_tmp, len+1, fmt, args); - if (r != len) { - tor_free(strp_tmp); - *strp = NULL; - return -1; - } - *strp = strp_tmp; - return len; -#else - /* Everywhere else, we have a decent vsnprintf that tells us how many - * characters we need. We give it a try on a short buffer first, since - * it might be nice to avoid the second vsnprintf call. - */ - char buf[128]; - int len, r; - va_list tmp_args; - va_copy(tmp_args, args); - /* vsnprintf() was properly checked but tor_vsnprintf() available so - * why not use it? */ - len = tor_vsnprintf(buf, sizeof(buf), fmt, tmp_args); - va_end(tmp_args); - if (len < (int)sizeof(buf)) { - *strp = tor_strdup(buf); - return len; - } - strp_tmp = tor_malloc(len+1); - /* use of tor_vsnprintf() will ensure string is null terminated */ - r = tor_vsnprintf(strp_tmp, len+1, fmt, args); - if (r != len) { - tor_free(strp_tmp); - *strp = NULL; - return -1; - } - *strp = strp_tmp; - return len; -#endif /* defined(HAVE_VASPRINTF) || ... */ -} - /** Given hlen bytes at haystack and nlen bytes at * needle, return a pointer to the first occurrence of the needle * within the haystack, or NULL if there is no such occurrence. @@ -591,67 +450,6 @@ tor_memmem(const void *_haystack, size_t hlen, #endif /* defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2) */ } -/** - * Tables to implement ctypes-replacement TOR_IS*() functions. Each table - * has 256 bits to look up whether a character is in some set or not. This - * fails on non-ASCII platforms, but it is hard to find a platform whose - * character set is not a superset of ASCII nowadays. */ - -/**@{*/ -const uint32_t TOR_ISALPHA_TABLE[8] = - { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; -const uint32_t TOR_ISALNUM_TABLE[8] = - { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; -const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 }; -const uint32_t TOR_ISXDIGIT_TABLE[8] = - { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 }; -const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 }; -const uint32_t TOR_ISPRINT_TABLE[8] = - { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 }; -const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 }; -const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 }; - -/** Upper-casing and lowercasing tables to map characters to upper/lowercase - * equivalents. Used by tor_toupper() and tor_tolower(). */ -/**@{*/ -const uint8_t TOR_TOUPPER_TABLE[256] = { - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, - 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, - 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, - 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, - 96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, - 80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, -}; -const uint8_t TOR_TOLOWER_TABLE[256] = { - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, - 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, - 64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95, - 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, -}; -/**@}*/ - /** Helper for tor_strtok_r_impl: Advances cp past all characters in * sep, and returns its new value. */ static char * diff --git a/src/common/compat.h b/src/common/compat.h index dff29a42b0..21fd0211a3 100644 --- a/src/common/compat.h +++ b/src/common/compat.h @@ -47,6 +47,8 @@ #include "lib/cc/compat_compiler.h" #include "common/compat_time.h" +#include "lib/string/compat_ctype.h" +#include "lib/string/printf.h" #include #include @@ -97,16 +99,6 @@ typedef struct tor_mmap_t { tor_mmap_t *tor_mmap_file(const char *filename) ATTR_NONNULL((1)); int tor_munmap_file(tor_mmap_t *handle) ATTR_NONNULL((1)); -int tor_snprintf(char *str, size_t size, const char *format, ...) - CHECK_PRINTF(3,4) ATTR_NONNULL((1,3)); -int tor_vsnprintf(char *str, size_t size, const char *format, va_list args) - CHECK_PRINTF(3,0) ATTR_NONNULL((1,3)); - -int tor_asprintf(char **strp, const char *fmt, ...) - CHECK_PRINTF(2,3); -int tor_vasprintf(char **strp, const char *fmt, va_list args) - CHECK_PRINTF(2,0); - const void *tor_memmem(const void *haystack, size_t hlen, const void *needle, size_t nlen) ATTR_NONNULL((1,3)); static const void *tor_memstr(const void *haystack, size_t hlen, @@ -117,28 +109,6 @@ tor_memstr(const void *haystack, size_t hlen, const char *needle) return tor_memmem(haystack, hlen, needle, strlen(needle)); } -/* Much of the time when we're checking ctypes, we're doing spec compliance, - * which all assumes we're doing ASCII. */ -#define DECLARE_CTYPE_FN(name) \ - static int TOR_##name(char c); \ - extern const uint32_t TOR_##name##_TABLE[]; \ - static inline int TOR_##name(char c) { \ - uint8_t u = c; \ - return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31))); \ - } -DECLARE_CTYPE_FN(ISALPHA) -DECLARE_CTYPE_FN(ISALNUM) -DECLARE_CTYPE_FN(ISSPACE) -DECLARE_CTYPE_FN(ISDIGIT) -DECLARE_CTYPE_FN(ISXDIGIT) -DECLARE_CTYPE_FN(ISPRINT) -DECLARE_CTYPE_FN(ISLOWER) -DECLARE_CTYPE_FN(ISUPPER) -extern const uint8_t TOR_TOUPPER_TABLE[]; -extern const uint8_t TOR_TOLOWER_TABLE[]; -#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c]) -#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c]) - char *tor_strtok_r_impl(char *str, const char *sep, char **lasts); #ifdef HAVE_STRTOK_R #define tor_strtok_r(str, sep, lasts) strtok_r(str, sep, lasts) diff --git a/src/common/util.c b/src/common/util.c index 358a68fd75..597ff2c420 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -385,22 +385,6 @@ n_bits_set_u8(uint8_t v) * String manipulation * ===== */ -/** Remove from the string s every character which appears in - * strip. */ -void -tor_strstrip(char *s, const char *strip) -{ - char *readp = s; - while (*readp) { - if (strchr(strip, *readp)) { - ++readp; - } else { - *s++ = *readp++; - } - } - *s = '\0'; -} - /** Return a pointer to a NUL-terminated hexadecimal string encoding * the first fromlen bytes of from. (fromlen must be \<= 32.) The * result does not need to be deallocated, but repeated calls to @@ -416,145 +400,6 @@ hex_str(const char *from, size_t fromlen) return buf; } -/** Convert all alphabetic characters in the nul-terminated string s to - * lowercase. */ -void -tor_strlower(char *s) -{ - while (*s) { - *s = TOR_TOLOWER(*s); - ++s; - } -} - -/** Convert all alphabetic characters in the nul-terminated string s to - * lowercase. */ -void -tor_strupper(char *s) -{ - while (*s) { - *s = TOR_TOUPPER(*s); - ++s; - } -} - -/** Return 1 if every character in s is printable, else return 0. - */ -int -tor_strisprint(const char *s) -{ - while (*s) { - if (!TOR_ISPRINT(*s)) - return 0; - s++; - } - return 1; -} - -/** Return 1 if no character in s is uppercase, else return 0. - */ -int -tor_strisnonupper(const char *s) -{ - while (*s) { - if (TOR_ISUPPER(*s)) - return 0; - s++; - } - return 1; -} - -/** Return true iff every character in s is whitespace space; else - * return false. */ -int -tor_strisspace(const char *s) -{ - while (*s) { - if (!TOR_ISSPACE(*s)) - return 0; - s++; - } - return 1; -} - -/** As strcmp, except that either string may be NULL. The NULL string is - * considered to be before any non-NULL string. */ -int -strcmp_opt(const char *s1, const char *s2) -{ - if (!s1) { - if (!s2) - return 0; - else - return -1; - } else if (!s2) { - return 1; - } else { - return strcmp(s1, s2); - } -} - -/** Compares the first strlen(s2) characters of s1 with s2. Returns as for - * strcmp. - */ -int -strcmpstart(const char *s1, const char *s2) -{ - size_t n = strlen(s2); - return strncmp(s1, s2, n); -} - -/** Compare the s1_len-byte string s1 with s2, - * without depending on a terminating nul in s1. Sorting order is first by - * length, then lexically; return values are as for strcmp. - */ -int -strcmp_len(const char *s1, const char *s2, size_t s1_len) -{ - size_t s2_len = strlen(s2); - if (s1_len < s2_len) - return -1; - if (s1_len > s2_len) - return 1; - return fast_memcmp(s1, s2, s2_len); -} - -/** Compares the first strlen(s2) characters of s1 with s2. Returns as for - * strcasecmp. - */ -int -strcasecmpstart(const char *s1, const char *s2) -{ - size_t n = strlen(s2); - return strncasecmp(s1, s2, n); -} - -/** Compares the last strlen(s2) characters of s1 with s2. Returns as for - * strcmp. - */ -int -strcmpend(const char *s1, const char *s2) -{ - size_t n1 = strlen(s1), n2 = strlen(s2); - if (n2>n1) - return strcmp(s1,s2); - else - return strncmp(s1+(n1-n2), s2, n2); -} - -/** Compares the last strlen(s2) characters of s1 with s2. Returns as for - * strcasecmp. - */ -int -strcasecmpend(const char *s1, const char *s2) -{ - size_t n1 = strlen(s1), n2 = strlen(s2); - if (n2>n1) /* then they can't be the same; figure out which is bigger */ - return strcasecmp(s1,s2); - else - return strncasecmp(s1+(n1-n2), s2, n2); -} - /** Compare the value of the string prefix with the start of the * memlen-byte memory chunk at mem. Return as for strcmp. * @@ -571,179 +416,6 @@ fast_memcmpstart(const void *mem, size_t memlen, return fast_memcmp(mem, prefix, plen); } -/** Return a pointer to the first char of s that is not whitespace and - * not a comment, or to the terminating NUL if no such character exists. - */ -const char * -eat_whitespace(const char *s) -{ - tor_assert(s); - - while (1) { - switch (*s) { - case '\0': - default: - return s; - case ' ': - case '\t': - case '\n': - case '\r': - ++s; - break; - case '#': - ++s; - while (*s && *s != '\n') - ++s; - } - } -} - -/** Return a pointer to the first char of s that is not whitespace and - * not a comment, or to the terminating NUL if no such character exists. - */ -const char * -eat_whitespace_eos(const char *s, const char *eos) -{ - tor_assert(s); - tor_assert(eos && s <= eos); - - while (s < eos) { - switch (*s) { - case '\0': - default: - return s; - case ' ': - case '\t': - case '\n': - case '\r': - ++s; - break; - case '#': - ++s; - while (s < eos && *s && *s != '\n') - ++s; - } - } - return s; -} - -/** Return a pointer to the first char of s that is not a space or a tab - * or a \\r, or to the terminating NUL if no such character exists. */ -const char * -eat_whitespace_no_nl(const char *s) -{ - while (*s == ' ' || *s == '\t' || *s == '\r') - ++s; - return s; -} - -/** As eat_whitespace_no_nl, but stop at eos whether we have - * found a non-whitespace character or not. */ -const char * -eat_whitespace_eos_no_nl(const char *s, const char *eos) -{ - while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r')) - ++s; - return s; -} - -/** Return a pointer to the first char of s that is whitespace or #, - * or to the terminating NUL if no such character exists. - */ -const char * -find_whitespace(const char *s) -{ - /* tor_assert(s); */ - while (1) { - switch (*s) - { - case '\0': - case '#': - case ' ': - case '\r': - case '\n': - case '\t': - return s; - default: - ++s; - } - } -} - -/** As find_whitespace, but stop at eos whether we have found a - * whitespace or not. */ -const char * -find_whitespace_eos(const char *s, const char *eos) -{ - /* tor_assert(s); */ - while (s < eos) { - switch (*s) - { - case '\0': - case '#': - case ' ': - case '\r': - case '\n': - case '\t': - return s; - default: - ++s; - } - } - return s; -} - -/** Return the first occurrence of needle in haystack that - * occurs at the start of a line (that is, at the beginning of haystack - * or immediately after a newline). Return NULL if no such string is found. - */ -const char * -find_str_at_start_of_line(const char *haystack, const char *needle) -{ - size_t needle_len = strlen(needle); - - do { - if (!strncmp(haystack, needle, needle_len)) - return haystack; - - haystack = strchr(haystack, '\n'); - if (!haystack) - return NULL; - else - ++haystack; - } while (*haystack); - - return NULL; -} - -/** Returns true if string could be a C identifier. - A C identifier must begin with a letter or an underscore and the - rest of its characters can be letters, numbers or underscores. No - length limit is imposed. */ -int -string_is_C_identifier(const char *string) -{ - size_t iter; - size_t length = strlen(string); - if (!length) - return 0; - - for (iter = 0; iter < length ; iter++) { - if (iter == 0) { - if (!(TOR_ISALPHA(string[iter]) || - string[iter] == '_')) - return 0; - } else { - if (!(TOR_ISALPHA(string[iter]) || - TOR_ISDIGIT(string[iter]) || - string[iter] == '_')) - return 0; - } - } - - return 1; -} - /** Return true iff the 'len' bytes at 'mem' are all zero. */ int tor_mem_is_zero(const char *mem, size_t len) @@ -2923,307 +2595,6 @@ expand_filename(const char *filename) #endif /* defined(_WIN32) */ } -#define MAX_SCANF_WIDTH 9999 - -/** Helper: given an ASCII-encoded decimal digit, return its numeric value. - * NOTE: requires that its input be in-bounds. */ -static int -digit_to_num(char d) -{ - int num = ((int)d) - (int)'0'; - tor_assert(num <= 9 && num >= 0); - return num; -} - -/** Helper: Read an unsigned int from *bufp of up to width - * characters. (Handle arbitrary width if width is less than 0.) On - * success, store the result in out, advance bufp to the next - * character, and return 0. On failure, return -1. */ -static int -scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) -{ - unsigned long result = 0; - int scanned_so_far = 0; - const int hex = base==16; - tor_assert(base == 10 || base == 16); - if (!bufp || !*bufp || !out) - return -1; - if (width<0) - width=MAX_SCANF_WIDTH; - - while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) - && scanned_so_far < width) { - unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); - // Check for overflow beforehand, without actually causing any overflow - // This preserves functionality on compilers that don't wrap overflow - // (i.e. that trap or optimise away overflow) - // result * base + digit > ULONG_MAX - // result * base > ULONG_MAX - digit - if (result > (ULONG_MAX - digit)/base) - return -1; /* Processing this digit would overflow */ - result = result * base + digit; - ++scanned_so_far; - } - - if (!scanned_so_far) /* No actual digits scanned */ - return -1; - - *out = result; - return 0; -} - -/** Helper: Read an signed int from *bufp of up to width - * characters. (Handle arbitrary width if width is less than 0.) On - * success, store the result in out, advance bufp to the next - * character, and return 0. On failure, return -1. */ -static int -scan_signed(const char **bufp, long *out, int width) -{ - int neg = 0; - unsigned long result = 0; - - if (!bufp || !*bufp || !out) - return -1; - if (width<0) - width=MAX_SCANF_WIDTH; - - if (**bufp == '-') { - neg = 1; - ++*bufp; - --width; - } - - if (scan_unsigned(bufp, &result, width, 10) < 0) - return -1; - - if (neg && result > 0) { - if (result > ((unsigned long)LONG_MAX) + 1) - return -1; /* Underflow */ - else if (result == ((unsigned long)LONG_MAX) + 1) - *out = LONG_MIN; - else { - /* We once had a far more clever no-overflow conversion here, but - * some versions of GCC apparently ran it into the ground. Now - * we just check for LONG_MIN explicitly. - */ - *out = -(long)result; - } - } else { - if (result > LONG_MAX) - return -1; /* Overflow */ - *out = (long)result; - } - - return 0; -} - -/** Helper: Read a decimal-formatted double from *bufp of up to - * width characters. (Handle arbitrary width if width is less - * than 0.) On success, store the result in out, advance bufp to the - * next character, and return 0. On failure, return -1. */ -static int -scan_double(const char **bufp, double *out, int width) -{ - int neg = 0; - double result = 0; - int scanned_so_far = 0; - - if (!bufp || !*bufp || !out) - return -1; - if (width<0) - width=MAX_SCANF_WIDTH; - - if (**bufp == '-') { - neg = 1; - ++*bufp; - } - - while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { - const int digit = digit_to_num(*(*bufp)++); - result = result * 10 + digit; - ++scanned_so_far; - } - if (**bufp == '.') { - double fracval = 0, denominator = 1; - ++*bufp; - ++scanned_so_far; - while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { - const int digit = digit_to_num(*(*bufp)++); - fracval = fracval * 10 + digit; - denominator *= 10; - ++scanned_so_far; - } - result += fracval / denominator; - } - - if (!scanned_so_far) /* No actual digits scanned */ - return -1; - - *out = neg ? -result : result; - return 0; -} - -/** Helper: copy up to width non-space characters from bufp to - * out. Make sure out is nul-terminated. Advance bufp - * to the next non-space character or the EOS. */ -static int -scan_string(const char **bufp, char *out, int width) -{ - int scanned_so_far = 0; - if (!bufp || !out || width < 0) - return -1; - while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) { - *out++ = *(*bufp)++; - ++scanned_so_far; - } - *out = '\0'; - return 0; -} - -/** Locale-independent, minimal, no-surprises scanf variant, accepting only a - * restricted pattern format. For more info on what it supports, see - * tor_sscanf() documentation. */ -int -tor_vsscanf(const char *buf, const char *pattern, va_list ap) -{ - int n_matched = 0; - - while (*pattern) { - if (*pattern != '%') { - if (*buf == *pattern) { - ++buf; - ++pattern; - continue; - } else { - return n_matched; - } - } else { - int width = -1; - int longmod = 0; - ++pattern; - if (TOR_ISDIGIT(*pattern)) { - width = digit_to_num(*pattern++); - while (TOR_ISDIGIT(*pattern)) { - width *= 10; - width += digit_to_num(*pattern++); - if (width > MAX_SCANF_WIDTH) - return -1; - } - if (!width) /* No zero-width things. */ - return -1; - } - if (*pattern == 'l') { - longmod = 1; - ++pattern; - } - if (*pattern == 'u' || *pattern == 'x') { - unsigned long u; - const int base = (*pattern == 'u') ? 10 : 16; - if (!*buf) - return n_matched; - if (scan_unsigned(&buf, &u, width, base)<0) - return n_matched; - if (longmod) { - unsigned long *out = va_arg(ap, unsigned long *); - *out = u; - } else { - unsigned *out = va_arg(ap, unsigned *); - if (u > UINT_MAX) - return n_matched; - *out = (unsigned) u; - } - ++pattern; - ++n_matched; - } else if (*pattern == 'f') { - double *d = va_arg(ap, double *); - if (!longmod) - return -1; /* float not supported */ - if (!*buf) - return n_matched; - if (scan_double(&buf, d, width)<0) - return n_matched; - ++pattern; - ++n_matched; - } else if (*pattern == 'd') { - long lng=0; - if (scan_signed(&buf, &lng, width)<0) - return n_matched; - if (longmod) { - long *out = va_arg(ap, long *); - *out = lng; - } else { - int *out = va_arg(ap, int *); -#if LONG_MAX > INT_MAX - if (lng < INT_MIN || lng > INT_MAX) - return n_matched; -#endif - *out = (int)lng; - } - ++pattern; - ++n_matched; - } else if (*pattern == 's') { - char *s = va_arg(ap, char *); - if (longmod) - return -1; - if (width < 0) - return -1; - if (scan_string(&buf, s, width)<0) - return n_matched; - ++pattern; - ++n_matched; - } else if (*pattern == 'c') { - char *ch = va_arg(ap, char *); - if (longmod) - return -1; - if (width != -1) - return -1; - if (!*buf) - return n_matched; - *ch = *buf++; - ++pattern; - ++n_matched; - } else if (*pattern == '%') { - if (*buf != '%') - return n_matched; - if (longmod) - return -1; - ++buf; - ++pattern; - } else { - return -1; /* Unrecognized pattern component. */ - } - } - } - - return n_matched; -} - -/** Minimal sscanf replacement: parse buf according to pattern - * and store the results in the corresponding argument fields. Differs from - * sscanf in that: - * - * - * (As with other locale-independent functions, we need this to parse data that - * is in ASCII without worrying that the C library's locale-handling will make - * miscellaneous characters look like numbers, spaces, and so on.) - */ -int -tor_sscanf(const char *buf, const char *pattern, ...) -{ - int r; - va_list ap; - va_start(ap, pattern); - r = tor_vsscanf(buf, pattern, ap); - va_end(ap); - return r; -} - /** Append the string produced by tor_asprintf(pattern, ...) * to sl. */ void diff --git a/src/common/util.h b/src/common/util.h index 8977b460bf..5833fe567f 100644 --- a/src/common/util.h +++ b/src/common/util.h @@ -25,6 +25,8 @@ #include "lib/err/torerr.h" #include "lib/malloc/util_malloc.h" #include "lib/wallclock/approx_time.h" +#include "lib/string/util_string.h" +#include "lib/string/scanf.h" #include "common/util_bug.h" #ifndef O_BINARY @@ -96,22 +98,6 @@ uint32_t tor_add_u32_nowrap(uint32_t a, uint32_t b); /* String manipulation */ -/** Allowable characters in a hexadecimal string. */ -#define HEX_CHARACTERS "0123456789ABCDEFabcdef" -void tor_strlower(char *s) ATTR_NONNULL((1)); -void tor_strupper(char *s) ATTR_NONNULL((1)); -int tor_strisprint(const char *s) ATTR_NONNULL((1)); -int tor_strisnonupper(const char *s) ATTR_NONNULL((1)); -int tor_strisspace(const char *s); -int strcmp_opt(const char *s1, const char *s2); -int strcmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int strcmp_len(const char *s1, const char *s2, size_t len) ATTR_NONNULL((1,2)); -int strcasecmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int strcmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int strcasecmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); -int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix); - -void tor_strstrip(char *s, const char *strip) ATTR_NONNULL((1,2)); long tor_parse_long(const char *s, int base, long min, long max, int *ok, char **next); unsigned long tor_parse_ulong(const char *s, int base, unsigned long min, @@ -120,16 +106,9 @@ double tor_parse_double(const char *s, double min, double max, int *ok, char **next); uint64_t tor_parse_uint64(const char *s, int base, uint64_t min, uint64_t max, int *ok, char **next); + const char *hex_str(const char *from, size_t fromlen) ATTR_NONNULL((1)); -const char *eat_whitespace(const char *s); -const char *eat_whitespace_eos(const char *s, const char *eos); -const char *eat_whitespace_no_nl(const char *s); -const char *eat_whitespace_eos_no_nl(const char *s, const char *eos); -const char *find_whitespace(const char *s); -const char *find_whitespace_eos(const char *s, const char *eos); -const char *find_str_at_start_of_line(const char *haystack, - const char *needle); -int string_is_C_identifier(const char *string); + int string_is_key_value(int severity, const char *string); int string_is_valid_dest(const char *string); int string_is_valid_nonrfc_hostname(const char *string); @@ -139,6 +118,7 @@ int string_is_valid_ipv6_address(const char *string); int tor_mem_is_zero(const char *mem, size_t len); int tor_digest_is_zero(const char *digest); int tor_digest256_is_zero(const char *digest); + char *esc_for_log(const char *string) ATTR_MALLOC; char *esc_for_log_len(const char *chars, size_t n) ATTR_MALLOC; const char *escaped(const char *string); @@ -147,11 +127,6 @@ char *tor_escape_str_for_pt_args(const char *string, const char *chars_to_escape); struct smartlist_t; -int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \ - CHECK_SCANF(2, 0); -int tor_sscanf(const char *buf, const char *pattern, ...) - CHECK_SCANF(2, 3); - void smartlist_add_asprintf(struct smartlist_t *sl, const char *pattern, ...) CHECK_PRINTF(2, 3); void smartlist_add_vasprintf(struct smartlist_t *sl, const char *pattern, diff --git a/src/common/util_format.c b/src/common/util_format.c index 713e87129c..8b299725a4 100644 --- a/src/common/util_format.c +++ b/src/common/util_format.c @@ -465,39 +465,6 @@ base16_encode(char *dest, size_t destlen, const char *src, size_t srclen) *cp = '\0'; } -/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */ -static inline int -hex_decode_digit_(char c) -{ - switch (c) { - case '0': return 0; - case '1': return 1; - case '2': return 2; - case '3': return 3; - case '4': return 4; - case '5': return 5; - case '6': return 6; - case '7': return 7; - case '8': return 8; - case '9': return 9; - case 'A': case 'a': return 10; - case 'B': case 'b': return 11; - case 'C': case 'c': return 12; - case 'D': case 'd': return 13; - case 'E': case 'e': return 14; - case 'F': case 'f': return 15; - default: - return -1; - } -} - -/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */ -int -hex_decode_digit(char c) -{ - return hex_decode_digit_(c); -} - /** Given a hexadecimal string of srclen bytes in src, decode * it and store the result in the destlen-byte buffer at dest. * Return the number of bytes decoded on success, -1 on failure. If @@ -520,8 +487,8 @@ base16_decode(char *dest, size_t destlen, const char *src, size_t srclen) end = src+srclen; while (src #include diff --git a/src/lib/container/smartlist.c b/src/lib/container/smartlist.c index 2d861f566c..3a0a2d1068 100644 --- a/src/lib/container/smartlist.c +++ b/src/lib/container/smartlist.c @@ -14,9 +14,10 @@ #include "lib/malloc/util_malloc.h" #include "lib/container/smartlist.h" #include "lib/err/torerr.h" -#include "common/util.h" // For strstrip. +#include "lib/malloc/util_malloc.h" #include "lib/defs/digest_sizes.h" #include "lib/ctime/di_ops.h" +#include "lib/string/util_string.h" #include #include diff --git a/src/lib/crypt_ops/.may_include b/src/lib/crypt_ops/.may_include index 438e5fcdce..0a9a0dda22 100644 --- a/src/lib/crypt_ops/.may_include +++ b/src/lib/crypt_ops/.may_include @@ -6,6 +6,7 @@ lib/ctime/*.h lib/defs/*.h lib/malloc/*.h lib/err/*.h +lib/string/*.h lib/testsupport/testsupport.h trunnel/pwbox.h diff --git a/src/lib/crypt_ops/crypto_format.c b/src/lib/crypt_ops/crypto_format.c index 2d28090aaa..e4237653f2 100644 --- a/src/lib/crypt_ops/crypto_format.c +++ b/src/lib/crypt_ops/crypto_format.c @@ -20,6 +20,7 @@ #include "lib/crypt_ops/crypto_ed25519.h" #include "lib/crypt_ops/crypto_format.h" #include "lib/crypt_ops/crypto_util.h" +#include "lib/string/util_string.h" #include "common/util.h" #include "common/util_format.h" #include "common/torlog.h" diff --git a/src/lib/crypt_ops/crypto_openssl_mgt.c b/src/lib/crypt_ops/crypto_openssl_mgt.c index b18717a112..2c2c2048e9 100644 --- a/src/lib/crypt_ops/crypto_openssl_mgt.c +++ b/src/lib/crypt_ops/crypto_openssl_mgt.c @@ -12,6 +12,7 @@ #include "lib/crypt_ops/compat_openssl.h" #include "lib/crypt_ops/crypto_openssl_mgt.h" +#include "lib/string/util_string.h" DISABLE_GCC_WARNING(redundant-decls) @@ -158,4 +159,3 @@ crypto_openssl_free_all(void) } #endif /* !defined(NEW_THREAD_API) */ } - diff --git a/src/lib/string/.may_include b/src/lib/string/.may_include new file mode 100644 index 0000000000..8781566d9f --- /dev/null +++ b/src/lib/string/.may_include @@ -0,0 +1,6 @@ +orconfig.h +lib/cc/*.h +lib/err/*.h +lib/malloc/*.h +lib/ctime/*.h +lib/string/*.h diff --git a/src/lib/string/compat_ctype.c b/src/lib/string/compat_ctype.c new file mode 100644 index 0000000000..d1d4ce0ffc --- /dev/null +++ b/src/lib/string/compat_ctype.c @@ -0,0 +1,67 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/compat_ctype.h" + +/** + * Tables to implement ctypes-replacement TOR_IS*() functions. Each table + * has 256 bits to look up whether a character is in some set or not. This + * fails on non-ASCII platforms, but it is hard to find a platform whose + * character set is not a superset of ASCII nowadays. */ + +/**@{*/ +const uint32_t TOR_ISALPHA_TABLE[8] = + { 0, 0, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; +const uint32_t TOR_ISALNUM_TABLE[8] = + { 0, 0x3ff0000, 0x7fffffe, 0x7fffffe, 0, 0, 0, 0 }; +const uint32_t TOR_ISSPACE_TABLE[8] = { 0x3e00, 0x1, 0, 0, 0, 0, 0, 0 }; +const uint32_t TOR_ISXDIGIT_TABLE[8] = + { 0, 0x3ff0000, 0x7e, 0x7e, 0, 0, 0, 0 }; +const uint32_t TOR_ISDIGIT_TABLE[8] = { 0, 0x3ff0000, 0, 0, 0, 0, 0, 0 }; +const uint32_t TOR_ISPRINT_TABLE[8] = + { 0, 0xffffffff, 0xffffffff, 0x7fffffff, 0, 0, 0, 0x0 }; +const uint32_t TOR_ISUPPER_TABLE[8] = { 0, 0, 0x7fffffe, 0, 0, 0, 0, 0 }; +const uint32_t TOR_ISLOWER_TABLE[8] = { 0, 0, 0, 0x7fffffe, 0, 0, 0, 0 }; + +/** Upper-casing and lowercasing tables to map characters to upper/lowercase + * equivalents. Used by tor_toupper() and tor_tolower(). */ +/**@{*/ +const uint8_t TOR_TOUPPER_TABLE[256] = { + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, + 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, + 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, + 96,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, + 80,81,82,83,84,85,86,87,88,89,90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, +}; +const uint8_t TOR_TOLOWER_TABLE[256] = { + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, + 64,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,95, + 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, +}; +/**@}*/ diff --git a/src/lib/string/compat_ctype.h b/src/lib/string/compat_ctype.h new file mode 100644 index 0000000000..32f314f332 --- /dev/null +++ b/src/lib/string/compat_ctype.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_COMPAT_CTYPE_H +#define TOR_COMPAT_CTYPE_H + +#include "orconfig.h" +#include "lib/cc/torint.h" + +/* Much of the time when we're checking ctypes, we're doing spec compliance, + * which all assumes we're doing ASCII. */ +#define DECLARE_CTYPE_FN(name) \ + static int TOR_##name(char c); \ + extern const uint32_t TOR_##name##_TABLE[]; \ + static inline int TOR_##name(char c) { \ + uint8_t u = c; \ + return !!(TOR_##name##_TABLE[(u >> 5) & 7] & (1u << (u & 31))); \ + } +DECLARE_CTYPE_FN(ISALPHA) +DECLARE_CTYPE_FN(ISALNUM) +DECLARE_CTYPE_FN(ISSPACE) +DECLARE_CTYPE_FN(ISDIGIT) +DECLARE_CTYPE_FN(ISXDIGIT) +DECLARE_CTYPE_FN(ISPRINT) +DECLARE_CTYPE_FN(ISLOWER) +DECLARE_CTYPE_FN(ISUPPER) +extern const uint8_t TOR_TOUPPER_TABLE[]; +extern const uint8_t TOR_TOLOWER_TABLE[]; +#define TOR_TOLOWER(c) (TOR_TOLOWER_TABLE[(uint8_t)c]) +#define TOR_TOUPPER(c) (TOR_TOUPPER_TABLE[(uint8_t)c]) + +/** Helper: given a hex digit, return its value, or -1 if it isn't hex. */ +inline int +hex_decode_digit(char c) +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; + default: + return -1; + } +} + +#endif /* !defined(TOR_COMPAT_CTYPE_H) */ diff --git a/src/lib/string/include.am b/src/lib/string/include.am new file mode 100644 index 0000000000..d458515d28 --- /dev/null +++ b/src/lib/string/include.am @@ -0,0 +1,23 @@ + +noinst_LIBRARIES += src/lib/libtor-string.a + +if UNITTESTS_ENABLED +noinst_LIBRARIES += src/lib/libtor-string-testing.a +endif + +src_lib_libtor_string_a_SOURCES = \ + src/lib/string/compat_ctype.c \ + src/lib/string/util_string.c \ + src/lib/string/printf.c \ + src/lib/string/scanf.c + +src_lib_libtor_string_testing_a_SOURCES = \ + $(src_lib_libtor_string_a_SOURCES) +src_lib_libtor_string_testing_a_CPPFLAGS = $(AM_CPPFLAGS) $(TEST_CPPFLAGS) +src_lib_libtor_string_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) + +noinst_HEADERS += \ + src/lib/string/compat_ctype.h \ + src/lib/string/util_string.h \ + src/lib/string/printf.h \ + src/lib/string/scanf.h diff --git a/src/lib/string/printf.c b/src/lib/string/printf.c new file mode 100644 index 0000000000..4443e25fb4 --- /dev/null +++ b/src/lib/string/printf.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/printf.h" +#include "lib/err/torerr.h" +#include "lib/cc/torint.h" +#include "lib/malloc/util_malloc.h" + +#include +#include + +/** Replacement for snprintf. Differs from platform snprintf in two + * ways: First, always NUL-terminates its output. Second, always + * returns -1 if the result is truncated. (Note that this return + * behavior does not conform to C99; it just happens to be + * easier to emulate "return -1" with conformant implementations than + * it is to emulate "return number that would be written" with + * non-conformant implementations.) */ +int +tor_snprintf(char *str, size_t size, const char *format, ...) +{ + va_list ap; + int r; + va_start(ap,format); + r = tor_vsnprintf(str,size,format,ap); + va_end(ap); + return r; +} + +/** Replacement for vsnprintf; behavior differs as tor_snprintf differs from + * snprintf. + */ +int +tor_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + int r; + if (size == 0) + return -1; /* no place for the NUL */ + if (size > SIZE_T_CEILING) + return -1; +#ifdef _WIN32 + r = _vsnprintf(str, size, format, args); +#else + r = vsnprintf(str, size, format, args); +#endif + str[size-1] = '\0'; + if (r < 0 || r >= (ssize_t)size) + return -1; + return r; +} + +/** + * Portable asprintf implementation. Does a printf() into a newly malloc'd + * string. Sets *strp to this string, and returns its length (not + * including the terminating NUL character). + * + * You can treat this function as if its implementation were something like +
+     char buf[_INFINITY_];
+     tor_snprintf(buf, sizeof(buf), fmt, args);
+     *strp = tor_strdup(buf);
+     return strlen(*strp):
+   
+ * Where _INFINITY_ is an imaginary constant so big that any string can fit + * into it. + */ +int +tor_asprintf(char **strp, const char *fmt, ...) +{ + int r; + va_list args; + va_start(args, fmt); + r = tor_vasprintf(strp, fmt, args); + va_end(args); + if (!*strp || r < 0) { + /* LCOV_EXCL_START */ + raw_assert_unreached_msg("Internal error in asprintf"); + /* LCOV_EXCL_STOP */ + } + return r; +} + +/** + * Portable vasprintf implementation. Does a printf() into a newly malloc'd + * string. Differs from regular vasprintf in the same ways that + * tor_asprintf() differs from regular asprintf. + */ +int +tor_vasprintf(char **strp, const char *fmt, va_list args) +{ + /* use a temporary variable in case *strp is in args. */ + char *strp_tmp=NULL; +#ifdef HAVE_VASPRINTF + /* If the platform gives us one, use it. */ + int r = vasprintf(&strp_tmp, fmt, args); + if (r < 0) + *strp = NULL; + else + *strp = strp_tmp; + return r; +#elif defined(HAVE__VSCPRINTF) + /* On Windows, _vsnprintf won't tell us the length of the string if it + * overflows, so we need to use _vcsprintf to tell how much to allocate */ + int len, r; + va_list tmp_args; + va_copy(tmp_args, args); + len = _vscprintf(fmt, tmp_args); + va_end(tmp_args); + if (len < 0) { + *strp = NULL; + return -1; + } + strp_tmp = tor_malloc(len + 1); + r = _vsnprintf(strp_tmp, len+1, fmt, args); + if (r != len) { + tor_free(strp_tmp); + *strp = NULL; + return -1; + } + *strp = strp_tmp; + return len; +#else + /* Everywhere else, we have a decent vsnprintf that tells us how many + * characters we need. We give it a try on a short buffer first, since + * it might be nice to avoid the second vsnprintf call. + */ + char buf[128]; + int len, r; + va_list tmp_args; + va_copy(tmp_args, args); + /* vsnprintf() was properly checked but tor_vsnprintf() available so + * why not use it? */ + len = tor_vsnprintf(buf, sizeof(buf), fmt, tmp_args); + va_end(tmp_args); + if (len < (int)sizeof(buf)) { + *strp = tor_strdup(buf); + return len; + } + strp_tmp = tor_malloc(len+1); + /* use of tor_vsnprintf() will ensure string is null terminated */ + r = tor_vsnprintf(strp_tmp, len+1, fmt, args); + if (r != len) { + tor_free(strp_tmp); + *strp = NULL; + return -1; + } + *strp = strp_tmp; + return len; +#endif /* defined(HAVE_VASPRINTF) || ... */ +} diff --git a/src/lib/string/printf.h b/src/lib/string/printf.h new file mode 100644 index 0000000000..2f46206545 --- /dev/null +++ b/src/lib/string/printf.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_UTIL_PRINTF_H +#define TOR_UTIL_PRINTF_H + +#include "orconfig.h" +#include "lib/cc/compat_compiler.h" + +#include +#include + +int tor_snprintf(char *str, size_t size, const char *format, ...) + CHECK_PRINTF(3,4) ATTR_NONNULL((1,3)); +int tor_vsnprintf(char *str, size_t size, const char *format, va_list args) + CHECK_PRINTF(3,0) ATTR_NONNULL((1,3)); + +int tor_asprintf(char **strp, const char *fmt, ...) + CHECK_PRINTF(2,3); +int tor_vasprintf(char **strp, const char *fmt, va_list args) + CHECK_PRINTF(2,0); + +#endif /* !defined(TOR_UTIL_STRING_H) */ diff --git a/src/lib/string/scanf.c b/src/lib/string/scanf.c new file mode 100644 index 0000000000..0c5082799c --- /dev/null +++ b/src/lib/string/scanf.c @@ -0,0 +1,312 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/scanf.h" +#include "lib/string/compat_ctype.h" +#include "lib/cc/torint.h" +#include "lib/err/torerr.h" + +#include + +#define MAX_SCANF_WIDTH 9999 + +/** Helper: given an ASCII-encoded decimal digit, return its numeric value. + * NOTE: requires that its input be in-bounds. */ +static int +digit_to_num(char d) +{ + int num = ((int)d) - (int)'0'; + raw_assert(num <= 9 && num >= 0); + return num; +} + +/** Helper: Read an unsigned int from *bufp of up to width + * characters. (Handle arbitrary width if width is less than 0.) On + * success, store the result in out, advance bufp to the next + * character, and return 0. On failure, return -1. */ +static int +scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) +{ + unsigned long result = 0; + int scanned_so_far = 0; + const int hex = base==16; + raw_assert(base == 10 || base == 16); + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) + && scanned_so_far < width) { + unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); + // Check for overflow beforehand, without actually causing any overflow + // This preserves functionality on compilers that don't wrap overflow + // (i.e. that trap or optimise away overflow) + // result * base + digit > ULONG_MAX + // result * base > ULONG_MAX - digit + if (result > (ULONG_MAX - digit)/base) + return -1; /* Processing this digit would overflow */ + result = result * base + digit; + ++scanned_so_far; + } + + if (!scanned_so_far) /* No actual digits scanned */ + return -1; + + *out = result; + return 0; +} + +/** Helper: Read an signed int from *bufp of up to width + * characters. (Handle arbitrary width if width is less than 0.) On + * success, store the result in out, advance bufp to the next + * character, and return 0. On failure, return -1. */ +static int +scan_signed(const char **bufp, long *out, int width) +{ + int neg = 0; + unsigned long result = 0; + + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + if (**bufp == '-') { + neg = 1; + ++*bufp; + --width; + } + + if (scan_unsigned(bufp, &result, width, 10) < 0) + return -1; + + if (neg && result > 0) { + if (result > ((unsigned long)LONG_MAX) + 1) + return -1; /* Underflow */ + else if (result == ((unsigned long)LONG_MAX) + 1) + *out = LONG_MIN; + else { + /* We once had a far more clever no-overflow conversion here, but + * some versions of GCC apparently ran it into the ground. Now + * we just check for LONG_MIN explicitly. + */ + *out = -(long)result; + } + } else { + if (result > LONG_MAX) + return -1; /* Overflow */ + *out = (long)result; + } + + return 0; +} + +/** Helper: Read a decimal-formatted double from *bufp of up to + * width characters. (Handle arbitrary width if width is less + * than 0.) On success, store the result in out, advance bufp to the + * next character, and return 0. On failure, return -1. */ +static int +scan_double(const char **bufp, double *out, int width) +{ + int neg = 0; + double result = 0; + int scanned_so_far = 0; + + if (!bufp || !*bufp || !out) + return -1; + if (width<0) + width=MAX_SCANF_WIDTH; + + if (**bufp == '-') { + neg = 1; + ++*bufp; + } + + while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { + const int digit = digit_to_num(*(*bufp)++); + result = result * 10 + digit; + ++scanned_so_far; + } + if (**bufp == '.') { + double fracval = 0, denominator = 1; + ++*bufp; + ++scanned_so_far; + while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { + const int digit = digit_to_num(*(*bufp)++); + fracval = fracval * 10 + digit; + denominator *= 10; + ++scanned_so_far; + } + result += fracval / denominator; + } + + if (!scanned_so_far) /* No actual digits scanned */ + return -1; + + *out = neg ? -result : result; + return 0; +} + +/** Helper: copy up to width non-space characters from bufp to + * out. Make sure out is nul-terminated. Advance bufp + * to the next non-space character or the EOS. */ +static int +scan_string(const char **bufp, char *out, int width) +{ + int scanned_so_far = 0; + if (!bufp || !out || width < 0) + return -1; + while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) { + *out++ = *(*bufp)++; + ++scanned_so_far; + } + *out = '\0'; + return 0; +} + +/** Locale-independent, minimal, no-surprises scanf variant, accepting only a + * restricted pattern format. For more info on what it supports, see + * tor_sscanf() documentation. */ +int +tor_vsscanf(const char *buf, const char *pattern, va_list ap) +{ + int n_matched = 0; + + while (*pattern) { + if (*pattern != '%') { + if (*buf == *pattern) { + ++buf; + ++pattern; + continue; + } else { + return n_matched; + } + } else { + int width = -1; + int longmod = 0; + ++pattern; + if (TOR_ISDIGIT(*pattern)) { + width = digit_to_num(*pattern++); + while (TOR_ISDIGIT(*pattern)) { + width *= 10; + width += digit_to_num(*pattern++); + if (width > MAX_SCANF_WIDTH) + return -1; + } + if (!width) /* No zero-width things. */ + return -1; + } + if (*pattern == 'l') { + longmod = 1; + ++pattern; + } + if (*pattern == 'u' || *pattern == 'x') { + unsigned long u; + const int base = (*pattern == 'u') ? 10 : 16; + if (!*buf) + return n_matched; + if (scan_unsigned(&buf, &u, width, base)<0) + return n_matched; + if (longmod) { + unsigned long *out = va_arg(ap, unsigned long *); + *out = u; + } else { + unsigned *out = va_arg(ap, unsigned *); + if (u > UINT_MAX) + return n_matched; + *out = (unsigned) u; + } + ++pattern; + ++n_matched; + } else if (*pattern == 'f') { + double *d = va_arg(ap, double *); + if (!longmod) + return -1; /* float not supported */ + if (!*buf) + return n_matched; + if (scan_double(&buf, d, width)<0) + return n_matched; + ++pattern; + ++n_matched; + } else if (*pattern == 'd') { + long lng=0; + if (scan_signed(&buf, &lng, width)<0) + return n_matched; + if (longmod) { + long *out = va_arg(ap, long *); + *out = lng; + } else { + int *out = va_arg(ap, int *); +#if LONG_MAX > INT_MAX + if (lng < INT_MIN || lng > INT_MAX) + return n_matched; +#endif + *out = (int)lng; + } + ++pattern; + ++n_matched; + } else if (*pattern == 's') { + char *s = va_arg(ap, char *); + if (longmod) + return -1; + if (width < 0) + return -1; + if (scan_string(&buf, s, width)<0) + return n_matched; + ++pattern; + ++n_matched; + } else if (*pattern == 'c') { + char *ch = va_arg(ap, char *); + if (longmod) + return -1; + if (width != -1) + return -1; + if (!*buf) + return n_matched; + *ch = *buf++; + ++pattern; + ++n_matched; + } else if (*pattern == '%') { + if (*buf != '%') + return n_matched; + if (longmod) + return -1; + ++buf; + ++pattern; + } else { + return -1; /* Unrecognized pattern component. */ + } + } + } + + return n_matched; +} + +/** Minimal sscanf replacement: parse buf according to pattern + * and store the results in the corresponding argument fields. Differs from + * sscanf in that: + *
  • It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c. + *
  • It only handles decimal inputs for %lf. (12.3, not 1.23e1) + *
  • It does not handle arbitrarily long widths. + *
  • Numbers do not consume any space characters. + *
  • It is locale-independent. + *
  • %u and %x do not consume any space. + *
  • It returns -1 on malformed patterns.
+ * + * (As with other locale-independent functions, we need this to parse data that + * is in ASCII without worrying that the C library's locale-handling will make + * miscellaneous characters look like numbers, spaces, and so on.) + */ +int +tor_sscanf(const char *buf, const char *pattern, ...) +{ + int r; + va_list ap; + va_start(ap, pattern); + r = tor_vsscanf(buf, pattern, ap); + va_end(ap); + return r; +} diff --git a/src/lib/string/scanf.h b/src/lib/string/scanf.h new file mode 100644 index 0000000000..9cfa9cc6c1 --- /dev/null +++ b/src/lib/string/scanf.h @@ -0,0 +1,19 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_UTIL_SCANF_H +#define TOR_UTIL_SCANF_H + +#include "orconfig.h" +#include "lib/cc/compat_compiler.h" + +#include + +int tor_vsscanf(const char *buf, const char *pattern, va_list ap) \ + CHECK_SCANF(2, 0); +int tor_sscanf(const char *buf, const char *pattern, ...) + CHECK_SCANF(2, 3); + +#endif /* !defined(TOR_UTIL_STRING_H) */ diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c new file mode 100644 index 0000000000..aba37fcc00 --- /dev/null +++ b/src/lib/string/util_string.c @@ -0,0 +1,340 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "lib/string/util_string.h" +#include "lib/string/compat_ctype.h" +#include "lib/err/torerr.h" +#include "lib/ctime/di_ops.h" + +#include +#include + +/** Remove from the string s every character which appears in + * strip. */ +void +tor_strstrip(char *s, const char *strip) +{ + char *readp = s; + while (*readp) { + if (strchr(strip, *readp)) { + ++readp; + } else { + *s++ = *readp++; + } + } + *s = '\0'; +} + +/** Convert all alphabetic characters in the nul-terminated string s to + * lowercase. */ +void +tor_strlower(char *s) +{ + while (*s) { + *s = TOR_TOLOWER(*s); + ++s; + } +} + +/** Convert all alphabetic characters in the nul-terminated string s to + * lowercase. */ +void +tor_strupper(char *s) +{ + while (*s) { + *s = TOR_TOUPPER(*s); + ++s; + } +} + +/** Return 1 if every character in s is printable, else return 0. + */ +int +tor_strisprint(const char *s) +{ + while (*s) { + if (!TOR_ISPRINT(*s)) + return 0; + s++; + } + return 1; +} + +/** Return 1 if no character in s is uppercase, else return 0. + */ +int +tor_strisnonupper(const char *s) +{ + while (*s) { + if (TOR_ISUPPER(*s)) + return 0; + s++; + } + return 1; +} + +/** Return true iff every character in s is whitespace space; else + * return false. */ +int +tor_strisspace(const char *s) +{ + while (*s) { + if (!TOR_ISSPACE(*s)) + return 0; + s++; + } + return 1; +} + +/** As strcmp, except that either string may be NULL. The NULL string is + * considered to be before any non-NULL string. */ +int +strcmp_opt(const char *s1, const char *s2) +{ + if (!s1) { + if (!s2) + return 0; + else + return -1; + } else if (!s2) { + return 1; + } else { + return strcmp(s1, s2); + } +} + +/** Compares the first strlen(s2) characters of s1 with s2. Returns as for + * strcmp. + */ +int +strcmpstart(const char *s1, const char *s2) +{ + size_t n = strlen(s2); + return strncmp(s1, s2, n); +} + +/** Compare the s1_len-byte string s1 with s2, + * without depending on a terminating nul in s1. Sorting order is first by + * length, then lexically; return values are as for strcmp. + */ +int +strcmp_len(const char *s1, const char *s2, size_t s1_len) +{ + size_t s2_len = strlen(s2); + if (s1_len < s2_len) + return -1; + if (s1_len > s2_len) + return 1; + return fast_memcmp(s1, s2, s2_len); +} + +/** Compares the first strlen(s2) characters of s1 with s2. Returns as for + * strcasecmp. + */ +int +strcasecmpstart(const char *s1, const char *s2) +{ + size_t n = strlen(s2); + return strncasecmp(s1, s2, n); +} + +/** Compares the last strlen(s2) characters of s1 with s2. Returns as for + * strcmp. + */ +int +strcmpend(const char *s1, const char *s2) +{ + size_t n1 = strlen(s1), n2 = strlen(s2); + if (n2>n1) + return strcmp(s1,s2); + else + return strncmp(s1+(n1-n2), s2, n2); +} + +/** Compares the last strlen(s2) characters of s1 with s2. Returns as for + * strcasecmp. + */ +int +strcasecmpend(const char *s1, const char *s2) +{ + size_t n1 = strlen(s1), n2 = strlen(s2); + if (n2>n1) /* then they can't be the same; figure out which is bigger */ + return strcasecmp(s1,s2); + else + return strncasecmp(s1+(n1-n2), s2, n2); +} + +/** Return a pointer to the first char of s that is not whitespace and + * not a comment, or to the terminating NUL if no such character exists. + */ +const char * +eat_whitespace(const char *s) +{ + raw_assert(s); + + while (1) { + switch (*s) { + case '\0': + default: + return s; + case ' ': + case '\t': + case '\n': + case '\r': + ++s; + break; + case '#': + ++s; + while (*s && *s != '\n') + ++s; + } + } +} + +/** Return a pointer to the first char of s that is not whitespace and + * not a comment, or to the terminating NUL if no such character exists. + */ +const char * +eat_whitespace_eos(const char *s, const char *eos) +{ + raw_assert(s); + raw_assert(eos && s <= eos); + + while (s < eos) { + switch (*s) { + case '\0': + default: + return s; + case ' ': + case '\t': + case '\n': + case '\r': + ++s; + break; + case '#': + ++s; + while (s < eos && *s && *s != '\n') + ++s; + } + } + return s; +} + +/** Return a pointer to the first char of s that is not a space or a tab + * or a \\r, or to the terminating NUL if no such character exists. */ +const char * +eat_whitespace_no_nl(const char *s) +{ + while (*s == ' ' || *s == '\t' || *s == '\r') + ++s; + return s; +} + +/** As eat_whitespace_no_nl, but stop at eos whether we have + * found a non-whitespace character or not. */ +const char * +eat_whitespace_eos_no_nl(const char *s, const char *eos) +{ + while (s < eos && (*s == ' ' || *s == '\t' || *s == '\r')) + ++s; + return s; +} + +/** Return a pointer to the first char of s that is whitespace or #, + * or to the terminating NUL if no such character exists. + */ +const char * +find_whitespace(const char *s) +{ + /* tor_assert(s); */ + while (1) { + switch (*s) + { + case '\0': + case '#': + case ' ': + case '\r': + case '\n': + case '\t': + return s; + default: + ++s; + } + } +} + +/** As find_whitespace, but stop at eos whether we have found a + * whitespace or not. */ +const char * +find_whitespace_eos(const char *s, const char *eos) +{ + /* tor_assert(s); */ + while (s < eos) { + switch (*s) + { + case '\0': + case '#': + case ' ': + case '\r': + case '\n': + case '\t': + return s; + default: + ++s; + } + } + return s; +} + +/** Return the first occurrence of needle in haystack that + * occurs at the start of a line (that is, at the beginning of haystack + * or immediately after a newline). Return NULL if no such string is found. + */ +const char * +find_str_at_start_of_line(const char *haystack, const char *needle) +{ + size_t needle_len = strlen(needle); + + do { + if (!strncmp(haystack, needle, needle_len)) + return haystack; + + haystack = strchr(haystack, '\n'); + if (!haystack) + return NULL; + else + ++haystack; + } while (*haystack); + + return NULL; +} + +/** Returns true if string could be a C identifier. + A C identifier must begin with a letter or an underscore and the + rest of its characters can be letters, numbers or underscores. No + length limit is imposed. */ +int +string_is_C_identifier(const char *string) +{ + size_t iter; + size_t length = strlen(string); + if (!length) + return 0; + + for (iter = 0; iter < length ; iter++) { + if (iter == 0) { + if (!(TOR_ISALPHA(string[iter]) || + string[iter] == '_')) + return 0; + } else { + if (!(TOR_ISALPHA(string[iter]) || + TOR_ISDIGIT(string[iter]) || + string[iter] == '_')) + return 0; + } + } + + return 1; +} diff --git a/src/lib/string/util_string.h b/src/lib/string/util_string.h new file mode 100644 index 0000000000..f194c36373 --- /dev/null +++ b/src/lib/string/util_string.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2003-2004, Roger Dingledine + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_UTIL_STRING_H +#define TOR_UTIL_STRING_H + +#include "orconfig.h" +#include "lib/cc/compat_compiler.h" + +#include + +/** Allowable characters in a hexadecimal string. */ +#define HEX_CHARACTERS "0123456789ABCDEFabcdef" +void tor_strlower(char *s) ATTR_NONNULL((1)); +void tor_strupper(char *s) ATTR_NONNULL((1)); +int tor_strisprint(const char *s) ATTR_NONNULL((1)); +int tor_strisnonupper(const char *s) ATTR_NONNULL((1)); +int tor_strisspace(const char *s); +int strcmp_opt(const char *s1, const char *s2); +int strcmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int strcmp_len(const char *s1, const char *s2, size_t len) ATTR_NONNULL((1,2)); +int strcasecmpstart(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int strcmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int strcasecmpend(const char *s1, const char *s2) ATTR_NONNULL((1,2)); +int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix); + +void tor_strstrip(char *s, const char *strip) ATTR_NONNULL((1,2)); + +const char *eat_whitespace(const char *s); +const char *eat_whitespace_eos(const char *s, const char *eos); +const char *eat_whitespace_no_nl(const char *s); +const char *eat_whitespace_eos_no_nl(const char *s, const char *eos); +const char *find_whitespace(const char *s); +const char *find_whitespace_eos(const char *s, const char *eos); +const char *find_str_at_start_of_line(const char *haystack, + const char *needle); + +int string_is_C_identifier(const char *string); + +#endif /* !defined(TOR_UTIL_STRING_H) */