7
\$\begingroup\$

I write in C for several projects (e.g. learning / teaching, coding competitions, data processing) and frequently need arrays (e.g. strings) with fast appending / concatenation / reversing.

I've isolated the code below from a project. For simplicity, I assume all arguments are valid, including no pointer aliasing (for each function, no two arguments point to overlapping blocks of memory). I use a doubling strategy on reallocation.

I understand that vector and string from C++ likely have superior performance / features - this is for settings where a simple C solution is more accessible or easier to modify for the specific context of the problem.

stringi.h

#include <assert.h> // contiguous dynamic array for fixed-size type T // note: v[] is not terminated (no '\0') typedef struct { char *v; // T *v (testing: T = char) size_t count; // count <= capacity size_t capacity; // sizeof(v) / sizeof(T) } stringi_raw; // safe non-method mutations: // 1. modifying v[i] for indices i // e.g. v[i] = 'a'; ++v[i]; // 2. if n <= count, assigning n to count will keep // the first n elements; beware of underflow! // e.g. while (count > 0) --count; // maximum capacity: capacity <= CAP_MAX // practical: CAP_MAX = SIZE_MAX / sizeof(T) static const size_t CAP_MAX = 987654321; // pointer to a stringi_raw typedef stringi_raw *stringi; // initializes a stringi with 0 elements // to be deallocated by stringi_free stringi stringi_init(); // frees memory used by target from the heap void stringi_free(stringi target); // allow n items to be stored without realloc // note: assumes n <= CAP_MAX void stringi_reserve(stringi target, size_t n); // appends c to the end of target // note: takes O(1) amortized time void stringi_put(stringi target, char c); // appends the contents of src to dst void stringi_cat(stringi dst, const stringi src); // reverses target->v void stringi_reverse(stringi target); // aka "reverse(src); cat(dst, src); reverse(src)" void stringi_cat_rev(stringi dst, const stringi src); 

stringi.c

#include <stdlib.h> #include "stringi.h" stringi stringi_init() { stringi target = malloc(sizeof(stringi_raw)); assert(target != NULL); target->v = NULL; // realloc(NULL, x) = malloc(x) target->count = 0; target->capacity = 0; return target; } void stringi_free(stringi target) { free(target->v); // free(NULL) is safe free(target); } // sets the capacity of target to cap // note: assumes count <= cap <= CAP_MAX void stringi_set_cap(stringi target, size_t cap) { void *v = realloc(target->v, cap * sizeof(char)); assert(v != NULL); target->v = v; target->capacity = cap; } // get the next capacity from cap size_t double_or_max(size_t cap) { if (cap > CAP_MAX / 2) return CAP_MAX; return 2 * cap; } void stringi_reserve(stringi target, size_t n) { size_t cap = target->capacity; if (n > cap) { if (cap == 0) cap = 1; else { cap = double_or_max(cap); while (n > cap) cap = double_or_max(cap); } stringi_set_cap(target, cap); } } void stringi_put(stringi target, char c) { size_t n = target->count; assert(n < CAP_MAX); size_t n_fin = n + 1; stringi_reserve(target, n_fin); target->v[n] = c; target->count = n_fin; } void stringi_cat(stringi dst, const stringi src) { size_t n_d = dst->count; size_t n_s = src->count; assert(n_d <= CAP_MAX - n_s); size_t n_fin = n_d + n_s; stringi_reserve(dst, n_fin); for (size_t i = 0; i < n_s; ++i) dst->v[n_d + i] = src->v[i]; dst->count = n_fin; } // if min <= max, reverse v[min] to v[max] inclusive void chars_reverse(char *v, size_t min, size_t max) { while (min < max) { char temp = v[min]; v[min] = v[max]; v[max] = temp; ++min; --max; } } void stringi_reverse(stringi target) { // avoid underflow when count = 0 size_t count = target->count; if (count > 0) chars_reverse(target->v, 0, count - 1); } void stringi_cat_rev(stringi dst, const stringi src) { size_t n_d = dst->count; size_t n_s = src->count; assert(n_d <= CAP_MAX - n_s); size_t n_fin = n_d + n_s; stringi_reserve(dst, n_fin); for (size_t i = 0; i < n_s; ++i) dst->v[n_fin - 1 - i] = src->v[i]; dst->count = n_fin; } 

Tests

#include <stdio.h> #include <limits.h> #include <string.h> #include <time.h> #include "stringi.h" void stringi_remove(stringi *target) { stringi_free(*target); *target = NULL; } void stringi_print(stringi target) { printf("'%.*s' (%zu/%zu)\n", (unsigned int) target->count, target->v, target->count, target->capacity); } void stringi_validate(stringi target, char *expected) { size_t n = strlen(expected); assert(n == target->count); assert(0 == strncmp(expected, target->v, n)); } int diff_to_ms(clock_t diff) { return diff * 1000 / CLOCKS_PER_SEC; } clock_t time_putchar(stringi target) { clock_t start = clock(); for (size_t i = 0; i < CAP_MAX; ++i) stringi_put(target, 'c'); return clock() - start; } int main() { assert(CAP_MAX >= 1); assert(CAP_MAX <= SIZE_MAX / sizeof(char)); stringi s1 = stringi_init(); stringi_validate(s1, ""); printf("create: s1 = %p\n", s1); printf("\nexpanding s1:\n"); stringi_reverse(s1); stringi_validate(s1, ""); for (int i = 0; i < 13; ++i) { stringi_print(s1); stringi_put(s1, 'a' + i); } stringi_validate(s1, "abcdefghijklm"); stringi_print(s1); stringi s2 = stringi_init(); stringi_validate(s2, ""); printf("\ncreate: s2 = %p\n", s2); for (int i = 13; i < 26; ++i) stringi_put(s2, 'a' + i); stringi_validate(s2, "nopqrstuvwxyz"); stringi_cat(s1, s2); stringi_validate(s1, "abcdefghijklmnopqrstuvwxyz"); stringi_reverse(s1); stringi_validate(s1, "zyxwvutsrqponmlkjihgfedcba"); s2->count = 2; stringi_validate(s2, "no"); stringi_cat(s1, s2); stringi_validate(s1, "zyxwvutsrqponmlkjihgfedcbano"); stringi_cat_rev(s1, s2); stringi_validate(s1, "zyxwvutsrqponmlkjihgfedcbanoon"); stringi_remove(&s1); printf("remove: s1 = %p\n", s1); stringi_remove(&s2); printf("remove: s2 = %p\n", s2); s1 = stringi_init(); stringi_validate(s1, ""); printf("create: s1 = %p\n", s1); s2 = stringi_init(); stringi_validate(s2, ""); printf("create: s2 = %p\n", s2); int diff1 = diff_to_ms(time_putchar(s1)); printf("\ns1 += CAP_MAX * 'c': %d ms\n", diff1); stringi_cat(s1, s2); // expect error: exceeding CAP_MAX // stringi_put(s1, 'c'); s1->count = 0; stringi_validate(s1, ""); printf("clear s1:\n"); stringi_print(s1); stringi_remove(&s1); stringi_reserve(s2, CAP_MAX); printf("\nreserve for s2: CAP_MAX\n"); int diff2 = diff_to_ms(time_putchar(s2)); printf("s2 += CAP_MAX * 'c': %d ms\n", diff2); s2->count = 0; stringi_validate(s2, ""); printf("clear s2:\n"); stringi_print(s2); stringi_remove(&s2); return 0; } 

Output

create: s1 = 0000015E9D66F050 expanding s1: '' (0/0) 'a' (1/1) 'ab' (2/2) 'abc' (3/4) 'abcd' (4/4) 'abcde' (5/8) 'abcdef' (6/8) 'abcdefg' (7/8) 'abcdefgh' (8/8) 'abcdefghi' (9/16) 'abcdefghij' (10/16) 'abcdefghijk' (11/16) 'abcdefghijkl' (12/16) 'abcdefghijklm' (13/16) create: s2 = 0000015E9D66EEB0 remove: s1 = 0000000000000000 remove: s2 = 0000000000000000 create: s1 = 0000015E9D66EF90 create: s2 = 0000015E9D66F110 s1 += CAP_MAX * 'c': 1120 ms clear s1: '' (0/987654321) reserve for s2: CAP_MAX s2 += CAP_MAX * 'c': 931 ms clear s2: '' (0/987654321) 

I would appreciate any thoughts on performance, readability, or modifiability.

\$\endgroup\$
13
  • 1
    \$\begingroup\$A short note, not worth an answer. Toby below pointed out your use of assert is wrong. In the function stringi_set_cap you have the comment "assumes count <= cap <= CAP_MAX". That assumption should be checked with an assert. That is exactly what asserts are for. It'll be there when debugging, so the user can see when they call your functions in the wrong way, but won't be there in production, to not slow down the function.\$\endgroup\$CommentedOct 3, 2024 at 15:41
  • 2
    \$\begingroup\$I understand that vector and string from C++ likely have superior performance - not for large reallocations, where this is about 2x faster with 70x fewer page faults. C realloc is far superior to C++ new/delete which omits any way to grow an existing allocation. Of course you need link-time optimization to inline across C source files to reduce per-push overhead since you put all your tiny functions in a separate .c instead of directly in the .h with static inline (or inline plus a .c with extern inline instantiation).\$\endgroup\$CommentedOct 4, 2024 at 4:47
  • \$\begingroup\$@PeterCordes Your comment intrigued me so much that I decided to benchmark vs std::vector for 7 billion push_back operations. The code above takes 56176 ms without optimizations and 18947 ms with optimizations, while std::vector takes 485778 ms without optimizations and 10968 ms with optimizations. Unfortunately, the optimized executable for my code appears to be causing a ton of branch mispredictions (I have no idea what magic causes std::vector to avoid this).\$\endgroup\$CommentedOct 4, 2024 at 6:53
  • \$\begingroup\$Benchmarking without optimization is not interesting, especially for heavily templated C++ code that relies on a lot of inlining of tiny functions. (Idiomatic way of performance evaluation?). Did you compile with clang or gcc -flto or gcc -fwhole-program to allow cross-file inlining from your stringi.c, if your benchmark was in a different file? Can you link your benchmark code, e.g. on godbolt.org (with compiler version/options you actually used) so I can see what you did and maybe profile it on my own Skylake system? Also what CPU?\$\endgroup\$CommentedOct 4, 2024 at 7:24
  • 1
    \$\begingroup\$Anyway, your benchmark does a modulo with % 64 every iteration, which costs a movabs r64, imm64 and a 64-bit mul and some shift / LEA for each byte, which is maybe a similar amount of overhead to the bookkeeping to check if realloc is needed. I'm seeing negligible branch mispredicts with perf stat on Skylake, miss rate is 0.00%. IPC is 3.41 instructions per clock. It's not getting optimized away despite not doing anything with the results except read the count, at least not by GCC. Clang can be more aggressive about optimizing away alloc/free even for code that writes the to buffer.\$\endgroup\$CommentedOct 5, 2024 at 1:57

3 Answers 3

12
\$\begingroup\$

I don't like this typedef:

// pointer to a stringi_raw typedef stringi_raw *stringi; 

In C, it's important to know whether any value is a pointer or a value object. Obscuring this distinction is harmful to my understanding.


This assert() is unfounded:

stringi target = malloc(sizeof(stringi_raw)); assert(target != NULL); 

Assertions exist to tell us something that's true by design. However, we know that malloc()can return a null pointer, and that's something that we need to handle - even (or especially) in production builds when automatic checking of our assertions is disabled by setting NDEBUG. If our assertion is untrue, we proceed straight into undefined behaviour, and that's not something we should inflict on our users.


stringi_free() is safe when passed a string with no storage, but (unlike free()), it can't be passed a null pointer. We could document that its target argument must be valid, but I think it's more helpful to users to just deal with null pointers:

void stringi_free(stringi target) { if (!target) { return; } ⋮ } 

stringi.h includes <assert.h>. But that's not needed here - don't impose that cost on every translation unit that includes stringi.h.

Conversely, it is invalid if there isn't a definition of size_t in scope before it's included.

If stringi.h is included multiple times in a translation unit, I think we get errors from redefining stringi_raw. And an include guard to avoid that problem.


In stringi.c, we have

#include <stdlib.h> #include "stringi.h" 

This explains why we didn't spot the need for stringi.h to have size_t in scope. If we include our header first, then that gives us confidence that it's complete and not dependent on any prior declarations.


The tests demonstrate the functionality, but always return a success status. Better tests are self-checking, returning failure status if any of the operations don't produce the expected result.

\$\endgroup\$
3
  • 2
    \$\begingroup\$Good answer. Note that stringi_free() assumes that the pointer argument is not null, so it is coupled to the behaviors of the assert()s after malloc() and realloc().\$\endgroup\$
    – Nayuki
    CommentedOct 3, 2024 at 16:29
  • 2
    \$\begingroup\$Appreciate the answer. I will likely alter the design so that it returns NULL for failed stringi_init() and int error codes for failed extending, leaving the asserts to the users. This would also make it easier to test failures by assert. And the order of includes is a great catch.\$\endgroup\$CommentedOct 3, 2024 at 17:03
  • 1
    \$\begingroup\$Thanks @Nayuki - I've added a note about stringi_free().\$\endgroup\$CommentedOct 10, 2024 at 7:15
9
\$\begingroup\$

A few small items:

  • stringi stringi_init(): In C, an empty parameter list means any number of arguments, not zero arguments. Change this to stringi stringi_init(void). (This is not necessary in C++ or starting in C23.)

  • void chars_reverse(char *v, size_t min, size_t max): This function is only used within stringi.c, so make it "private" by adding static in front, like: static void chars_reverse(...).

  • void stringi_cat(...): Replace the for-loop with memcpy():

    memcpy(dst->v + n_d, src->v, n_s); 
  • void stringi_reserve(...): Use a do-while loop:

    do cap = double_or_max(cap); while (n > cap); 
  • It seems like the canonical definition of size_t exists in stddef.h.

\$\endgroup\$
1
  • \$\begingroup\$Appreciate it! The do while is a lot cleaner.\$\endgroup\$CommentedOct 3, 2024 at 17:55
3
\$\begingroup\$

Review

You've named a few possibilities, but, really, where do you think you will actually use these functions (as a group)?

Every C programmer knows that C strings end with NUL terminators. These don't. These represent a reversion to Pascal conventions (each 'string' was prefixed with a hidden byte count only the compiler and Pascal libraries knew about.) C is not Pascal, and stepping away from this precept voids the utility of many tried and tested C Standard Library functions.

Example:
printf( "%s\n", "foobar" );
becomes
printf( "%.*s\n", ptr->len, (char*)ptr->val );
An improvement? Workable?? And puts() is no longer useful, at all!

In C, a typical application involving lots of strings might be a "dictionary" whose individual words are accessible via an array of pointers...
With a modern compiler, sizeof( char * ) is typically 8 bytes.
The per-entry overhead of this proposal is (likely) 24 bytes!

I have a small English wordlist (~8000 entries of varying length). Running a quick test on that sample, a conventional array of pointers into a block of ragged length C strings required about 90,000 bytes of storage. Summing 24 bytes of overhead with the 'rounded up' allocation requirements for each of these 'Pascal strings' was twice as much... In blocks of powers of 2, the requirement is always the worst of its cohort. This is not good.

Plus, there is no demonstrated awareness of the overheads of malloc() adding to the overheads of the "string fragments" represented by this code. Where's the advantage?


reVerSe vs reSerVe

Sorry, but I'm annoyed that these two different operations, with their too-similar spellings, have been "packaged-up" together.

How useful is reverse(), really?
It's such a tiny & simple function (unrelated to memory allocation) that I'd feel better if it was not present in this collection.

Various SO questions, in the past, used "reverse in place" that took first and last parameters to reverse substrings within a given string. This had more utility, imo.


Summary

Without having made careful evaluation of every line of code, it looks like most of this package is nicely laid-out, readable and probably does what it needs to do.

However... Looking back over many years of programming, I cannot recall ever needing the functionality of stringi_catrev(). (Where is its partner _prfrev() that reverses the first string before appending the second string to it? Wouldn't the aforementioned (simple) 'reverse substring' be able to replace some of this?)

Most of these functions are little more than 'aliases'; wrappers for conventional C functions familiar to all who've already learned the language.

\$\endgroup\$

    Start asking to get answers

    Find the answer to your question by asking.

    Ask question

    Explore related questions

    See similar questions with these tags.