7
\$\begingroup\$

As a small project to refresh my C and data-structure knowledge, I decided to write a generic ArrayList (as in Java). This is my most current version:

ArrayList.h:

#ifndef ARRAYLIST_H #define ARRAYLIST_H struct ArrayList; // ArrayList_create: {{{ // Creates a new ArrayList in memory and returns a pointer to it. // // @param element_size The size of a single element to be stored in the // ArrayList // @require element_size != 0 // // @return A pointer to the created ArrayList. // NULL if something went wrong with the allocation }}} struct ArrayList *ArrayList_create(size_t element_size); // ArrayList_destroy: {{{ // Takes a pointer to an ArrayList and destroys it and its associated data. // All data pointers previously acquired through ArrayList_get become invalid. // // @param list The ArrayList to destroy }}} void ArrayList_destroy(struct ArrayList *list); // ArrayList_getSize: {{{ // Get the current number of elements stored in @param list. // // @param list ArrayList to get the size from // @require list != NULL // // @return The number of elements currently stored in @param list }}} size_t ArrayList_getSize(const struct ArrayList *list); // ArrayList_isEmpty: {{{ // Returns whether ArrayList @param list is empty. // // @param list ArrayList to check emptiness on // @require list != NULL // // @return Non-zero when empty, zero otherwise }}} int ArrayList_isEmpty(const struct ArrayList *list); // ArrayList_trimToSize: {{{ // Trims the capacity of @param list to be the list's current size. // // @param list ArrayList to trim capacity from // @require list != NULL // // @return Non-zero on successful trim, zero otherwise }}} int ArrayList_trimToSize(struct ArrayList *list); // ArrayList_append: {{{ // Appends @param element to @param list. The element gets copied. // // @param list ArrayList to append @param element to // @require list != NULL // @param element Element to append @param list // @require list != NULL // // @return Non-zero on successful append, zero otherwise }}} int ArrayList_append(struct ArrayList *list, const void *element); // ArrayList_add: {{{ // Adds element to the specific index of ArrayList. // // @param list ArrayList to add @param element to // @require list != NULL // @param index Index to place @param element in @param list // @require index <= ArrayList_getSize(list) // @param element The element to add // @require element != NULL // // @return Non-zero on successful add, zero otherwise }}} int ArrayList_add(struct ArrayList *list, size_t index, const void *element); // ArrayList_remove: {{{ // Removes the element stored at specific index from ArrayList. // // @param list ArrayList to remove @param index from // @require list != NULL // @param index Index to remove from @param list // @require index < ArrayList_getSize(list) // // @return Non-zero on successful remove, zero otherwise }}} int ArrayList_remove(struct ArrayList *list, size_t index); // ArrayList_getElement: {{{ // Get pointer to element stored in ArrayList. // // @param list ArrayList to get element from // @param list != NULL // @param index Index of stored element in @param list // @param index < ArrayList_getSize(list) // // @return Pointer to element at given index in ArrayList }}} void *ArrayList_get(const struct ArrayList *list, size_t index); #endif /* ARRAYLIST_H */ 

ArrayList.c:

#include <stdlib.h> #include <string.h> #include <assert.h> #include "ArrayList.h" struct ArrayList { size_t elem_size; // Size of single element to be stored. size_t size; // Number of currently stored elements. size_t capacity; // Number of elements fitting into allocated memory. char *data; // Pointer to allocated memory. }; #define INITIAL_CAPACITY 10 struct ArrayList *ArrayList_create(size_t element_size) { assert(element_size != 0); struct ArrayList *list = calloc(1, sizeof(struct ArrayList)); if (!list) { return NULL; } list->data = calloc(INITIAL_CAPACITY, element_size); if (!list->data) { free(list); return NULL; } list->capacity = INITIAL_CAPACITY; list->elem_size = element_size; return list; } void ArrayList_destroy(struct ArrayList *list) { assert(list != NULL); free(list->data); free(list); } size_t ArrayList_getSize(const struct ArrayList *list) { assert(list != NULL); return list->size; } int ArrayList_isEmpty(const struct ArrayList *list) { assert(list != NULL); return list->size == 0; } int ArrayList_trimToSize(struct ArrayList *list) { assert(list != NULL); // No overflow check, since size <= capacity char * const data = realloc(list->data, list->size * list->elem_size); if (!data) { return 0; } list->data = data; list->capacity = list->size; return 1; } static int ArrayList_ensureCapacity(struct ArrayList *list, size_t capacity) { assert(list != NULL); while(list->capacity < capacity) { // Increase by factor 1.5 const size_t new_capacity = list->capacity + (list->capacity >> 1); // Check for addition overflow (well defined) // and multiplication overflow (prevent) if (new_capacity < list->capacity || SIZE_MAX / list->elem_size < new_capacity) { return 0; } char * const data = realloc(list->data, new_capacity * list->elem_size); if (!data) { return 0; } list->data = data; list->capacity = new_capacity; } return 1; } int ArrayList_append(struct ArrayList *list, const void *element) { assert(list != NULL); assert(element != NULL); return ArrayList_add(list, list->size, element); } int ArrayList_add(struct ArrayList *list, size_t index, const void *element) { assert(list != NULL); assert(index <= list->size); assert(element != NULL); if (index > list->size || !ArrayList_ensureCapacity(list, list->size + 1)) { return 0; } char * const elementDest = list->data + index * list->elem_size; char * const moveDest = elementDest + list->elem_size; // Valid call (even for list->size - index == 0 - case of append) memmove(moveDest, elementDest, list->elem_size * (list->size - index)); memcpy(elementDest, element, list->elem_size); list->size++; return 1; } int ArrayList_remove(struct ArrayList *list, size_t index) { assert(list != NULL); assert(index < list->size); char *const dest = list->data + index * list->elem_size; char *const source = list->data + (index + 1) * list->elem_size; size_t len = list->size - (index + 1); memmove(dest, source, len); list->size--; return 1; } void *ArrayList_get(const struct ArrayList *list, size_t index) { assert(list != NULL); assert(index < list->size); return list->data + (index * list->elem_size); } 

I wrote a small unit test for the most basic functionality as well:

ArrayListTest.h:

#include <inttypes.h> #include <assert.h> #include <stdio.h> #include "ArrayList.h" void ArrayListTest_initialState(void); void ArrayListTest_changeSize(void); void ArrayListTest_AddAppendRemove(void); // ArrayListTest_initialState() {{{ void ArrayListTest_initialState() { struct ArrayList *list = ArrayList_create(sizeof(uint32_t)); if (list) { assert(ArrayList_getSize(list) == 0); assert(ArrayList_isEmpty(list)); ArrayList_destroy(list); } else { printf("Warning: ArrayList in %s couldn't get created\n", "ArrayListTest_initialState()"); } } // }}} // ArrayListTest_changeSize() {{{ void ArrayListTest_changeSize() { struct ArrayList *list = ArrayList_create(sizeof(uint32_t)); if (list) { assert(ArrayList_getSize(list) == 0); assert(ArrayList_isEmpty(list)); uint32_t element = 0; ArrayList_add(list, 0, &element); assert(ArrayList_getSize(list) == 1); assert(!ArrayList_isEmpty(list)); ArrayList_add(list, 0, &element); assert(ArrayList_getSize(list) == 2); assert(!ArrayList_isEmpty(list)); ArrayList_append(list, &element); assert(ArrayList_getSize(list) == 3); assert(!ArrayList_isEmpty(list)); ArrayList_remove(list, 2); assert(ArrayList_getSize(list) == 2); assert(!ArrayList_isEmpty(list)); ArrayList_remove(list, 0); assert(ArrayList_getSize(list) == 1); assert(!ArrayList_isEmpty(list)); ArrayList_remove(list, 0); assert(ArrayList_getSize(list) == 0); assert(ArrayList_isEmpty(list)); ArrayList_destroy(list); } else { printf("Warning: ArrayList in %s couldn't get created\n", "ArrayListTest_changeSize()"); } } // }}} // ArrayListTest_AddAppendRemove() {{{ void ArrayListTest_AddAppendRemove() { struct ArrayList *list = ArrayList_create(sizeof(uint32_t)); if (list) { uint32_t element; element = 0; ArrayList_add(list, 0, &element); assert(*(uint32_t *)ArrayList_get(list, 0) == 0); element = 10; ArrayList_append(list, &element); assert(*(uint32_t *)ArrayList_get(list, 0) == 0); assert(*(uint32_t *)ArrayList_get(list, 1) == 10); element = 12; ArrayList_add(list, 1, &element); assert(*(uint32_t *)ArrayList_get(list, 0) == 0); assert(*(uint32_t *)ArrayList_get(list, 1) == 12); assert(*(uint32_t *)ArrayList_get(list, 2) == 10); ArrayList_remove(list, 1); assert(*(uint32_t *)ArrayList_get(list, 0) == 0); assert(*(uint32_t *)ArrayList_get(list, 1) == 10); ArrayList_remove(list, 0); assert(*(uint32_t *)ArrayList_get(list, 0) == 10); ArrayList_destroy(list); } else { printf("Warning: ArrayList in %s couldn't get created\n", "ArrayListTest_AddAppendRemove()"); } } // }}} int main() { ArrayListTest_initialState(); puts("ArrayListTest_initialState successful."); ArrayListTest_changeSize(); puts("ArrayListTest_changeSize successful."); ArrayListTest_AddAppendRemove(); puts("ArrayListTest_AddAppendRemove successful."); return 0; } 

The code compiles (using clang and C99) without any warnings:

clang ArrayListTest.c ArrayList.c -o ArrayListTest -Weverything -std=c99 -pedantic-errors 

I'm currently mostly concerned about the following things:

  • Readability. The intend of every piece of code is clear to the reader.
  • Correctness. Everything is working as it should (I think it's clear what should happen).
  • Universality. The data structure is capable to hold any kind of data (I'm currently not sure about the alignment of function pointers).
  • Portability. The data structure doesn't break on specific machines.
  • Performance. What can be done to improve the performance of this code?
\$\endgroup\$
2
  • \$\begingroup\$Does it run any test cases without problems?\$\endgroup\$
    – pacmaninbw
    CommentedApr 11, 2017 at 14:44
  • 2
    \$\begingroup\$@pacmaninbw Yes! All the tests do run without problems.\$\endgroup\$
    – Hericks
    CommentedApr 11, 2017 at 14:57

2 Answers 2

5
\$\begingroup\$

Infinite loop bug

This code sequence demonstrates an infinite loop in your code:

void test(void) { uint32_t elem = 5; struct ArrayList *arr = ArrayList_create(sizeof(elem)); ArrayList_append(arr, &elem); ArrayList_trimToSize(arr); ArrayList_append(arr, &elem); ArrayList_destroy(arr); } 

The problem is with ArrayList_ensureCapacity() in this code:

 while(list->capacity < capacity) { // Increase by factor 1.5 const size_t new_capacity = list->capacity + (list->capacity >> 1); // ... } 

If list->capacity is 0 or 1 here, then the loop will never terminate because the capacity remains the same on each iteration. Although capacity starts at 10, it can be reduced to 0 or 1 after a call to ArrayList_trimToSize(), as with the example above.

Bug in remove

This line in ArrayList_remove() is wrong:

 memmove(dest, source, len); 

It should be:

 memmove(dest, source, len * list->elem_size); 

Otherwise, you are moving the wrong number of bytes. Your tests did not catch this bug because you used a 4 byte value to test but only used numbers such as 12 which occupied the first byte. So it just happened to work even though you moved 1 byte instead of 4 bytes. If you had used numbers such as 0x11111111, you would have caught the problem.

Concern about get function

I would suggest changing your get function to return a copy of the element rather than a pointer to it. Something like this:

void ArrayList_get(const struct ArrayList *list, size_t index, void *ret); 

The danger in returning a pointer is that the pointer's lifetime could end unexpectedly. Any call to append, add, trim, or destroy could invalidate the pointer since each of those calls could reallocate the array. You've only documented that the destroy function will do so, but the other three could as well.

\$\endgroup\$
2
  • \$\begingroup\$Thank you! How would you fix the infinite loop caused in ArrayList_ensureCapacity()? One way would be to use const size_t new_capacity = list->capacity + (list->capacity >> 1) + 1; to ensure, that new_capacity is strictly greater than list->capacity. A ternary to check if the ` + 1` is necessary works as well, but will definitely hurt the readability. I'd have to drop the const on new_capacity using an ifstatement. Also would you rather let the library malloc space for the return value of ArrayList_get or should the user provide a pointer to an already allocated memory block?\$\endgroup\$
    – Hericks
    CommentedApr 12, 2017 at 8:48
  • \$\begingroup\$@Herickson Adding one seems like the easiest way to fix that problem. As for the return value of get, I'd require the caller to provide a pointer. The pointer could either be to an allocated block or to a local variable.\$\endgroup\$
    – JS1
    CommentedApr 12, 2017 at 9:17
4
\$\begingroup\$

Nice question. The question indicates that you are considering reuse and portability, these are always good things to keep in mind.

The macros EXIT_SUCCESS and EXIT_FAILURE in stdlib.h. These macros can be used for return values from functions as well as program exit status and might make the code more readable. While there are functions that return 1 or 0 to indicate success or failure, there is no way to identify what the problem was, this will be confusing to users.

Portability

The macro SIZE_MAX is not part of the C99 standard and may not be implemented on all systems, on the systems it is implemented on, it gives the maximum value of an unsigned long, not the maximum addressable memory which seems to be the use in the function ArrayList_ensureCapacity(). This will prevent the program from compiling on some systems and definitely lead to at least a program crash if not a system crash if there is not enough memory on the system.

Asserts

Be careful when using asserts, if the code is not compiled in debug mode, or the optimization switches are used then the assert is optimized out of the function body (NO OP). Use explicit tests for error conditions that can occur at run time. In ArrayList_remove the assert(index < line-size); should be if (index < list-size) {} due to possible user errors. See the definition of assert().

Universality

One of the stated goals is Universality: The data structure is capable to hold any kind of data (I'm currently not sure about the alignment of function pointers).

The current implementation does not implement Universality because it is storing objects rather than pointers to objects. This forces the implementation to use address arithmetic which is error prone. Using void * to point to the objects requires a cast to the proper type by the user, but the library/functions don't need to know the size of the objects and neither does the user of the library.

Using pointers rather than actual objects removes the need for memmove(), memcpy() in the functions ArrayList_append() and ArrayList_remove() as well. This reduces the complexity of ArrayList_remove() to

int ArrayList_remove(ArrayList *list, size_t index) { if (index < list->size) { void **dest = &data[index]; void **src = &data[index + 1]; // Deallocate the void pointer to the object to prevent memory leaks, and invalid references. free(*dest); // Copy the rest of the list to remove the item in data[index]; for (int i = index + 1; i < list->size; i++, dest++, src++ ) { *dest = *src; } list->size--; return 1; } else { return 0; } } 

A Typedef Would Make the Code Easier to Implement and Read

First it is questionable if the struct declaration of ArrayList should be in ArrayList.c rather than ArrayList.h. Since the calling code needs to pass in a pointer to the structure it may be better to define the structure in the header file.

There wouldn't need to be as much code as there is if the structure was defined in a typedef in the header file:

typedef struct arraylist { size_t elem_size; // Size of single element to be stored. size_t size; // Number of currently stored elements. size_t capacity; // Number of elements fitting into allocated memory. void **data; // Array of pointers to allocated memory. } ArrayList; 

The the function declarations would be:

ArrayList *ArrayList_create(size_t element_size); void ArrayList_destroy(ArrayList *list); size_t ArrayList_getSize(const ArrayList *list); int ArrayList_isEmpty(const ArrayList *list); int ArrayList_trimToSize(ArrayList *list); int ArrayList_append(ArrayList *list, const void *element); int ArrayList_add(ArrayList *list, size_t index, const void *element); int ArrayList_remove(ArrayList *list, size_t index); void *ArrayList_get(const ArrayList *list, size_t index); 

No need for struct to be in every fuction declaration or fuction definition.

Performance

Because calloc() zeros the memory that is allocated, the call to calloc may take more time than the call to malloc(), you might want to look at the first answer on this Stack Overflow Question. The same functionallity can be achieved for ArrayList_create() by using a malloc() and then using memset() but the code might be more efficient and clearer if each field was address individually after the struct was created

struct ArrayList *ArrayList_create() { assert(element_size != 0); struct ArrayList *list = malloc(sizeof(struct ArrayList)); if (list) { list->capacity = INITIAL_CAPACITY; list->elem_size = element_size; list->data = (void **) calloc(INITIAL_CAPACITY, sizeof(void *)); if (!list->data) { free(list); list = NULL; } } return list; } 

The previous code is somewhat clearer and easier to read than the current implementation.

Another way to improve performance of malloc(), calloc() and realloc() is to malloc() a rather large block of memory() at the beginning of the program and then free it prior to starting the the rest of the program. This action will reserve that large block of memory for the program/process. The functions malloc(), calloc() and realloc() make system calls to get more memory when they've used up the current allocation (sbrk on Unix or Linux). Any time the program makes a system call it is swapped out (context switch) until the system call is completed, on a server or other heavily used computer other programs will run on the CPU during this time and affect performance. On a personal computer this is less of an issue.

The performance will also be enhanced by allocating less space using pointer to void rather than storing actual objects.

\$\endgroup\$
3
  • \$\begingroup\$Thank you for your review! Do you know, why clang does not warn me on using the SIZE_MAX macro, even though I compiled using -std=c99? How would you fix the occurrence of SIZE_MAX? I'm well aware of how assert work, but I think the user should be responsible for passing valid arguments and don't check the arguments explicitly (these checks would be redundant for library internal calls). I'm thinking about switching to void ** to store the data, but then the user would be able to store different kind of data and keep track of which index points to which type of data.\$\endgroup\$
    – Hericks
    CommentedApr 12, 2017 at 8:59
  • \$\begingroup\$Don't depend on the users doing what is expected. Users found multiple bugs in my code by doing the unexpected when I was writing compilers and linker/loaders. Error checking should be done as soon as possible in the public interfaces, the asserts should catch anything in the internal functions if the tests are well written. For SIZE_MAX you can do one of two things, ask the operating system how large the memory is, or choose an arbitrary large number as a constant, such as one GByte, the first is not portable since the system calls will be different depending on the operating system.\$\endgroup\$
    – pacmaninbw
    CommentedApr 12, 2017 at 11:47
  • \$\begingroup\$Sometimes it is necessary to use #ifdef SYSTEM_PROVIDED_MACRO to truly make the code portable. I'm updating code right now that has #ifdef MAC in it, but is primarily written for windows.\$\endgroup\$
    – pacmaninbw
    CommentedApr 12, 2017 at 12:02

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.