Compare commits

5 Commits

Author SHA1 Message Date
65358fc766 Added benchmark program 2025-11-25 16:46:20 +01:00
5aa15f2276 Fixed multiple bugs in hashmap implementation.
1. Fixed infinite loop issue that occurred when map was full of
   tombstones;
2. Fixed bug related to tombstone count decrement when adding a new
   key on a deleted slot;
3. Added proper map resize when keys are added or removed;
4. Fixed inconsistent error messages;
5. Added proper NULL check on map_remove method.
2025-11-25 14:16:32 +01:00
b885d93b6f Merge pull request #1 from ceticamarco/bignum_experimental
Adding BigInt support
2025-11-18 11:46:42 +01:00
86f2fb20dd Fixed bug in vector unit test 2025-11-17 09:28:30 +01:00
9e52b7bca4 Updated documentation 2025-11-11 13:54:12 +01:00
11 changed files with 313 additions and 44 deletions

View File

@@ -17,3 +17,7 @@ jobs:
- name: Run unit tests
run: |
./test_vector && ./test_map && ./test_bigint
- name: Run benchmarks
run: |
./benchmark_datum

View File

@@ -14,3 +14,7 @@ jobs:
- name: Run unit tests
run: |
./test_vector && ./test_map && ./test_bigint
- name: Run benchmarks
run: |
./benchmark_datum

View File

@@ -3,21 +3,29 @@ CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \
-fsanitize=address -fsanitize=undefined -fstack-clash-protection \
-Wwrite-strings -g -std=c99
BENCH_FLAGS = -Wall -Wextra -Werror -O3
SRC_DIR = src
BENCH_SRC = benchmark
OBJ_DIR = obj
BENCH_OBJ_DIR = bench_obj
TESTS_SRC = tests
TARGET = usage
TEST_V_TARGET = test_vector
TEST_M_TARGET = test_map
TEST_B_TARGET = test_bigint
BENCH_TARGET = benchmark_datum
LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o
PROG_OBJS = $(OBJ_DIR)/usage.o
# Targets that name an action rather than a file. `bench` is only an alias
# for $(BENCH_TARGET), so it must be phony too: otherwise a stray file or
# directory called `bench` would make `make bench` a silent no-op.
.PHONY: all bench clean
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET)
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET)
bench: $(BENCH_TARGET)
$(TARGET): $(PROG_OBJS) $(LIB_OBJS)
$(CC) $(CFLAGS) -o $@ $^
@@ -43,5 +51,18 @@ $(OBJ_DIR)/%.o: $(TESTS_SRC)/%.c | $(OBJ_DIR)
$(OBJ_DIR):
mkdir -p $(OBJ_DIR)
# Benchmark rules
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o
$(CC) $(BENCH_FLAGS) -o $@ $^
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)
$(CC) $(BENCH_FLAGS) -c -o $@ $<
$(BENCH_OBJ_DIR)/bench.o: $(BENCH_SRC)/benchmark.c | $(BENCH_OBJ_DIR)
$(CC) $(BENCH_FLAGS) -c -o $@ $<
$(BENCH_OBJ_DIR):
mkdir -p $(BENCH_OBJ_DIR)
clean:
rm -rf $(OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET)
rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET)

View File

@@ -133,7 +133,7 @@ and a sample usage for every available method. To run it, first issue the follow
$ make clean all
```
This will compile the library as well as the `usage.c` file and the unit tests. After that, you can run it by typing `./usage`.
This will compile the library as well as the `usage.c` file, the unit tests and the benchmark. After that, you can run it by typing `./usage`.
## Documentation
For additional details about this library (internal design, memory
@@ -149,6 +149,15 @@ $ ./test_map
$ ./test_bigint
```
## Benchmark
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector` and the `Map` data structures. You can run it by issuing the following command:
```sh
$ ./benchmark_datum
Computing Vector average time...average time: 18 ms
Computing Map average time...average time: 31 ms
```
## License
This library is released under the GPLv3 license. You can find a copy of the license with this repository or by visiting
[the following link](https://choosealicense.com/licenses/gpl-3.0/).

92
benchmark/benchmark.c Normal file
View File

@@ -0,0 +1,92 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <stdint.h>
#include "../src/vector.h"
#include "../src/map.h"
typedef void (*test_fn_t)(size_t iterations);
/**
 * test_vector
 *  @iterations: number of elements to push and then read back
 *
 * Vector workload used by the benchmark: pushes @iterations integers
 * into a freshly created vector, reads every element back while
 * accumulating a sum, and finally destroys the vector.
 */
void test_vector(size_t iterations) {
    vector_t *vec = vector_new(16, sizeof(int)).value.vector;

    for (size_t idx = 0; idx < iterations; idx++) {
        // The vector was created with sizeof(int) as element size, so hand it
        // a real int. Passing the address of the size_t counter would expose
        // an object of a different size; which of its bytes end up stored
        // would depend on the platform's endianness.
        int value = (int)idx;
        vector_push(vec, &value);
    }

    volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum
    for (size_t idx = 0; idx < iterations; idx++) {
        const int *val = (int*)vector_get(vec, idx).value.element;
        sum += *val;
    }

    // Another trick to prevent compiler optimization: the result is
    // observable, so the loops above cannot be discarded
    if (sum == 0xB00B5) {
        printf("sum = %llu\n", (unsigned long long)sum);
    }

    vector_destroy(vec);
}
void test_map(size_t iterations) {
map_t *map = map_new().value.map;
char key[64];
for (size_t idx = 0; idx < iterations; idx++) {
snprintf(key, sizeof(key), "key_%zu", idx);
int *value = malloc(sizeof(int));
*value = (int)idx;
map_add(map, key, (void*)value);
}
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum
for (size_t idx = 0; idx < iterations; idx++) {
snprintf(key, sizeof(key), "key_%zu", idx);
const int *val = (const int*)map_get(map, key).value.element;
sum += *val;
}
// Cleanup values
for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED) {
int *val = (int*)map->elements[idx].value;
free(val);
}
}
map_destroy(map);
}
/**
 * benchmark
 *  @fun: the workload to measure
 *  @iterations: value forwarded to @fun on every run
 *  @runs: how many times @fun is executed
 *
 * Runs @fun(@iterations) @runs times and measures the processor time
 * of each run with clock().
 *
 * Returns the average time of a single run in milliseconds (rounded
 * towards zero), or 0 if @fun is NULL or @runs is zero
 */
long long benchmark(test_fn_t fun, size_t iterations, size_t runs) {
    // Nothing to measure; also avoids a division by zero below
    if (fun == NULL || runs == 0) {
        return 0;
    }

    // Accumulate in floating point: converting each run to whole
    // milliseconds before summing would discard up to 1 ms per run, and
    // multiplying a raw clock_t by 1000 may overflow where clock_t is narrow
    double total_ms = 0.0;

    for (size_t idx = 0; idx < runs; idx++) {
        const clock_t start = clock();
        fun(iterations);
        const clock_t end = clock();

        total_ms += (double)(end - start) * 1000.0 / (double)CLOCKS_PER_SEC;
    }

    return (long long)(total_ms / (double)runs);
}
/**
 * main
 *
 * Benchmark entry point: performs one small untimed pass of each workload,
 * then prints the average time of the vector and of the map workloads.
 *
 * Returns 0
 */
int main(void) {
    const size_t warmup_iterations = 1000;
    const size_t vector_iterations = 1000000;
    const size_t map_iterations = 100000;
    const size_t runs = 30;

    // Untimed warm-up pass
    test_vector(warmup_iterations);
    test_map(warmup_iterations);

    // The banner is flushed before the measurement starts because it has
    // no trailing newline and would otherwise stay in the stdio buffer
    printf("Computing Vector average time...");
    fflush(stdout);
    const long long vector_avg = benchmark(test_vector, vector_iterations, runs);
    printf("average time: %lld ms\n", vector_avg);

    printf("Computing Map average time...");
    fflush(stdout);
    const long long map_avg = benchmark(test_map, map_iterations, runs);
    printf("average time: %lld ms\n", map_avg);

    return 0;
}

View File

@@ -7,5 +7,5 @@ At the time being, this documentation includes the following pages:
- [vector.md](vector.md): vector documentation;
- [map.md](map.md): map documentation;
- [sort.md](sort.md): how to use the `vector_sort` method.
- [sort.md](sort.md): how to use the `vector_sort` method;
- [bigint.md](bigint.md): bigint documentation.

View File

@@ -54,6 +54,7 @@ defined as follows:
typedef enum {
MAP_OK = 0x0,
MAP_ERR_ALLOCATE,
MAP_ERR_OVERFLOW,
MAP_ERR_INVALID,
MAP_ERR_NOT_FOUND
} map_status_t;

154
src/map.c
View File

@@ -6,6 +6,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "map.h"
@@ -74,24 +75,35 @@ map_result_t map_new(void) {
* @map: a non-null map
* @key: a string representing the key to find
*
* Finds next available slot for insertion
* Finds next available slot for insertion (empty or deleted)
* or the slot containing an existing key
*
* Returns the index of available slot
* Returns the index of available slot or SIZE_MAX otherwise
*/
size_t map_insert_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity;
size_t delete_tracker = map->capacity; // Fallback index
while (map->elements[idx].state == ENTRY_OCCUPIED) {
if (strcmp(map->elements[idx].key, key) == 0) {
// In this case the key already exists, thus we replace it
return idx;
for (size_t probes = 0; probes < map->capacity; probes++) {
if (map->elements[idx].state == ENTRY_EMPTY) {
return (delete_tracker != map->capacity) ? delete_tracker : idx;
}
if (map->elements[idx].state == ENTRY_OCCUPIED) {
if (!strcmp(map->elements[idx].key, key)) {
return idx;
}
} else if (map->elements[idx].state == ENTRY_DELETED) {
if (delete_tracker == map->capacity) {
delete_tracker = idx;
}
}
idx = (idx + 1) % map->capacity;
}
return idx;
return SIZE_MAX;
}
/**
@@ -105,8 +117,17 @@ map_result_t map_resize(map_t *map) {
map_result_t result = {0};
const size_t old_capacity = map->capacity;
const size_t old_size = map->size;
const size_t old_tombstone = map->tombstone_count;
map_element_t *old_elements = map->elements;
if (map->capacity > SIZE_MAX / 2) {
result.status = MAP_ERR_OVERFLOW;
SET_MSG(result, "Capacity overflow on map resize");
return result;
}
map->capacity *= 2;
map->elements = calloc(map->capacity, sizeof(map_element_t));
if (map->elements == NULL) {
@@ -127,10 +148,21 @@ map_result_t map_resize(map_t *map) {
for (size_t idx = 0; idx < old_capacity; idx++) {
if (old_elements[idx].state == ENTRY_OCCUPIED) {
size_t new_idx = map_insert_index(map, old_elements[idx].key);
if (new_idx == SIZE_MAX) {
// if we can't find a free slot, restore previous state and fail
free(map->elements);
map->elements = old_elements;
map->capacity = old_capacity;
map->size = old_size;
map->tombstone_count = old_tombstone;
result.status = MAP_ERR_OVERFLOW;
SET_MSG(result, "Failed to rehash elements during resize");
return result;
}
map->elements[new_idx] = old_elements[idx];
map->size++;
} else if (old_elements[idx].state == ENTRY_DELETED) {
free(old_elements[idx].key);
}
}
@@ -172,7 +204,28 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
}
// Find next available slot for insertion
const size_t idx = map_insert_index(map, key);
size_t idx = map_insert_index(map, key);
// if index is SIZE_MAX then the map is full
if (idx == SIZE_MAX) {
map_result_t resize_res = map_resize(map);
if (resize_res.status != MAP_OK) {
result.status = MAP_ERR_OVERFLOW;
SET_MSG(result, "The map is full and resize has failed");
return result;
}
idx = map_insert_index(map, key);
// This is very uncommon but still...
if (idx == SIZE_MAX) {
result.status = MAP_ERR_OVERFLOW;
SET_MSG(result, "The map is full after resize(!)");
return result;
}
}
// If slot is occupied, it means that the key already exists.
// Therefore we can update it
@@ -185,16 +238,23 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
return result;
}
// Otherwise, the key doesn't exist. Therefore we need to allocate a new key
map->elements[idx].key = malloc(strlen(key) + 1);
if (map->elements[idx].key == NULL) {
// Allocate a new key
char *new_key = malloc(strlen(key) + 1);
if (new_key == NULL) {
result.status = MAP_ERR_ALLOCATE;
SET_MSG(result, "Failed to allocate memory for map key");
return result;
}
strcpy(map->elements[idx].key, key);
strcpy(new_key, key);
// If we're reusing a deleted slot, decrement the tombstone count
if (map->elements[idx].state == ENTRY_DELETED) {
if (map->tombstone_count > 0) { map->tombstone_count--; }
}
map->elements[idx].key = new_key;
map->elements[idx].value = value;
map->elements[idx].state = ENTRY_OCCUPIED;
map->size++;
@@ -212,21 +272,31 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
*
* Finds the index where a key is located using linear probing to handle collisions
*
* Returns the index of the key if it is found
* Returns the index of the key if it is found or SIZE_MAX otherwise
*/
size_t map_find_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity;
const size_t start_idx = key_digest % map->capacity;
for (size_t probes = 0; probes < map->capacity; probes++) {
size_t idx = (start_idx + probes) % map->capacity;
if (map->elements[idx].state == ENTRY_EMPTY) {
// The key is not on the map
return SIZE_MAX;
}
while (map->elements[idx].state != ENTRY_EMPTY) {
if ((map->elements[idx].state == ENTRY_OCCUPIED) &&
(strcmp(map->elements[idx].key, key) == 0)) {
(!strcmp(map->elements[idx].key, key))) {
// The key has been found
return idx;
}
idx = (idx + 1) % map->capacity;
}
return idx;
// If we fail to find an ENTRY_EMPTY slot after probing the entire table,
// fall back by returning SIZE_MAX. This should never
// happen because the map is resized whenever an element is inserted or removed.
return SIZE_MAX;
}
/**
@@ -250,17 +320,20 @@ map_result_t map_get(const map_t *map, const char *key) {
const size_t idx = map_find_index(map, key);
// If slot status is 'occupied' then the key exists
if (map->elements[idx].state == ENTRY_OCCUPIED) {
// otherwise the idx is set to SIZE_MAX
if (idx == SIZE_MAX) {
result.status = MAP_ERR_NOT_FOUND;
SET_MSG(result, "Element not found");
} else if (map->elements[idx].state == ENTRY_OCCUPIED) {
result.status = MAP_OK;
SET_MSG(result, "Value successfully retrieved");
result.value.element = map->elements[idx].value;
return result;
} else {
// Fallback case. Shouldn't happen but better safe than sorry
result.status = MAP_ERR_NOT_FOUND;
SET_MSG(result, "Element not found");
}
result.status = MAP_ERR_NOT_FOUND;
SET_MSG(result, "Element not found");
return result;
}
@@ -276,18 +349,18 @@ map_result_t map_get(const map_t *map, const char *key) {
map_result_t map_remove(map_t *map, const char *key) {
map_result_t result = {0};
if (map == NULL) {
if (map == NULL || key == NULL) {
result.status = MAP_ERR_INVALID;
SET_MSG(result, "Invalid map");
SET_MSG(result, "Invalid map or key");
return result;
}
const size_t idx = map_find_index(map, key);
if (map->elements[idx].state != ENTRY_OCCUPIED) {
result.status = MAP_ERR_INVALID;
SET_MSG(result, "Cannot delete this element");
if (idx == SIZE_MAX || map->elements[idx].state != ENTRY_OCCUPIED) {
result.status = MAP_ERR_NOT_FOUND;
SET_MSG(result, "Element not found");
return result;
}
@@ -304,6 +377,19 @@ map_result_t map_remove(map_t *map, const char *key) {
map->size--;
map->tombstone_count++;
// Check if there are too many tombstone entries
const double load_factor = (double)(map->size + map->tombstone_count) / map->capacity;
if (load_factor > LOAD_FACTOR_THRESHOLD) {
map_result_t resize_res = map_resize(map);
if (resize_res.status != MAP_OK) {
result.status = resize_res.status;
SET_MSG(result, "Key successfully deleted. Resize has failed");
return result;
}
}
result.status = MAP_OK;
SET_MSG(result, "Key successfully deleted");
@@ -329,8 +415,7 @@ map_result_t map_clear(map_t *map) {
}
for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED ||
map->elements[idx].state == ENTRY_DELETED) {
if (map->elements[idx].state == ENTRY_OCCUPIED) {
free(map->elements[idx].key);
map->elements[idx].key = NULL;
map->elements[idx].value = NULL;
@@ -368,8 +453,7 @@ map_result_t map_destroy(map_t *map) {
}
for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED ||
map->elements[idx].state == ENTRY_DELETED) {
if (map->elements[idx].state == ENTRY_OCCUPIED) {
free(map->elements[idx].key);
}
}

View File

@@ -17,6 +17,7 @@
typedef enum {
MAP_OK = 0x0,
MAP_ERR_ALLOCATE,
MAP_ERR_OVERFLOW,
MAP_ERR_INVALID,
MAP_ERR_NOT_FOUND
} map_status_t;

View File

@@ -97,6 +97,58 @@ void test_map_get_invalid(void) {
map_destroy(map);
}
// Lookup, removal and insertion on a map whose slots have all been deleted.
// Regression test for the case where the table contains no ENTRY_EMPTY
// slot (i.e., every slot is ENTRY_DELETED): map_get, map_remove and
// map_add must terminate instead of probing forever.
void test_map_get_deleted_slots(void) {
map_result_t res = map_new();
assert(res.status == MAP_OK);
map_t *map = res.value.map;
// Insert INITIAL_CAP (=4) keys; this is expected not to trigger a resize
map_add(map, "x", (void*)1);
map_add(map, "y", (void*)2);
map_add(map, "z", (void*)3);
map_add(map, "j", (void*)4);
// Remove every ENTRY_OCCUPIED slot.
// map_remove should resize the map once the load factor (live entries
// plus tombstones) gets too high, and the resize should also
// garbage-collect the ENTRY_DELETED entries.
// The assertions below therefore expect 3 tombstones left, a doubled
// capacity and no live entries.
map_remove(map, "x");
map_remove(map, "y");
map_remove(map, "z");
map_remove(map, "j");
assert(map->tombstone_count == 3);
assert(map->capacity == 8);
assert(map->size == 0);
// Retrieving a deleted element should return an error
// but should not loop forever
map_result_t get_deleted_res = map_get(map, "y");
assert(get_deleted_res.status == MAP_ERR_NOT_FOUND);
// Adding a new element should increase the size
// and should not loop forever
const int k = 5;
map_result_t add_res = map_add(map, "k", (void*)&k);
assert(add_res.status == MAP_OK);
assert(map->tombstone_count < map->capacity);
assert(map->capacity == 8);
assert(map->size == 1);
// Retrieving an ENTRY_OCCUPIED element should work normally
map_result_t get_res = map_get(map, "k");
assert(get_res.status == MAP_OK);
assert(*(int*)get_res.value.element == 5);
map_destroy(map);
}
// Map with heterogeneous types
void test_map_mixed(void) {
map_result_t res = map_new();
@@ -324,6 +376,7 @@ int main(void) {
TEST(map_add_multiple);
TEST(map_get);
TEST(map_get_invalid);
TEST(map_get_deleted_slots);
TEST(map_mixed);
TEST(map_update);
TEST(map_remove);

View File

@@ -214,7 +214,7 @@ void test_vector_sort_string(void) {
// Sort vector with custom data type
typedef struct {
char name[256];
const char *name;
int age;
} Person;