From a6f6d7dbec59b279a537ac5aceaba454dcb01210 Mon Sep 17 00:00:00 2001 From: Marco Cetica Date: Tue, 25 Nov 2025 10:58:30 +0100 Subject: [PATCH] Fixed multiple bugs in hashmap implementation. 1. Fixed infinite loop issue that occurred when map was full of tombstones; 2. Fixed bug related to tombstone count decrement when adding a new key on a deleted slot; 3. Added proper map resize when keys are added or removed; 4. Fixed inconsistent error messages; 5. Added proper NULL check on map_remove method. --- docs/map.md | 1 + src/map.c | 81 +++++++++++++++++++++++++++++++++++------------- src/map.h | 1 + tests/test_map.c | 53 +++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 22 deletions(-) diff --git a/docs/map.md b/docs/map.md index 02471b1..f825bf8 100644 --- a/docs/map.md +++ b/docs/map.md @@ -54,6 +54,7 @@ defined as follows: typedef enum { MAP_OK = 0x0, MAP_ERR_ALLOCATE, + MAP_ERR_OVERFLOW, MAP_ERR_INVALID, MAP_ERR_NOT_FOUND } map_status_t; diff --git a/src/map.c b/src/map.c index 542065c..729cff5 100644 --- a/src/map.c +++ b/src/map.c @@ -74,24 +74,30 @@ map_result_t map_new(void) { * @map: a non-null map * @key: a string representing the key to find * - * Finds next available slot for insertion + * Finds next available slot for insertion (empty or deleted) + * or the slot containing an existing key * * Returns the index of available slot */ size_t map_insert_index(const map_t *map, const char *key) { const uint64_t key_digest = hash_key(key); size_t idx = key_digest % map->capacity; + size_t delete_tracker = map->capacity; // Fallback index - while (map->elements[idx].state == ENTRY_OCCUPIED) { - if (strcmp(map->elements[idx].key, key) == 0) { - // In this case the key already exists, thus we replace it - return idx; + while (map->elements[idx].state != ENTRY_EMPTY) { + if (map->elements[idx].state == ENTRY_OCCUPIED) { + if (!strcmp(map->elements[idx].key, key)) { return idx; } + } else if (map->elements[idx].state == ENTRY_DELETED) { + // Track first deleted slot we encounter + if (delete_tracker == map->capacity) { + delete_tracker = idx; + } } idx = (idx + 1) % map->capacity; } - return idx; + return (delete_tracker != map->capacity) ? delete_tracker : idx; } /** @@ -107,6 +113,13 @@ map_result_t map_resize(map_t *map) { const size_t old_capacity = map->capacity; map_element_t *old_elements = map->elements; + if (map->capacity > SIZE_MAX / 2) { + result.status = MAP_ERR_OVERFLOW; + SET_MSG(result, "Capacity overflow on map resize"); + + return result; + } + map->capacity *= 2; map->elements = calloc(map->capacity, sizeof(map_element_t)); if (map->elements == NULL) { @@ -129,8 +142,6 @@ map_result_t map_resize(map_t *map) { size_t new_idx = map_insert_index(map, old_elements[idx].key); map->elements[new_idx] = old_elements[idx]; map->size++; - } else if (old_elements[idx].state == ENTRY_DELETED) { - free(old_elements[idx].key); } } @@ -185,7 +196,12 @@ map_result_t map_add(map_t *map, const char *key, void *value) { return result; } - // Otherwise, the key doesn't exist. Therefore we need to allocate a new key + // If reusing a deleted slot, decrement the tombstone count + if (map->elements[idx].state == ENTRY_DELETED) { + if (map->tombstone_count > 0) { map->tombstone_count--; } + } + + // Allocate a new key map->elements[idx].key = malloc(strlen(key) + 1); if (map->elements[idx].key == NULL) { result.status = MAP_ERR_ALLOCATE; @@ -216,17 +232,27 @@ map_result_t map_add(map_t *map, const char *key, void *value) { */ size_t map_find_index(const map_t *map, const char *key) { const uint64_t key_digest = hash_key(key); - size_t idx = key_digest % map->capacity; + const size_t start_idx = key_digest % map->capacity; - while (map->elements[idx].state != ENTRY_EMPTY) { - if ((map->elements[idx].state == ENTRY_OCCUPIED) && - (strcmp(map->elements[idx].key, key) == 0)) { + for (size_t probes = 0; probes < map->capacity; probes++) { + size_t idx = (start_idx + probes) % map->capacity; + + if (map->elements[idx].state == ENTRY_EMPTY) { + // The key is not on the map + return idx; + } + + if ((map->elements[idx].state == ENTRY_OCCUPIED) && + (!strcmp(map->elements[idx].key, key))) { + // The key has been found return idx; } - idx = (idx + 1) % map->capacity; } - return idx; + // If we fail to find an ENTRY_EMPTY slot after probing the entire table, + // fall back to returning the original start index. This should never + // happen because the map is resized whenever an element is inserted or removed. + return start_idx; } /** @@ -276,7 +302,7 @@ map_result_t map_get(const map_t *map, const char *key) { map_result_t map_remove(map_t *map, const char *key) { map_result_t result = {0}; - if (map == NULL) { + if (map == NULL || key == NULL) { result.status = MAP_ERR_INVALID; SET_MSG(result, "Invalid map"); @@ -286,8 +312,8 @@ map_result_t map_remove(map_t *map, const char *key) { const size_t idx = map_find_index(map, key); if (map->elements[idx].state != ENTRY_OCCUPIED) { - result.status = MAP_ERR_INVALID; - SET_MSG(result, "Cannot delete this element"); + result.status = MAP_ERR_NOT_FOUND; + SET_MSG(result, "Element not found"); return result; } @@ -304,6 +330,19 @@ map_result_t map_remove(map_t *map, const char *key) { map->size--; map->tombstone_count++; + // Check if there are too many tombstone entries + const double load_factor = (double)(map->size + map->tombstone_count) / map->capacity; + if (load_factor > LOAD_FACTOR_THRESHOLD) { + map_result_t resize_res = map_resize(map); + if (resize_res.status != MAP_OK) { + result.status = MAP_OK; + SET_MSG(result, "Key successfully deleted. Resize has failed"); + + return result; + } + } + + result.status = MAP_OK; SET_MSG(result, "Key successfully deleted"); @@ -329,8 +368,7 @@ map_result_t map_clear(map_t *map) { } for (size_t idx = 0; idx < map->capacity; idx++) { - if (map->elements[idx].state == ENTRY_OCCUPIED || - map->elements[idx].state == ENTRY_DELETED) { + if (map->elements[idx].state == ENTRY_OCCUPIED) { free(map->elements[idx].key); map->elements[idx].key = NULL; map->elements[idx].value = NULL; @@ -368,8 +406,7 @@ map_result_t map_destroy(map_t *map) { } for (size_t idx = 0; idx < map->capacity; idx++) { - if (map->elements[idx].state == ENTRY_OCCUPIED || - map->elements[idx].state == ENTRY_DELETED) { + if (map->elements[idx].state == ENTRY_OCCUPIED) { free(map->elements[idx].key); } } diff --git a/src/map.h b/src/map.h index a7f3643..5026da7 100644 --- a/src/map.h +++ b/src/map.h @@ -17,6 +17,7 @@ typedef enum { MAP_OK = 0x0, MAP_ERR_ALLOCATE, + MAP_ERR_OVERFLOW, MAP_ERR_INVALID, MAP_ERR_NOT_FOUND } map_status_t; diff --git a/tests/test_map.c b/tests/test_map.c index 47085b9..7580bc6 100644 --- a/tests/test_map.c +++ b/tests/test_map.c @@ -97,6 +97,58 @@ void test_map_get_invalid(void) { map_destroy(map); } +// Get from map full of deleted slots +// If the table contains no ENTRY_EMPTY slots +// (i.e., the table is full of ENTRY_DELETED slots), +// map_get and map_remove should NOT loop forever +void test_map_get_deleted_slots(void) { + map_result_t res = map_new(); + + assert(res.status == MAP_OK); + map_t *map = res.value.map; + + // Fill INITIAL_CAP (=4) without trigger resizing + map_add(map, "x", (void*)1); + map_add(map, "y", (void*)2); + map_add(map, "z", (void*)3); + map_add(map, "j", (void*)4); + + // Remove all ENTRY_OCCUPIED slots. + // This function should resize the map when the load factor is too big + // and should also garbage-collect all the ENTRY_DELETED entries. + // Tombstone count should therefore be equal to 3 and capacity should be doubled + map_remove(map, "x"); + map_remove(map, "y"); + map_remove(map, "z"); + map_remove(map, "j"); + + assert(map->tombstone_count == 3); + assert(map->capacity == 8); + assert(map->size == 0); + + // Retrieving a deleted element should return an error + // but should not loop forever + map_result_t get_deleted_res = map_get(map, "y"); + assert(get_deleted_res.status == MAP_ERR_NOT_FOUND); + + // Adding a new element should increase the size + // and should not loop forever + const int k = 5; + map_result_t add_res = map_add(map, "k", (void*)&k); + assert(add_res.status == MAP_OK); + + assert(map->tombstone_count < map->capacity); + assert(map->capacity == 8); + assert(map->size == 1); + + // Retrieving an ENTRY_OCCUPIED element should works normally + map_result_t get_res = map_get(map, "k"); + assert(get_res.status == MAP_OK); + assert(*(int*)get_res.value.element == 5); + + map_destroy(map); +} + // Map with heterogeneous types void test_map_mixed(void) { map_result_t res = map_new(); @@ -324,6 +376,7 @@ int main(void) { TEST(map_add_multiple); TEST(map_get); TEST(map_get_invalid); + TEST(map_get_deleted_slots); TEST(map_mixed); TEST(map_update); TEST(map_remove);