diff --git a/docs/map.md b/docs/map.md index 02471b1..f825bf8 100644 --- a/docs/map.md +++ b/docs/map.md @@ -54,6 +54,7 @@ defined as follows: typedef enum { MAP_OK = 0x0, MAP_ERR_ALLOCATE, + MAP_ERR_OVERFLOW, MAP_ERR_INVALID, MAP_ERR_NOT_FOUND } map_status_t; diff --git a/src/map.c b/src/map.c index 542065c..3f71b34 100644 --- a/src/map.c +++ b/src/map.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "map.h" @@ -74,24 +75,35 @@ map_result_t map_new(void) { * @map: a non-null map * @key: a string representing the key to find * - * Finds next available slot for insertion + * Finds next available slot for insertion (empty or deleted) + * or the slot containing an existing key * - * Returns the index of available slot + * Returns the index of available slot or SIZE_MAX otherwise */ size_t map_insert_index(const map_t *map, const char *key) { const uint64_t key_digest = hash_key(key); size_t idx = key_digest % map->capacity; + size_t delete_tracker = map->capacity; // Fallback index - while (map->elements[idx].state == ENTRY_OCCUPIED) { - if (strcmp(map->elements[idx].key, key) == 0) { - // In this case the key already exists, thus we replace it - return idx; + for (size_t probes = 0; probes < map->capacity; probes++) { + if (map->elements[idx].state == ENTRY_EMPTY) { + return (delete_tracker != map->capacity) ? delete_tracker : idx; + } + + if (map->elements[idx].state == ENTRY_OCCUPIED) { + if (!strcmp(map->elements[idx].key, key)) { + return idx; + } + } else if (map->elements[idx].state == ENTRY_DELETED) { + if (delete_tracker == map->capacity) { + delete_tracker = idx; + } } idx = (idx + 1) % map->capacity; } - return idx; + return SIZE_MAX; } /** @@ -107,6 +119,13 @@ map_result_t map_resize(map_t *map) { const size_t old_capacity = map->capacity; map_element_t *old_elements = map->elements; + if (map->capacity > SIZE_MAX / 2) { + result.status = MAP_ERR_OVERFLOW; + SET_MSG(result, "Capacity overflow on map resize"); + + return result; + } + map->capacity *= 2; map->elements = calloc(map->capacity, sizeof(map_element_t)); if (map->elements == NULL) { @@ -127,10 +146,19 @@ map_result_t map_resize(map_t *map) { for (size_t idx = 0; idx < old_capacity; idx++) { if (old_elements[idx].state == ENTRY_OCCUPIED) { size_t new_idx = map_insert_index(map, old_elements[idx].key); + if (new_idx == SIZE_MAX) { + // if we can't find a free slot, restore previous state and fail + free(map->elements); + map->elements = old_elements; + map->capacity = old_capacity; + result.status = MAP_ERR_OVERFLOW; + SET_MSG(result, "Failed to rehash elements during resize"); + + return result; + } + map->elements[new_idx] = old_elements[idx]; map->size++; - } else if (old_elements[idx].state == ENTRY_DELETED) { - free(old_elements[idx].key); } } @@ -172,7 +200,28 @@ map_result_t map_add(map_t *map, const char *key, void *value) { } // Find next available slot for insertion - const size_t idx = map_insert_index(map, key); + size_t idx = map_insert_index(map, key); + + // if index is SIZE_MAX then the map is full + if (idx == SIZE_MAX) { + map_result_t resize_res = map_resize(map); + if (resize_res.status != MAP_OK) { + result.status = MAP_ERR_OVERFLOW; + SET_MSG(result, "The map is full and resize has failed"); + + return result; + } + + idx = map_insert_index(map, key); + + // This is very uncommon but still... + if (idx == SIZE_MAX) { + result.status = MAP_ERR_OVERFLOW; + SET_MSG(result, "The map is full after resize(!)"); + + return result; + } + } // If slot is occupied, it means that the key already exists. // Therefore we can update it @@ -185,16 +234,23 @@ map_result_t map_add(map_t *map, const char *key, void *value) { return result; } - // Otherwise, the key doesn't exist. Therefore we need to allocate a new key - map->elements[idx].key = malloc(strlen(key) + 1); - if (map->elements[idx].key == NULL) { + // Allocate a new key + char *new_key = malloc(strlen(key) + 1); + if (new_key == NULL) { result.status = MAP_ERR_ALLOCATE; SET_MSG(result, "Failed to allocate memory for map key"); return result; } - strcpy(map->elements[idx].key, key); + strcpy(new_key, key); + + // If we're reusing a deleted slot, decrement the tombstone count + if (map->elements[idx].state == ENTRY_DELETED) { + if (map->tombstone_count > 0) { map->tombstone_count--; } + } + + map->elements[idx].key = new_key; map->elements[idx].value = value; map->elements[idx].state = ENTRY_OCCUPIED; map->size++; @@ -212,21 +268,31 @@ map_result_t map_add(map_t *map, const char *key, void *value) { * * Finds the index where a key is located using linear probing to handle collisions * - * Returns the index of the key if it is found + * Returns the index of the key if it is found or SIZE_MAX otherwise */ size_t map_find_index(const map_t *map, const char *key) { const uint64_t key_digest = hash_key(key); - size_t idx = key_digest % map->capacity; + const size_t start_idx = key_digest % map->capacity; - while (map->elements[idx].state != ENTRY_EMPTY) { - if ((map->elements[idx].state == ENTRY_OCCUPIED) && - (strcmp(map->elements[idx].key, key) == 0)) { + for (size_t probes = 0; probes < map->capacity; probes++) { + size_t idx = (start_idx + probes) % map->capacity; + + if (map->elements[idx].state == ENTRY_EMPTY) { + // The key is not on the map + return SIZE_MAX; + } + + if ((map->elements[idx].state == ENTRY_OCCUPIED) && + (!strcmp(map->elements[idx].key, key))) { + // The key has been found return idx; } - idx = (idx + 1) % map->capacity; } - return idx; + // If we fail to find an ENTRY_EMPTY slot after probing the entire table, + // fall back by returning SIZE_MAX. This should never + // happen because the map is resized whenever an element is inserted or removed. + return SIZE_MAX; } /** @@ -250,17 +316,20 @@ map_result_t map_get(const map_t *map, const char *key) { const size_t idx = map_find_index(map, key); // If slot status is 'occupied' then the key exists - if (map->elements[idx].state == ENTRY_OCCUPIED) { + // otherwise the idx is set to SIZE_MAX + if (idx == SIZE_MAX) { + result.status = MAP_ERR_NOT_FOUND; + SET_MSG(result, "Element not found"); + } else if (map->elements[idx].state == ENTRY_OCCUPIED) { result.status = MAP_OK; SET_MSG(result, "Value successfully retrieved"); result.value.element = map->elements[idx].value; - - return result; + } else { + // Fallback case. Shouldn't happen but better safe than sorry + result.status = MAP_ERR_NOT_FOUND; + SET_MSG(result, "Element not found"); } - result.status = MAP_ERR_NOT_FOUND; - SET_MSG(result, "Element not found"); - return result; } @@ -276,18 +345,18 @@ map_result_t map_get(const map_t *map, const char *key) { map_result_t map_remove(map_t *map, const char *key) { map_result_t result = {0}; - if (map == NULL) { + if (map == NULL || key == NULL) { result.status = MAP_ERR_INVALID; - SET_MSG(result, "Invalid map"); + SET_MSG(result, "Invalid map or key"); return result; } const size_t idx = map_find_index(map, key); - if (map->elements[idx].state != ENTRY_OCCUPIED) { - result.status = MAP_ERR_INVALID; - SET_MSG(result, "Cannot delete this element"); + if (idx == SIZE_MAX || map->elements[idx].state != ENTRY_OCCUPIED) { + result.status = MAP_ERR_NOT_FOUND; + SET_MSG(result, "Element not found"); return result; } @@ -304,6 +373,19 @@ map_result_t map_remove(map_t *map, const char *key) { map->size--; map->tombstone_count++; + // Check if there are too many tombstone entries + const double load_factor = (double)(map->size + map->tombstone_count) / map->capacity; + if (load_factor > LOAD_FACTOR_THRESHOLD) { + map_result_t resize_res = map_resize(map); + if (resize_res.status != MAP_OK) { + result.status = resize_res.status; + SET_MSG(result, "Key successfully deleted. Resize has failed"); + + return result; + } + } + + result.status = MAP_OK; SET_MSG(result, "Key successfully deleted"); @@ -329,8 +411,7 @@ map_result_t map_clear(map_t *map) { } for (size_t idx = 0; idx < map->capacity; idx++) { - if (map->elements[idx].state == ENTRY_OCCUPIED || - map->elements[idx].state == ENTRY_DELETED) { + if (map->elements[idx].state == ENTRY_OCCUPIED) { free(map->elements[idx].key); map->elements[idx].key = NULL; map->elements[idx].value = NULL; @@ -368,8 +449,7 @@ map_result_t map_destroy(map_t *map) { } for (size_t idx = 0; idx < map->capacity; idx++) { - if (map->elements[idx].state == ENTRY_OCCUPIED || - map->elements[idx].state == ENTRY_DELETED) { + if (map->elements[idx].state == ENTRY_OCCUPIED) { free(map->elements[idx].key); } } diff --git a/src/map.h b/src/map.h index a7f3643..5026da7 100644 --- a/src/map.h +++ b/src/map.h @@ -17,6 +17,7 @@ typedef enum { MAP_OK = 0x0, MAP_ERR_ALLOCATE, + MAP_ERR_OVERFLOW, MAP_ERR_INVALID, MAP_ERR_NOT_FOUND } map_status_t; diff --git a/tests/test_map.c b/tests/test_map.c index 47085b9..7580bc6 100644 --- a/tests/test_map.c +++ b/tests/test_map.c @@ -97,6 +97,58 @@ void test_map_get_invalid(void) { map_destroy(map); } +// Get from map full of deleted slots +// If the table contains no ENTRY_EMPTY slots +// (i.e., the table is full of ENTRY_DELETED slots), +// map_get and map_remove should NOT loop forever +void test_map_get_deleted_slots(void) { + map_result_t res = map_new(); + + assert(res.status == MAP_OK); + map_t *map = res.value.map; + + // Fill INITIAL_CAP (=4) without trigger resizing + map_add(map, "x", (void*)1); + map_add(map, "y", (void*)2); + map_add(map, "z", (void*)3); + map_add(map, "j", (void*)4); + + // Remove all ENTRY_OCCUPIED slots. + // This function should resize the map when the load factor is too big + // and should also garbage-collect all the ENTRY_DELETED entries. + // Tombstone count should therefore be equal to 3 and capacity should be doubled + map_remove(map, "x"); + map_remove(map, "y"); + map_remove(map, "z"); + map_remove(map, "j"); + + assert(map->tombstone_count == 3); + assert(map->capacity == 8); + assert(map->size == 0); + + // Retrieving a deleted element should return an error + // but should not loop forever + map_result_t get_deleted_res = map_get(map, "y"); + assert(get_deleted_res.status == MAP_ERR_NOT_FOUND); + + // Adding a new element should increase the size + // and should not loop forever + const int k = 5; + map_result_t add_res = map_add(map, "k", (void*)&k); + assert(add_res.status == MAP_OK); + + assert(map->tombstone_count < map->capacity); + assert(map->capacity == 8); + assert(map->size == 1); + + // Retrieving an ENTRY_OCCUPIED element should works normally + map_result_t get_res = map_get(map, "k"); + assert(get_res.status == MAP_OK); + assert(*(int*)get_res.value.element == 5); + + map_destroy(map); +} + // Map with heterogeneous types void test_map_mixed(void) { map_result_t res = map_new(); @@ -324,6 +376,7 @@ int main(void) { TEST(map_add_multiple); TEST(map_get); TEST(map_get_invalid); + TEST(map_get_deleted_slots); TEST(map_mixed); TEST(map_update); TEST(map_remove);