Fixed multiple bugs in hashmap implementation.
Some checks failed
gcc-build / gcc-build (push) Has been cancelled
clang-build / clang-build (push) Has been cancelled

1. Fixed infinite loop issue that occurred when map was full of
   tombstones;
2. Fixed bug related to tombstone count decrement when adding a new
   key on a deleted slot;
3. Added proper map resize when keys are added or removed;
4. Fixed inconsistent error messages;
5. Added proper NULL check on map_remove method.
This commit is contained in:
2025-11-25 10:58:30 +01:00
parent b885d93b6f
commit a6f6d7dbec
4 changed files with 114 additions and 22 deletions

View File

@@ -54,6 +54,7 @@ defined as follows:
typedef enum { typedef enum {
MAP_OK = 0x0, MAP_OK = 0x0,
MAP_ERR_ALLOCATE, MAP_ERR_ALLOCATE,
MAP_ERR_OVERFLOW,
MAP_ERR_INVALID, MAP_ERR_INVALID,
MAP_ERR_NOT_FOUND MAP_ERR_NOT_FOUND
} map_status_t; } map_status_t;

View File

@@ -74,24 +74,30 @@ map_result_t map_new(void) {
* @map: a non-null map * @map: a non-null map
* @key: a string representing the key to find * @key: a string representing the key to find
* *
* Finds next available slot for insertion * Finds next available slot for insertion (empty or deleted)
* or the slot containing an existing key
* *
* Returns the index of available slot * Returns the index of available slot
*/ */
size_t map_insert_index(const map_t *map, const char *key) { size_t map_insert_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key); const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity; size_t idx = key_digest % map->capacity;
size_t delete_tracker = map->capacity; // Fallback index
while (map->elements[idx].state == ENTRY_OCCUPIED) { while (map->elements[idx].state != ENTRY_EMPTY) {
if (strcmp(map->elements[idx].key, key) == 0) { if (map->elements[idx].state == ENTRY_OCCUPIED) {
// In this case the key already exists, thus we replace it if (!strcmp(map->elements[idx].key, key)) { return idx; }
return idx; } else if (map->elements[idx].state == ENTRY_DELETED) {
// Track first deleted slot we encounter
if (delete_tracker == map->capacity) {
delete_tracker = idx;
}
} }
idx = (idx + 1) % map->capacity; idx = (idx + 1) % map->capacity;
} }
return idx; return (delete_tracker != map->capacity) ? delete_tracker : idx;
} }
/** /**
@@ -107,6 +113,13 @@ map_result_t map_resize(map_t *map) {
const size_t old_capacity = map->capacity; const size_t old_capacity = map->capacity;
map_element_t *old_elements = map->elements; map_element_t *old_elements = map->elements;
if (map->capacity > SIZE_MAX / 2) {
result.status = MAP_ERR_OVERFLOW;
SET_MSG(result, "Capacity overflow on map resize");
return result;
}
map->capacity *= 2; map->capacity *= 2;
map->elements = calloc(map->capacity, sizeof(map_element_t)); map->elements = calloc(map->capacity, sizeof(map_element_t));
if (map->elements == NULL) { if (map->elements == NULL) {
@@ -129,8 +142,6 @@ map_result_t map_resize(map_t *map) {
size_t new_idx = map_insert_index(map, old_elements[idx].key); size_t new_idx = map_insert_index(map, old_elements[idx].key);
map->elements[new_idx] = old_elements[idx]; map->elements[new_idx] = old_elements[idx];
map->size++; map->size++;
} else if (old_elements[idx].state == ENTRY_DELETED) {
free(old_elements[idx].key);
} }
} }
@@ -185,7 +196,12 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
return result; return result;
} }
// Otherwise, the key doesn't exist. Therefore we need to allocate a new key // If reusing a deleted slot, decrement the tombstone count
if (map->elements[idx].state == ENTRY_DELETED) {
if (map->tombstone_count > 0) { map->tombstone_count--; }
}
// Allocate a new key
map->elements[idx].key = malloc(strlen(key) + 1); map->elements[idx].key = malloc(strlen(key) + 1);
if (map->elements[idx].key == NULL) { if (map->elements[idx].key == NULL) {
result.status = MAP_ERR_ALLOCATE; result.status = MAP_ERR_ALLOCATE;
@@ -216,17 +232,27 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
*/ */
size_t map_find_index(const map_t *map, const char *key) { size_t map_find_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key); const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity; const size_t start_idx = key_digest % map->capacity;
while (map->elements[idx].state != ENTRY_EMPTY) { for (size_t probes = 0; probes < map->capacity; probes++) {
if ((map->elements[idx].state == ENTRY_OCCUPIED) && size_t idx = (start_idx + probes) % map->capacity;
(strcmp(map->elements[idx].key, key) == 0)) {
if (map->elements[idx].state == ENTRY_EMPTY) {
// The key is not on the map
return idx;
}
if ((map->elements[idx].state == ENTRY_OCCUPIED) &&
(!strcmp(map->elements[idx].key, key))) {
// The key has been found
return idx; return idx;
} }
idx = (idx + 1) % map->capacity;
} }
return idx; // If we fail to find an ENTRY_EMPTY slot after probing the entire table,
// fall back to returning the original start index. This should never
// happen because the map is resized whenever an element is inserted or removed.
return start_idx;
} }
/** /**
@@ -276,7 +302,7 @@ map_result_t map_get(const map_t *map, const char *key) {
map_result_t map_remove(map_t *map, const char *key) { map_result_t map_remove(map_t *map, const char *key) {
map_result_t result = {0}; map_result_t result = {0};
if (map == NULL) { if (map == NULL || key == NULL) {
result.status = MAP_ERR_INVALID; result.status = MAP_ERR_INVALID;
SET_MSG(result, "Invalid map"); SET_MSG(result, "Invalid map");
@@ -286,8 +312,8 @@ map_result_t map_remove(map_t *map, const char *key) {
const size_t idx = map_find_index(map, key); const size_t idx = map_find_index(map, key);
if (map->elements[idx].state != ENTRY_OCCUPIED) { if (map->elements[idx].state != ENTRY_OCCUPIED) {
result.status = MAP_ERR_INVALID; result.status = MAP_ERR_NOT_FOUND;
SET_MSG(result, "Cannot delete this element"); SET_MSG(result, "Element not found");
return result; return result;
} }
@@ -304,6 +330,19 @@ map_result_t map_remove(map_t *map, const char *key) {
map->size--; map->size--;
map->tombstone_count++; map->tombstone_count++;
// Check if there are too many tombstone entries
const double load_factor = (double)(map->size + map->tombstone_count) / map->capacity;
if (load_factor > LOAD_FACTOR_THRESHOLD) {
map_result_t resize_res = map_resize(map);
if (resize_res.status != MAP_OK) {
result.status = MAP_OK;
SET_MSG(result, "Key successfully deleted. Resize has failed");
return result;
}
}
result.status = MAP_OK; result.status = MAP_OK;
SET_MSG(result, "Key successfully deleted"); SET_MSG(result, "Key successfully deleted");
@@ -329,8 +368,7 @@ map_result_t map_clear(map_t *map) {
} }
for (size_t idx = 0; idx < map->capacity; idx++) { for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED || if (map->elements[idx].state == ENTRY_OCCUPIED) {
map->elements[idx].state == ENTRY_DELETED) {
free(map->elements[idx].key); free(map->elements[idx].key);
map->elements[idx].key = NULL; map->elements[idx].key = NULL;
map->elements[idx].value = NULL; map->elements[idx].value = NULL;
@@ -368,8 +406,7 @@ map_result_t map_destroy(map_t *map) {
} }
for (size_t idx = 0; idx < map->capacity; idx++) { for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED || if (map->elements[idx].state == ENTRY_OCCUPIED) {
map->elements[idx].state == ENTRY_DELETED) {
free(map->elements[idx].key); free(map->elements[idx].key);
} }
} }

View File

@@ -17,6 +17,7 @@
typedef enum { typedef enum {
MAP_OK = 0x0, MAP_OK = 0x0,
MAP_ERR_ALLOCATE, MAP_ERR_ALLOCATE,
MAP_ERR_OVERFLOW,
MAP_ERR_INVALID, MAP_ERR_INVALID,
MAP_ERR_NOT_FOUND MAP_ERR_NOT_FOUND
} map_status_t; } map_status_t;

View File

@@ -97,6 +97,58 @@ void test_map_get_invalid(void) {
map_destroy(map); map_destroy(map);
} }
// Get from map full of deleted slots
// If the table contains no ENTRY_EMPTY slots
// (i.e., the table is full of ENTRY_DELETED slots),
// map_get and map_remove should NOT loop forever
void test_map_get_deleted_slots(void) {
map_result_t res = map_new();
assert(res.status == MAP_OK);
map_t *map = res.value.map;
// Fill INITIAL_CAP (=4) without trigger resizing
map_add(map, "x", (void*)1);
map_add(map, "y", (void*)2);
map_add(map, "z", (void*)3);
map_add(map, "j", (void*)4);
// Remove all ENTRY_OCCUPIED slots.
// This function should resize the map when the load factor is too big
// and should also garbage-collect all the ENTRY_DELETED entries.
// Tombstone count should therefore be equal to 3 and capacity should be doubled
map_remove(map, "x");
map_remove(map, "y");
map_remove(map, "z");
map_remove(map, "j");
assert(map->tombstone_count == 3);
assert(map->capacity == 8);
assert(map->size == 0);
// Retrieving a deleted element should return an error
// but should not loop forever
map_result_t get_deleted_res = map_get(map, "y");
assert(get_deleted_res.status == MAP_ERR_NOT_FOUND);
// Adding a new element should increase the size
// and should not loop forever
const int k = 5;
map_result_t add_res = map_add(map, "k", (void*)&k);
assert(add_res.status == MAP_OK);
assert(map->tombstone_count < map->capacity);
assert(map->capacity == 8);
assert(map->size == 1);
// Retrieving an ENTRY_OCCUPIED element should works normally
map_result_t get_res = map_get(map, "k");
assert(get_res.status == MAP_OK);
assert(*(int*)get_res.value.element == 5);
map_destroy(map);
}
// Map with heterogeneous types // Map with heterogeneous types
void test_map_mixed(void) { void test_map_mixed(void) {
map_result_t res = map_new(); map_result_t res = map_new();
@@ -324,6 +376,7 @@ int main(void) {
TEST(map_add_multiple); TEST(map_add_multiple);
TEST(map_get); TEST(map_get);
TEST(map_get_invalid); TEST(map_get_invalid);
TEST(map_get_deleted_slots);
TEST(map_mixed); TEST(map_mixed);
TEST(map_update); TEST(map_update);
TEST(map_remove); TEST(map_remove);