Fixed multiple bugs in hashmap implementation.
Some checks failed
clang-build / clang-build (push) Has been cancelled
gcc-build / gcc-build (push) Has been cancelled
Some checks failed
clang-build / clang-build (push) Has been cancelled
gcc-build / gcc-build (push) Has been cancelled
1. Fixed infinite loop issue that occurred when map was full of tombstones; 2. Fixed bug related to tombstone count decrement when adding a new key on a deleted slot; 3. Added proper map resize when keys are added or removed; 4. Fixed inconsistent error messages; 5. Added proper NULL check on map_remove method.
This commit is contained in:
@@ -54,6 +54,7 @@ defined as follows:
|
||||
typedef enum {
|
||||
MAP_OK = 0x0,
|
||||
MAP_ERR_ALLOCATE,
|
||||
MAP_ERR_OVERFLOW,
|
||||
MAP_ERR_INVALID,
|
||||
MAP_ERR_NOT_FOUND
|
||||
} map_status_t;
|
||||
|
||||
152
src/map.c
152
src/map.c
@@ -6,6 +6,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "map.h"
|
||||
|
||||
@@ -74,24 +75,35 @@ map_result_t map_new(void) {
|
||||
* @map: a non-null map
|
||||
* @key: a string representing the key to find
|
||||
*
|
||||
* Finds next available slot for insertion
|
||||
* Finds next available slot for insertion (empty or deleted)
|
||||
* or the slot containing an existing key
|
||||
*
|
||||
* Returns the index of available slot
|
||||
* Returns the index of available slot or SIZE_MAX otherwise
|
||||
*/
|
||||
size_t map_insert_index(const map_t *map, const char *key) {
|
||||
const uint64_t key_digest = hash_key(key);
|
||||
size_t idx = key_digest % map->capacity;
|
||||
size_t delete_tracker = map->capacity; // Fallback index
|
||||
|
||||
while (map->elements[idx].state == ENTRY_OCCUPIED) {
|
||||
if (strcmp(map->elements[idx].key, key) == 0) {
|
||||
// In this case the key already exists, thus we replace it
|
||||
return idx;
|
||||
for (size_t probes = 0; probes < map->capacity; probes++) {
|
||||
if (map->elements[idx].state == ENTRY_EMPTY) {
|
||||
return (delete_tracker != map->capacity) ? delete_tracker : idx;
|
||||
}
|
||||
|
||||
if (map->elements[idx].state == ENTRY_OCCUPIED) {
|
||||
if (!strcmp(map->elements[idx].key, key)) {
|
||||
return idx;
|
||||
}
|
||||
} else if (map->elements[idx].state == ENTRY_DELETED) {
|
||||
if (delete_tracker == map->capacity) {
|
||||
delete_tracker = idx;
|
||||
}
|
||||
}
|
||||
|
||||
idx = (idx + 1) % map->capacity;
|
||||
}
|
||||
|
||||
return idx;
|
||||
return SIZE_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -107,6 +119,13 @@ map_result_t map_resize(map_t *map) {
|
||||
const size_t old_capacity = map->capacity;
|
||||
map_element_t *old_elements = map->elements;
|
||||
|
||||
if (map->capacity > SIZE_MAX / 2) {
|
||||
result.status = MAP_ERR_OVERFLOW;
|
||||
SET_MSG(result, "Capacity overflow on map resize");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
map->capacity *= 2;
|
||||
map->elements = calloc(map->capacity, sizeof(map_element_t));
|
||||
if (map->elements == NULL) {
|
||||
@@ -127,10 +146,19 @@ map_result_t map_resize(map_t *map) {
|
||||
for (size_t idx = 0; idx < old_capacity; idx++) {
|
||||
if (old_elements[idx].state == ENTRY_OCCUPIED) {
|
||||
size_t new_idx = map_insert_index(map, old_elements[idx].key);
|
||||
if (new_idx == SIZE_MAX) {
|
||||
// if we can't find a free slot, restore previous state and fail
|
||||
free(map->elements);
|
||||
map->elements = old_elements;
|
||||
map->capacity = old_capacity;
|
||||
result.status = MAP_ERR_OVERFLOW;
|
||||
SET_MSG(result, "Failed to rehash elements during resize");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
map->elements[new_idx] = old_elements[idx];
|
||||
map->size++;
|
||||
} else if (old_elements[idx].state == ENTRY_DELETED) {
|
||||
free(old_elements[idx].key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,7 +200,28 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
|
||||
}
|
||||
|
||||
// Find next available slot for insertion
|
||||
const size_t idx = map_insert_index(map, key);
|
||||
size_t idx = map_insert_index(map, key);
|
||||
|
||||
// if index is SIZE_MAX then the map is full
|
||||
if (idx == SIZE_MAX) {
|
||||
map_result_t resize_res = map_resize(map);
|
||||
if (resize_res.status != MAP_OK) {
|
||||
result.status = MAP_ERR_OVERFLOW;
|
||||
SET_MSG(result, "The map is full and resize has failed");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
idx = map_insert_index(map, key);
|
||||
|
||||
// This is very uncommon but still...
|
||||
if (idx == SIZE_MAX) {
|
||||
result.status = MAP_ERR_OVERFLOW;
|
||||
SET_MSG(result, "The map is full after resize(!)");
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// If slot is occupied, it means that the key already exists.
|
||||
// Therefore we can update it
|
||||
@@ -185,16 +234,23 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Otherwise, the key doesn't exist. Therefore we need to allocate a new key
|
||||
map->elements[idx].key = malloc(strlen(key) + 1);
|
||||
if (map->elements[idx].key == NULL) {
|
||||
// Allocate a new key
|
||||
char *new_key = malloc(strlen(key) + 1);
|
||||
if (new_key == NULL) {
|
||||
result.status = MAP_ERR_ALLOCATE;
|
||||
SET_MSG(result, "Failed to allocate memory for map key");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
strcpy(map->elements[idx].key, key);
|
||||
strcpy(new_key, key);
|
||||
|
||||
// If we're reusing a deleted slot, decrement the tombstone count
|
||||
if (map->elements[idx].state == ENTRY_DELETED) {
|
||||
if (map->tombstone_count > 0) { map->tombstone_count--; }
|
||||
}
|
||||
|
||||
map->elements[idx].key = new_key;
|
||||
map->elements[idx].value = value;
|
||||
map->elements[idx].state = ENTRY_OCCUPIED;
|
||||
map->size++;
|
||||
@@ -212,21 +268,31 @@ map_result_t map_add(map_t *map, const char *key, void *value) {
|
||||
*
|
||||
* Finds the index where a key is located using linear probing to handle collisions
|
||||
*
|
||||
* Returns the index of the key if it is found
|
||||
* Returns the index of the key if it is found or SIZE_MAX otherwise
|
||||
*/
|
||||
size_t map_find_index(const map_t *map, const char *key) {
|
||||
const uint64_t key_digest = hash_key(key);
|
||||
size_t idx = key_digest % map->capacity;
|
||||
const size_t start_idx = key_digest % map->capacity;
|
||||
|
||||
while (map->elements[idx].state != ENTRY_EMPTY) {
|
||||
if ((map->elements[idx].state == ENTRY_OCCUPIED) &&
|
||||
(strcmp(map->elements[idx].key, key) == 0)) {
|
||||
for (size_t probes = 0; probes < map->capacity; probes++) {
|
||||
size_t idx = (start_idx + probes) % map->capacity;
|
||||
|
||||
if (map->elements[idx].state == ENTRY_EMPTY) {
|
||||
// The key is not on the map
|
||||
return SIZE_MAX;
|
||||
}
|
||||
|
||||
if ((map->elements[idx].state == ENTRY_OCCUPIED) &&
|
||||
(!strcmp(map->elements[idx].key, key))) {
|
||||
// The key has been found
|
||||
return idx;
|
||||
}
|
||||
idx = (idx + 1) % map->capacity;
|
||||
}
|
||||
|
||||
return idx;
|
||||
// If we fail to find an ENTRY_EMPTY slot after probing the entire table,
|
||||
// fall back by returning SIZE_MAX. This should never
|
||||
// happen because the map is resized whenever an element is inserted or removed.
|
||||
return SIZE_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -250,17 +316,20 @@ map_result_t map_get(const map_t *map, const char *key) {
|
||||
const size_t idx = map_find_index(map, key);
|
||||
|
||||
// If slot status is 'occupied' then the key exists
|
||||
if (map->elements[idx].state == ENTRY_OCCUPIED) {
|
||||
// otherwise the idx is set to SIZE_MAX
|
||||
if (idx == SIZE_MAX) {
|
||||
result.status = MAP_ERR_NOT_FOUND;
|
||||
SET_MSG(result, "Element not found");
|
||||
} else if (map->elements[idx].state == ENTRY_OCCUPIED) {
|
||||
result.status = MAP_OK;
|
||||
SET_MSG(result, "Value successfully retrieved");
|
||||
result.value.element = map->elements[idx].value;
|
||||
|
||||
return result;
|
||||
} else {
|
||||
// Fallback case. Shouldn't happen but better safe than sorry
|
||||
result.status = MAP_ERR_NOT_FOUND;
|
||||
SET_MSG(result, "Element not found");
|
||||
}
|
||||
|
||||
result.status = MAP_ERR_NOT_FOUND;
|
||||
SET_MSG(result, "Element not found");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -276,18 +345,18 @@ map_result_t map_get(const map_t *map, const char *key) {
|
||||
map_result_t map_remove(map_t *map, const char *key) {
|
||||
map_result_t result = {0};
|
||||
|
||||
if (map == NULL) {
|
||||
if (map == NULL || key == NULL) {
|
||||
result.status = MAP_ERR_INVALID;
|
||||
SET_MSG(result, "Invalid map");
|
||||
SET_MSG(result, "Invalid map or key");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
const size_t idx = map_find_index(map, key);
|
||||
|
||||
if (map->elements[idx].state != ENTRY_OCCUPIED) {
|
||||
result.status = MAP_ERR_INVALID;
|
||||
SET_MSG(result, "Cannot delete this element");
|
||||
if (idx == SIZE_MAX || map->elements[idx].state != ENTRY_OCCUPIED) {
|
||||
result.status = MAP_ERR_NOT_FOUND;
|
||||
SET_MSG(result, "Element not found");
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -304,6 +373,19 @@ map_result_t map_remove(map_t *map, const char *key) {
|
||||
map->size--;
|
||||
map->tombstone_count++;
|
||||
|
||||
// Check if there are too many tombstone entries
|
||||
const double load_factor = (double)(map->size + map->tombstone_count) / map->capacity;
|
||||
if (load_factor > LOAD_FACTOR_THRESHOLD) {
|
||||
map_result_t resize_res = map_resize(map);
|
||||
if (resize_res.status != MAP_OK) {
|
||||
result.status = resize_res.status;
|
||||
SET_MSG(result, "Key successfully deleted. Resize has failed");
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
result.status = MAP_OK;
|
||||
SET_MSG(result, "Key successfully deleted");
|
||||
|
||||
@@ -329,8 +411,7 @@ map_result_t map_clear(map_t *map) {
|
||||
}
|
||||
|
||||
for (size_t idx = 0; idx < map->capacity; idx++) {
|
||||
if (map->elements[idx].state == ENTRY_OCCUPIED ||
|
||||
map->elements[idx].state == ENTRY_DELETED) {
|
||||
if (map->elements[idx].state == ENTRY_OCCUPIED) {
|
||||
free(map->elements[idx].key);
|
||||
map->elements[idx].key = NULL;
|
||||
map->elements[idx].value = NULL;
|
||||
@@ -368,8 +449,7 @@ map_result_t map_destroy(map_t *map) {
|
||||
}
|
||||
|
||||
for (size_t idx = 0; idx < map->capacity; idx++) {
|
||||
if (map->elements[idx].state == ENTRY_OCCUPIED ||
|
||||
map->elements[idx].state == ENTRY_DELETED) {
|
||||
if (map->elements[idx].state == ENTRY_OCCUPIED) {
|
||||
free(map->elements[idx].key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
typedef enum {
|
||||
MAP_OK = 0x0,
|
||||
MAP_ERR_ALLOCATE,
|
||||
MAP_ERR_OVERFLOW,
|
||||
MAP_ERR_INVALID,
|
||||
MAP_ERR_NOT_FOUND
|
||||
} map_status_t;
|
||||
|
||||
@@ -97,6 +97,58 @@ void test_map_get_invalid(void) {
|
||||
map_destroy(map);
|
||||
}
|
||||
|
||||
// Get from map full of deleted slots
|
||||
// If the table contains no ENTRY_EMPTY slots
|
||||
// (i.e., the table is full of ENTRY_DELETED slots),
|
||||
// map_get and map_remove should NOT loop forever
|
||||
void test_map_get_deleted_slots(void) {
|
||||
map_result_t res = map_new();
|
||||
|
||||
assert(res.status == MAP_OK);
|
||||
map_t *map = res.value.map;
|
||||
|
||||
// Fill INITIAL_CAP (=4) without trigger resizing
|
||||
map_add(map, "x", (void*)1);
|
||||
map_add(map, "y", (void*)2);
|
||||
map_add(map, "z", (void*)3);
|
||||
map_add(map, "j", (void*)4);
|
||||
|
||||
// Remove all ENTRY_OCCUPIED slots.
|
||||
// This function should resize the map when the load factor is too big
|
||||
// and should also garbage-collect all the ENTRY_DELETED entries.
|
||||
// Tombstone count should therefore be equal to 3 and capacity should be doubled
|
||||
map_remove(map, "x");
|
||||
map_remove(map, "y");
|
||||
map_remove(map, "z");
|
||||
map_remove(map, "j");
|
||||
|
||||
assert(map->tombstone_count == 3);
|
||||
assert(map->capacity == 8);
|
||||
assert(map->size == 0);
|
||||
|
||||
// Retrieving a deleted element should return an error
|
||||
// but should not loop forever
|
||||
map_result_t get_deleted_res = map_get(map, "y");
|
||||
assert(get_deleted_res.status == MAP_ERR_NOT_FOUND);
|
||||
|
||||
// Adding a new element should increase the size
|
||||
// and should not loop forever
|
||||
const int k = 5;
|
||||
map_result_t add_res = map_add(map, "k", (void*)&k);
|
||||
assert(add_res.status == MAP_OK);
|
||||
|
||||
assert(map->tombstone_count < map->capacity);
|
||||
assert(map->capacity == 8);
|
||||
assert(map->size == 1);
|
||||
|
||||
// Retrieving an ENTRY_OCCUPIED element should works normally
|
||||
map_result_t get_res = map_get(map, "k");
|
||||
assert(get_res.status == MAP_OK);
|
||||
assert(*(int*)get_res.value.element == 5);
|
||||
|
||||
map_destroy(map);
|
||||
}
|
||||
|
||||
// Map with heterogeneous types
|
||||
void test_map_mixed(void) {
|
||||
map_result_t res = map_new();
|
||||
@@ -324,6 +376,7 @@ int main(void) {
|
||||
TEST(map_add_multiple);
|
||||
TEST(map_get);
|
||||
TEST(map_get_invalid);
|
||||
TEST(map_get_deleted_slots);
|
||||
TEST(map_mixed);
|
||||
TEST(map_update);
|
||||
TEST(map_remove);
|
||||
|
||||
Reference in New Issue
Block a user