Started hash map implementation

This commit is contained in:
2025-10-25 17:21:12 +02:00
parent bbdec328eb
commit 3859628a23
6 changed files with 461 additions and 78 deletions

263
src/map.c Normal file
View File

@@ -0,0 +1,263 @@
/*
 * SET_MSG
 * @result: a result object (not a pointer) that has a `message` byte buffer
 * @msg: the NUL-terminated text to store
 *
 * Copies @msg into @result.message, writing at most RESULT_MSG_SIZE bytes
 * including the terminating NUL. The text goes through "%s" so that a '%'
 * inside @msg is copied verbatim instead of being parsed as a conversion
 * specification.
 */
#define SET_MSG(result, msg) \
    snprintf((char *)(result).message, RESULT_MSG_SIZE, "%s", (msg))
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "map.h"
// Internal methods
// Declared static, so these helpers have internal linkage; map.h exposes
// only map_new, map_add and map_get.
// hash_key: digest of a key string
static uint64_t hash_key(const char *key);
// map_insert_index: slot where a key should be written
static size_t map_insert_index(const map_t *map, const char *key);
// map_find_index: slot where a key is looked up
static size_t map_find_index(const map_t *map, const char *key);
// map_resize: doubles the capacity and rehashes the occupied entries
static map_result_t map_resize(map_t *map);
/**
 * hash_key
 * @key: a non-null, NUL-terminated string to hash
 *
 * Computes the 64-bit FNV-1a digest of @key: each byte is XOR-ed into the
 * running hash, which is then multiplied by the FNV prime.
 *
 * Returns the digest of @key
 */
static uint64_t hash_key(const char *key) {
    uint64_t hash = FNV_OFFSET_BASIS_64;

    // Read the key as unsigned bytes: a plain char may be signed, in which
    // case values above 0x7F would be sign-extended by the widening cast and
    // the digest would differ between platforms.
    for (const unsigned char *byte = (const unsigned char *)key; *byte != '\0'; byte++) {
        hash ^= (uint64_t)*byte;
        hash *= FNV_PRIME_64;
    }

    return hash;
}
/**
 * map_new
 *
 * Allocates an empty hash map with INITIAL_CAP zero-initialized slots
 * (a zeroed slot has state ENTRY_EMPTY).
 *
 * Returns a map_result_t data type. On success the status is MAP_OK and
 * value.map points to the new map. If either allocation fails the status is
 * MAP_ERR_ALLOCATE, value is left zeroed and nothing remains allocated.
 */
map_result_t map_new(void) {
    map_result_t result = {0};

    map_t *map = malloc(sizeof *map);
    if (map == NULL) {
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map");

        return result;
    }

    map->elements = calloc(INITIAL_CAP, sizeof *map->elements);
    if (map->elements == NULL) {
        // Do not leak the map header when the slot array cannot be allocated
        free(map);
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map elements");

        return result;
    }

    // Initialize map
    map->capacity = INITIAL_CAP;
    map->size = 0;
    map->tombstone_count = 0;

    result.status = MAP_OK;
    SET_MSG(result, "Map successfully created");
    result.value.map = map;

    return result;
}
/**
* map_insert_index
* @map: a non-null map
* @key: a string representing the key to find
*
* Finds next available slot for insertion
*
* Returns the index of available slot
*/
size_t map_insert_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity;
while (map->elements[idx].state == ENTRY_OCCUPIED) {
if (strcmp(map->elements[idx].key, key) == 0) {
// In this case the key already exists, thus we replace it
return idx;
}
idx = (idx + 1) % map->capacity;
}
return idx;
}
/**
 * map_resize
 * @map: a non-null map
 *
 * Doubles the capacity of @map and rehashes every occupied entry into the
 * new slot array. Deleted slots are not carried over: their keys are freed
 * and the tombstone counter restarts from zero. If the new array cannot be
 * obtained, @map is left exactly as it was.
 *
 * Returns a map_result_t data type containing the status
 */
static map_result_t map_resize(map_t *map) {
    map_result_t result = {0};

    const size_t old_capacity = map->capacity;
    map_element_t *old_elements = map->elements;

    // Doubling must not wrap around size_t
    if (old_capacity > SIZE_MAX / 2) {
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Exceeded maximum size while resizing map");

        return result;
    }

    // Allocate first and commit afterwards, so a failure needs no rollback
    const size_t new_capacity = old_capacity * 2;
    map_element_t *new_elements = calloc(new_capacity, sizeof *new_elements);
    if (new_elements == NULL) {
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to reallocate memory for map");

        return result;
    }

    map->elements = new_elements;
    map->capacity = new_capacity;
    map->size = 0;
    map->tombstone_count = 0;

    // Rehash all existing elements
    for (size_t idx = 0; idx < old_capacity; idx++) {
        const map_element_t *old = &old_elements[idx];

        if (old->state == ENTRY_OCCUPIED) {
            // The key pointer is moved, not copied: ownership passes to the new slot
            const size_t new_idx = map_insert_index(map, old->key);
            map->elements[new_idx] = *old;
            map->size++;
        } else if (old->state == ENTRY_DELETED) {
            free(old->key);
        }
    }

    free(old_elements);

    result.status = MAP_OK;
    SET_MSG(result, "Map successfully resized");

    return result;
}
/**
 * map_add
 * @map: a non-null map
 * @key: a non-null string representing the index key
 * @value: a generic value to add to the map
 *
 * Adds (@key, @value) to @map. The map stores its own heap copy of @key;
 * @value is stored as a plain pointer and is not copied. If @key is already
 * present only the stored value pointer is replaced (the previous pointer is
 * not freed).
 *
 * Returns a map_result_t data type containing the status: MAP_OK on success,
 * MAP_ERR_INVALID when @map or @key is NULL, MAP_ERR_ALLOCATE when resizing
 * or copying the key fails
 */
map_result_t map_add(map_t *map, const char *key, void *value) {
    map_result_t result = {0};

    if (map == NULL || key == NULL) {
        result.status = MAP_ERR_INVALID;
        SET_MSG(result, "Invalid map or key");

        return result;
    }

    // Check whether there's enough space available.
    // The load factor is evaluated as it would be AFTER this insertion:
    // checking the current load with '>' let a table of capacity 4 reach
    // 4/4 occupancy, and a lookup for a missing key then never met an empty
    // slot to stop on.
    const double load_factor =
        (double)(map->size + map->tombstone_count + 1) / (double)map->capacity;
    if (load_factor > LOAD_FACTOR_THRESHOLD) {
        result = map_resize(map);
        if (result.status != MAP_OK) {
            return result;
        }
    }

    // Find next available slot for insertion
    const size_t idx = map_insert_index(map, key);
    map_element_t *slot = &map->elements[idx];

    // If slot is occupied, it means that the key already exists.
    // Therefore we can update it
    if (slot->state == ENTRY_OCCUPIED) {
        slot->value = value;
        result.status = MAP_OK;
        SET_MSG(result, "Element successfully updated");

        return result;
    }

    // Otherwise, the key doesn't exist. Therefore we need to allocate a new key
    const size_t key_size = strlen(key) + 1;
    char *key_copy = malloc(key_size);
    if (key_copy == NULL) {
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map key");

        return result;
    }
    memcpy(key_copy, key, key_size);

    // Recycling a tombstone: release the key it still holds (map_resize
    // treats a deleted slot's key as owned by the map in the same way) and
    // keep the tombstone counter in sync.
    if (slot->state == ENTRY_DELETED) {
        free(slot->key);
        if (map->tombstone_count > 0) {
            map->tombstone_count--;
        }
    }

    slot->key = key_copy;
    slot->value = value;
    slot->state = ENTRY_OCCUPIED;
    map->size++;

    result.status = MAP_OK;
    SET_MSG(result, "Element successfully added");

    return result;
}
/**
* map_find_index
* @map: a non-null map
* @key: a string representing the index key to find
*
* Finds the index where a key is located using linear probing to handle collisions
*
* Returns the index of the key if it is found
*/
size_t map_find_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity;
while (map->elements[idx].state != ENTRY_EMPTY) {
if ((map->elements[idx].state == ENTRY_OCCUPIED) &&
(strcmp(map->elements[idx].key, key) == 0)) {
return idx;
}
idx = (idx + 1) % map->capacity;
}
return idx;
}
/**
* map_get
* @map: a non-null map
* @key: a string representing the index key
*
* Returns a map_result_t data type containing the element indexed by @key if available
*/
map_result_t map_get(const map_t *map, const char *key) {
map_result_t result = {0};
if (map == NULL || key == NULL) {
result.status = MAP_ERR_INVALID;
SET_MSG(result, "Invalid map or key");
return result;
}
// Retrieve key index
const size_t idx = map_find_index(map, key);
// If slot status is 'occupied' then the key exists
if (map->elements[idx].state == ENTRY_OCCUPIED) {
result.status = MAP_OK;
SET_MSG(result, "Value successfully retrieved");
result.value.element = map->elements[idx].value;
return result;
}
result.status = MAP_ERR_NOT_FOUND;
SET_MSG(result, "Element not found");
return result;
}

64
src/map.h Normal file
View File

@@ -0,0 +1,64 @@
#ifndef MAP_H
#define MAP_H
// Size in bytes of the message buffer embedded in map_result_t
#define RESULT_MSG_SIZE 64
// Initial capacity and load factor threshold
// map_new allocates INITIAL_CAP slots; map_add compares the table load
// against LOAD_FACTOR_THRESHOLD to decide when to call map_resize.
#define INITIAL_CAP 4
#define LOAD_FACTOR_THRESHOLD 0.75
// FNV-1a constants
// 64-bit starting value and multiplier used by hash_key in map.c
#define FNV_OFFSET_BASIS_64 0xCBF29CE484222325
#define FNV_PRIME_64 0x00000100000001B3
#include <stdint.h>
#include <stddef.h>
// Status codes reported in map_result_t.status
typedef enum {
// The operation completed successfully
MAP_OK = 0x0,
// A memory allocation failed
MAP_ERR_ALLOCATE,
// A NULL map or key was passed to a public function
MAP_ERR_INVALID,
// The requested key is not present in the map
MAP_ERR_NOT_FOUND
} map_status_t;
// State of a single slot of the map
typedef enum {
// Never used; it is value 0, so slots obtained from calloc start out empty.
// A lookup stops probing when it reaches an empty slot.
ENTRY_EMPTY = 0x0,
// Holds a key/value pair
ENTRY_OCCUPIED,
// Tombstone: lookups keep probing past it and map_resize frees its key.
// NOTE(review): nothing in map.c as shown sets this state yet - presumably
// reserved for a future remove operation.
ENTRY_DELETED
} element_state_t;
// One slot of the map
typedef struct {
// Heap-allocated copy of the key string, created by map_add
char *key;
// Value pointer exactly as passed to map_add; the pointed-to data is not copied
void *value;
// Whether the slot is empty, occupied or deleted
element_state_t state;
} map_element_t;
// Hash map with string keys; collisions are resolved by linear probing
typedef struct {
// Array of `capacity` slots
map_element_t *elements;
// Number of slots in `elements`
size_t capacity;
// Number of occupied slots (incremented by map_add on each new key)
size_t size;
// Counted together with `size` in the load factor and reset by map_resize;
// presumably the number of ENTRY_DELETED slots - TODO confirm once removal exists
size_t tombstone_count;
} map_t;
// Wrapper data type returned by the map APIs
typedef struct {
// Outcome of the call
map_status_t status;
// NUL-terminated human-readable description of the outcome
uint8_t message[RESULT_MSG_SIZE];
// Payload: which member is meaningful depends on the function called
union {
// Set by map_new on success
map_t *map;
// Set by map_get on success: the stored value pointer
void *element;
} value;
} map_result_t;
#ifdef __cplusplus
extern "C" {
#endif

// Public APIs

// Creates an empty map; on success value.map holds the new map.
// Declared with (void) so that C compilers reject calls passing arguments.
map_result_t map_new(void);
// Adds (key, value) to the map, or replaces the value if the key already
// exists. The key string is copied; the value pointer is stored as-is.
map_result_t map_add(map_t *map, const char *key, void *value);
// Looks up a key; on success value.element holds the stored value pointer,
// otherwise the status is MAP_ERR_NOT_FOUND.
map_result_t map_get(const map_t *map, const char *key);

#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1,3 +1,6 @@
#define SET_MSG(result, msg) \
snprintf((char *)result.message, RESULT_MSG_SIZE, msg)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -5,23 +8,23 @@
#include "vector.h"
// Internal method to increase vector size
static VectorResult vector_resize(Vector *vector);
static vector_result_t vector_resize(vector_t *vector);
/**
* vector_new
* @size: initial number of elements
* @data_size: size of each element in bytes
*
* Returns a VectorResult data type containing a new vector
* Returns a vector_result_t data type containing a new vector
*/
VectorResult vector_new(size_t size, size_t data_size) {
VectorResult result = {0};
vector_result_t vector_new(size_t size, size_t data_size) {
vector_result_t result = {0};
// Allocate a new vector
Vector *vector = malloc(sizeof(Vector));
vector_t *vector = malloc(sizeof(vector_t));
if (vector == NULL) {
result.status = VECTOR_ERR_ALLOCATE;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Failed to allocate memory for vector");
SET_MSG(result, "Failed to allocate memory for vector");
return result;
}
@@ -32,14 +35,15 @@ VectorResult vector_new(size_t size, size_t data_size) {
vector->data_size = data_size;
vector->elements = calloc(size, data_size);
if (vector->elements == NULL) {
free(vector);
result.status = VECTOR_ERR_ALLOCATE;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Failed to allocate memory for vector elements");
SET_MSG(result, "Failed to allocate memory for vector elements");
return result;
}
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Vector successfully created");
SET_MSG(result, "Vector successfully created");
result.value.vector = vector;
return result;
@@ -51,10 +55,10 @@ VectorResult vector_new(size_t size, size_t data_size) {
*
* Increases the size of @vector
*
* Returns a VectorResult data type containing the status
* Returns a vector_result_t data type containing the status
*/
VectorResult vector_resize(Vector *vector) {
VectorResult result = {0};
vector_result_t vector_resize(vector_t *vector) {
vector_result_t result = {0};
size_t old_capacity = vector->capacity;
vector->capacity = (old_capacity > 0 ? ((old_capacity * 3) / 2) : 1);
@@ -62,7 +66,7 @@ VectorResult vector_resize(Vector *vector) {
// Check for integer overflow of the allocation size
if (vector->capacity > SIZE_MAX / vector->data_size) {
result.status = VECTOR_ERR_OVERFLOW;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Exceeded maximum size while resizing vector");
SET_MSG(result, "Exceeded maximum size while resizing vector");
return result;
}
@@ -70,7 +74,7 @@ VectorResult vector_resize(Vector *vector) {
void *new_elements = realloc(vector->elements, (vector->capacity * vector->data_size));
if (new_elements == NULL) {
result.status = VECTOR_ERR_ALLOCATE;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Failed to reallocate memory for vector");
SET_MSG(result, "Failed to reallocate memory for vector");
return result;
}
@@ -78,7 +82,7 @@ VectorResult vector_resize(Vector *vector) {
vector->elements = new_elements;
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Vector successfully resized");
SET_MSG(result, "Vector successfully resized");
return result;
}
@@ -90,14 +94,14 @@ VectorResult vector_resize(Vector *vector) {
*
* Adds @value at the end of @vector
*
* Returns a VectorResult data type containing the status
* Returns a vector_result_t data type containing the status
*/
VectorResult vector_push(Vector *vector, void *value) {
VectorResult result = {0};
vector_result_t vector_push(vector_t *vector, void *value) {
vector_result_t result = {0};
if (vector == NULL || value == NULL) {
result.status = VECTOR_ERR_INVALID;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Invalid vector or value");
SET_MSG(result, "Invalid vector or value");
return result;
}
@@ -130,7 +134,7 @@ VectorResult vector_push(Vector *vector, void *value) {
vector->count++;
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Value successfully added");
SET_MSG(result, "Value successfully added");
return result;
}
@@ -143,21 +147,21 @@ VectorResult vector_push(Vector *vector, void *value) {
*
* Writes @value at @index
*
* Returns a VectorResult data type
* Returns a vector_result_t data type
*/
VectorResult vector_set(Vector *vector, size_t index, void *value) {
VectorResult result = {0};
vector_result_t vector_set(vector_t *vector, size_t index, void *value) {
vector_result_t result = {0};
if (vector == NULL || value == NULL) {
result.status = VECTOR_ERR_INVALID;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Invalid vector or value");
SET_MSG(result, "Invalid vector or value");
return result;
}
if (index >= vector->count) {
result.status = VECTOR_ERR_OVERFLOW;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Index out of bounds");
SET_MSG(result, "Index out of bounds");
return result;
}
@@ -178,7 +182,7 @@ VectorResult vector_set(Vector *vector, size_t index, void *value) {
}
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Value successfully set");
SET_MSG(result, "Value successfully set");
return result;
}
@@ -188,27 +192,27 @@ VectorResult vector_set(Vector *vector, size_t index, void *value) {
* @vector: a non-null vector
* @index: a non-negative integer representing the position of an element
*
* Returns a VectorResult data type containing the element at position @index if present
* Returns a vector_result_t data type containing the element at position @index if available
*/
VectorResult vector_get(Vector *vector, size_t index) {
VectorResult result = {0};
vector_result_t vector_get(vector_t *vector, size_t index) {
vector_result_t result = {0};
if (vector == NULL) {
result.status = VECTOR_ERR_INVALID;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Invalid vector");
SET_MSG(result, "Invalid vector");
return result;
}
if (index >= vector->count) {
result.status = VECTOR_ERR_OVERFLOW;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Index out of bounds");
SET_MSG(result, "Index out of bounds");
return result;
}
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Value successfully retrieved");
SET_MSG(result, "Value successfully retrieved");
result.value.element = (uint8_t *)vector->elements + (index * vector->data_size);
return result;
@@ -221,28 +225,28 @@ VectorResult vector_get(Vector *vector, size_t index) {
* Logically extract an element from the vector by following the LIFO policy.
* This method does NOT de-allocate memory
*
* Returns a VectorResult data type
* Returns a vector_result_t data type
*/
VectorResult vector_pop(Vector *vector) {
VectorResult result = {0};
vector_result_t vector_pop(vector_t *vector) {
vector_result_t result = {0};
if (vector == NULL) {
result.status = VECTOR_ERR_INVALID;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Invalid vector");
SET_MSG(result, "Invalid vector");
return result;
}
if (vector->count == 0) {
result.status = VECTOR_ERR_UNDERFLOW;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Vector is empty");
SET_MSG(result, "Vector is empty");
return result;
}
// Pop an element from the vector
const size_t index = (vector->count - 1);
VectorResult popped_res = vector_get(vector, index);
vector_result_t popped_res = vector_get(vector, index);
if (popped_res.status != VECTOR_OK) {
return popped_res;
@@ -251,7 +255,7 @@ VectorResult vector_pop(Vector *vector) {
vector->count--;
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Value successfully popped");
SET_MSG(result, "Value successfully popped");
result.value.element = popped_res.value.element;
return result;
@@ -263,14 +267,14 @@ VectorResult vector_pop(Vector *vector) {
*
* Resets the vector to an empty state without de-allocating memory
*
* Returns a VectorResult data type
* Returns a vector_result_t data type
*/
VectorResult vector_clear(Vector *vector) {
VectorResult result = {0};
vector_result_t vector_clear(vector_t *vector) {
vector_result_t result = {0};
if (vector == NULL) {
result.status = VECTOR_ERR_INVALID;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Invalid vector");
SET_MSG(result, "Invalid vector");
return result;
}
@@ -278,7 +282,7 @@ VectorResult vector_clear(Vector *vector) {
vector->count = 0;
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Vector successfully cleared");
SET_MSG(result, "Vector successfully cleared");
return result;
}
@@ -289,10 +293,10 @@ VectorResult vector_clear(Vector *vector) {
*
* Deletes the vector and all its elements from the memory
*
* Returns a VectorResult data type
* Returns a vector_result_t data type
*/
VectorResult vector_free(Vector *vector) {
VectorResult result = {0};
vector_result_t vector_free(vector_t *vector) {
vector_result_t result = {0};
if (vector != NULL) {
free(vector->elements);
@@ -300,7 +304,7 @@ VectorResult vector_free(Vector *vector) {
}
result.status = VECTOR_OK;
snprintf((char *)result.message, RESULT_MSG_SIZE, "Vector successfully deleted");
SET_MSG(result, "Vector successfully deleted");
return result;
}

View File

@@ -6,52 +6,49 @@
#include <stdint.h>
#include <stddef.h>
// Vector data type
typedef struct {
size_t count;
size_t capacity;
size_t data_size;
void *elements;
} Vector;
// Result status codes
typedef enum {
VECTOR_OK = 0x0,
VECTOR_ERR_ALLOCATE,
VECTOR_ERR_OVERFLOW,
VECTOR_ERR_UNDERFLOW,
VECTOR_ERR_INVALID
} VectorStatus;
} vector_status_t;
// Wrapper data type for vector APIs
typedef struct {
VectorStatus status;
size_t count;
size_t capacity;
size_t data_size;
void *elements;
} vector_t;
typedef struct {
vector_status_t status;
uint8_t message[RESULT_MSG_SIZE];
union {
Vector *vector;
vector_t *vector;
void *element;
} value;
} VectorResult;
} vector_result_t;
#ifdef __cplusplus
extern "C" {
#endif
// public APIs
VectorResult vector_new(size_t size, size_t data_size);
VectorResult vector_push(Vector *vector, void *value);
VectorResult vector_set(Vector *vector, size_t index, void *value);
VectorResult vector_get(Vector *vector, size_t index);
VectorResult vector_pop(Vector *vector);
VectorResult vector_clear(Vector *vector);
VectorResult vector_free(Vector *vector);
vector_result_t vector_new(size_t size, size_t data_size);
vector_result_t vector_push(vector_t *vector, void *value);
vector_result_t vector_set(vector_t *vector, size_t index, void *value);
vector_result_t vector_get(vector_t *vector, size_t index);
vector_result_t vector_pop(vector_t *vector);
vector_result_t vector_clear(vector_t *vector);
vector_result_t vector_free(vector_t *vector);
// Inline methods
static inline size_t vector_size(const Vector *vector) {
static inline size_t vector_size(const vector_t *vector) {
return vector ? vector->count : 0;
}
static inline size_t vector_capacity(const Vector *vector) {
static inline size_t vector_capacity(const vector_t *vector) {
return vector ? vector->capacity : 0;
}