Compare commits

..

4 Commits

Author SHA1 Message Date
db766ad891 Updated documentation
All checks were successful
clang-build / clang-build (push) Successful in 41s
gcc-build / gcc-build (push) Successful in 20s
2026-01-23 17:25:30 +01:00
78909ea0fe Migrating to Gitea CI
All checks were successful
clang-build / clang-build (push) Successful in 42s
gcc-build / gcc-build (push) Successful in 19s
2026-01-16 11:56:20 +01:00
912faca620 Added benchmark for String type 2026-01-12 15:58:42 +01:00
b1cca113ce Refactored string_set_at and added documentation 2026-01-12 14:16:07 +01:00
6 changed files with 162 additions and 92 deletions

View File

@@ -56,7 +56,7 @@ $(OBJ_DIR):
mkdir -p $(OBJ_DIR) mkdir -p $(OBJ_DIR)
# Benchmark rules # Benchmark rules
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/string.o
$(CC) $(BENCH_FLAGS) -o $@ $^ $(CC) $(BENCH_FLAGS) -o $@ $^
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR) $(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)

100
README.md
View File

@@ -2,8 +2,9 @@
<h1>Datum</h1> <h1>Datum</h1>
<h6><i>Collection of dynamic and generic data structures.</i></h6> <h6><i>Collection of dynamic and generic data structures.</i></h6>
[![](https://github.com/ceticamarco/datum/actions/workflows/gcc-build.yml/badge.svg)](https://github.com/ceticamarco/datum/actions/workflows/gcc-build.yml) ![](https://git.marcocetica.com/marco/datum/actions/workflows/gcc-build.yml/badge.svg)
[![](https://github.com/ceticamarco/datum/actions/workflows/clang-build.yml/badge.svg)](https://github.com/ceticamarco/datum/actions/workflows/clang-build.yml) ![](https://git.marcocetica.com/marco/datum/actions/workflows/clang-build.yml/badge.svg)
</div> </div>
Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond
@@ -23,63 +24,11 @@ At its simplest, you can use this library as follows:
#include <stdio.h> #include <stdio.h>
#include "src/vector.h" #include "src/vector.h"
/* vector_order_t cmp_asc(const void *a, const void *b) {
* Compile with: gcc main.c src/vector.c const int x = *(int *)a, y = *(int *)b;
* Output: First element: 1
* Head of vector: 16, size is now: 1
*/
// Callback functions if (x < y) return VECTOR_ORDER_LT;
vector_order_t cmp_int_asc(const void *x, const void *y); return (x > y) ? VECTOR_ORDER_GT : VECTOR_ORDER_EQ;
void square(void *element, void *env);
int is_even(const void *element, void *env);
void add(void *accumulator, const void *element, void *env);
int main(void) {
// Create an integer vector of initial capacity equal to 5
vector_t *vec = vector_new(5, sizeof(int)).value.vector;
// Add some elements
vector_push(vec, &(int){1}); // Equivalent as below
int nums[] = {5, 2, 4, 3};
for (int idx = 0; idx < 4; idx++) { vector_push(vec, &nums[idx]); }
// Sort array in ascending order: [1, 2, 3, 4, 5]
vector_sort(vec, cmp_int_asc);
// Print 1st element
const int first = *(int*)vector_get(vec, 0).value.element;
printf("First element: %d\n", first);
int sum = 0;
vector_map(vec, square, NULL); // Square elements: [1, 2, 3, 4, 5] -> [1, 4, 9, 16, 25]
vector_filter(vec, is_even, NULL); // Filter even elements: [1, 4, 9, 16, 25] -> [4, 16]
vector_reduce(vec, &sum, add, NULL); // Sum elements: [4, 16] -> 20
// Pop second element using LIFO policy
const int head = *(int*)vector_pop(vec).value.element;
printf("Head of vector: %d, size is now: %zu\n", head, vector_size(vec));
// Remove vector from memory
vector_destroy(vec);
return 0;
}
vector_order_t cmp_int_asc(const void *x, const void *y) {
int x_int = *(const int*)x;
int y_int = *(const int*)y;
if (x_int < y_int) return VECTOR_ORDER_LT;
if (x_int > y_int) return VECTOR_ORDER_GT;
return VECTOR_ORDER_EQ;
}
void square(void *element, void *env) {
(void)(env);
int *value = (int*)element;
*value = (*value) * (*value);
} }
int is_even(const void *element, void *env) { int is_even(const void *element, void *env) {
@@ -89,9 +38,25 @@ int is_even(const void *element, void *env) {
return (value % 2) == 0; return (value % 2) == 0;
} }
void add(void *accumulator, const void *element, void *env) { /* Compile with: gcc main.c src/vector.c
(void)(env); * Output: '2 4'
*(int*)accumulator += *(int*)element; */
int main(void) {
vector_t *vec = vector_new(5, sizeof(int)).value.vector; // Create a vector of integers
int nums[] = {5, 4, 1, 2, 3}; // Push some elements
for (int idx = 0; idx < 5; idx++) { vector_push(vec, &nums[idx]); }
vector_sort(vec, cmp_asc); // Sort vector
vector_filter(vec, is_even, NULL); // Filter even elements
for (int idx = 0; idx < 2; idx++) {
printf("%d ", *(int *)vector_get(vec, idx).value.element);
}
putchar('\n');
vector_destroy(vec); // Remove vector from memory
return 0;
} }
``` ```
@@ -176,7 +141,7 @@ int main(void) {
#include "src/string.h" #include "src/string.h"
/* /*
* Compile with: gcc -O3 main.c src/string.c * Compile with: gcc main.c src/string.c
* Output: Final string: "Hello,World,😀" Splitted: ["Hello" "World" "😀" ] * Output: Final string: "Hello,World,😀" Splitted: ["Hello" "World" "😀" ]
*/ */
int main(void) { int main(void) {
@@ -218,8 +183,7 @@ This will compile the library as well as the `usage.c` file, the unit tests and
> GNU Multiple Precision Arithmetic Library (GMP). > GNU Multiple Precision Arithmetic Library (GMP).
## Documentation ## Documentation
For additional details about this library (internal design, memory For additional details about this library (internal design, memory management, data ownership, etc.) go to the [docs folder](/docs).
management, data ownership, etc.) go to the [docs folder](/docs).
## Unit tests ## Unit tests
Datum provides some unit tests for `Vector`, `Map` and `BigInt`. To run them, you can issue the following commands: Datum provides some unit tests for `Vector`, `Map` and `BigInt`. To run them, you can issue the following commands:
@@ -232,12 +196,14 @@ $ ./test_bigint
``` ```
## Benchmark ## Benchmark
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector` and the `Map` data structures. You can run it by issuing the following command: Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector`, `Map` and the `String` data structures.
You can run it by issuing the following command:
```sh ```sh
$ ./benchmark_datum $ ./benchmark_datum
Computing Vector average time...average time: 18 ms Computing Vector average time...average time: 19 ms
Computing Map average time...average time: 31 ms Computing Map average time...average time: 55 ms
Computing String average time...average time: 24 ms
``` ```

View File

@@ -1,3 +1,5 @@
#define _POSIX_C_SOURCE 200809L
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
@@ -6,6 +8,7 @@
#include "../src/vector.h" #include "../src/vector.h"
#include "../src/map.h" #include "../src/map.h"
#include "../src/string.h"
typedef void (*test_fn_t)(size_t iterations); typedef void (*test_fn_t)(size_t iterations);
@@ -13,20 +16,15 @@ void test_vector(size_t iterations) {
vector_t *vec = vector_new(16, sizeof(int)).value.vector; vector_t *vec = vector_new(16, sizeof(int)).value.vector;
for (size_t idx = 0; idx < iterations; idx++) { for (size_t idx = 0; idx < iterations; idx++) {
vector_push(vec, &idx); vector_push(vec, &(int){idx});
} }
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum volatile uint64_t sum = 0;
for (size_t idx = 0; idx < iterations; idx++) { for (size_t idx = 0; idx < iterations; idx++) {
const int *val = (int*)vector_get(vec, idx).value.element; const int *val = (int*)vector_get(vec, idx).value.element;
sum += *val; sum += *val;
} }
// Another trick to prevent compiler optimization
if (sum == 0xB00B5) {
printf("sum = %llu\n", (unsigned long long)sum);
}
vector_destroy(vec); vector_destroy(vec);
} }
@@ -43,7 +41,7 @@ void test_map(size_t iterations) {
map_add(map, key, (void*)value); map_add(map, key, (void*)value);
} }
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum volatile uint64_t sum = 0;
for (size_t idx = 0; idx < iterations; idx++) { for (size_t idx = 0; idx < iterations; idx++) {
snprintf(key, sizeof(key), "key_%zu", idx); snprintf(key, sizeof(key), "key_%zu", idx);
@@ -53,32 +51,68 @@ void test_map(size_t iterations) {
// Cleanup values // Cleanup values
for (size_t idx = 0; idx < map->capacity; idx++) { for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED) { snprintf(key, sizeof(key), "key_%zu", idx);
int *val = (int*)map->elements[idx].value;
int *val = (int*)map_get(map, key).value.element;
free(val); free(val);
}
map_remove(map, key);
} }
map_destroy(map); map_destroy(map);
} }
long long benchmark(test_fn_t fun, size_t iterations, size_t runs) { void test_string(size_t iterations) {
long long total = 0; volatile size_t total_len = 0;
for (size_t idx = 0; idx < runs; idx++) {
clock_t start = clock();
fun(iterations);
clock_t end = clock();
total += (long long)((end - start) * 1000 / CLOCKS_PER_SEC); for (size_t idx = 0; idx < iterations; idx++) {
string_t *str1 = string_new("hello").value.string;
string_t *str2 = string_new(" World").value.string;
string_result_t concat = string_concat(str1, str2);
string_result_t upper = string_to_upper(concat.value.string);
total_len += string_size(upper.value.string);
string_result_t needle = string_new("WORLD");
string_result_t contains = string_contains(upper.value.string, needle.value.string);
if (contains.value.idx >= 0) {
total_len += contains.value.idx;
} }
return total / runs; string_destroy(str1);
string_destroy(str2);
string_destroy(concat.value.string);
string_destroy(upper.value.string);
string_destroy(needle.value.string);
}
}
// Sample CLOCK_MONOTONIC and express the reading as a single nanosecond count
// (whole seconds scaled by 1e9 plus the nanosecond remainder).
// NOTE: the return value of clock_gettime is not checked here.
static inline uint64_t now_ns(void) {
    struct timespec stamp;
    clock_gettime(CLOCK_MONOTONIC, &stamp);

    const uint64_t seconds_as_ns = (uint64_t)stamp.tv_sec * 1000000000ULL;
    return seconds_as_ns + stamp.tv_nsec;
}
long long benchmark(test_fn_t fun, size_t iterations, size_t runs) {
long long total = 0;
for (size_t idx = 0; idx < runs; idx++) {
uint64_t start = now_ns();
fun(iterations);
uint64_t end = now_ns();
total += (end - start);
}
return (long long)(total / runs / 1000000);
} }
int main(void) { int main(void) {
// Do a warmup run // Do a warmup run
test_vector(1000); test_vector(1000);
test_map(1000); test_map(1000);
test_string(1000);
printf("Computing Vector average time..."); printf("Computing Vector average time...");
fflush(stdout); fflush(stdout);
@@ -88,5 +122,9 @@ int main(void) {
fflush(stdout); fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30)); printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
printf("Computing String average time...");
fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_string, 1e5, 30));
return 0; return 0;
} }

View File

@@ -29,7 +29,7 @@ At the time being, `Vector` supports the following methods:
- `vector_result_t vector_push(vector, value)`: add a new value to the vector; - `vector_result_t vector_push(vector, value)`: add a new value to the vector;
- `vector_result_t vector_set(vector, index, value)`: update the value of a given index if it exists; - `vector_result_t vector_set(vector, index, value)`: update the value of a given index if it exists;
- `vector_result_t vector_get(vector, index)`: return the value indexed by `index` if it exists; - `vector_result_t vector_get(vector, index)`: return the value indexed by `index` if it exists;
- `map_result_t vector_sort(map, cmp)`: sort array using `cmp` function; - `vector_result_t vector_sort(vector, cmp)`: sort vector using `cmp` function;
- `vector_result_t vector_pop(vector)`: pop last element from the vector following the LIFO policy; - `vector_result_t vector_pop(vector)`: pop last element from the vector following the LIFO policy;
- `vector_result_t vector_map(vector, callback, env)`: apply `callback` function to vector (in-place); - `vector_result_t vector_map(vector, callback, env)`: apply `callback` function to vector (in-place);
- `vector_result_t vector_filter(vector, callback, env)`: filter vector using `callback` (in-place); - `vector_result_t vector_filter(vector, callback, env)`: filter vector using `callback` (in-place);
@@ -85,14 +85,80 @@ In particular, you should be aware of the following design choices:
- The `vector_reduce` callback method requires the caller to initialize an _"accumulator"_ variable before calling this method; - The `vector_reduce` callback method requires the caller to initialize an _"accumulator"_ variable before calling this method;
- The `vector_filter` callback method is expected to return non-zero to keep the element and zero to filter it out. - The `vector_filter` callback method is expected to return non-zero to keep the element and zero to filter it out.
- The `env` argument is an optional parameter to pass the external environment to the callback function. It is used to mock the behavior of closures, where - The `env` argument is an optional parameter to pass the external environment to the callback function. It is used to mock the behavior of closures, where
the lexical environment is captured when the closure is created. the lexical environment is captured when the closure is created;
- Callback functions must be self-contained and handle all their resources. Additionally, they are responsible for ensuring their operations
don't cause any undefined behavior.
Let's look at an example:
```c
#include <stdio.h>
#include "src/vector.h"
// Callback functions
void square(void *element, void *env);
int is_even(const void *element, void *env);
void add(void *accumulator, const void *element, void *env);
int main(void) {
    // Allocate a vector of ints with an initial capacity of 5
    vector_t *vec = vector_new(5, sizeof(int)).value.vector;

    // Append the values 1..5, one push per element
    int nums[] = {1, 2, 3, 4, 5};
    for (int pos = 0; pos != 5; ++pos) {
        vector_push(vec, nums + pos);
    }

    // In-place map with `square`: [1, 2, 3, 4, 5] -> [1, 4, 9, 16, 25]
    vector_map(vec, square, NULL);
    for (int pos = 0; pos != 5; ++pos) {
        const int *item = (int *)vector_get(vec, pos).value.element;
        printf("%d ", *item);
    }
    putchar('\n');

    // In-place filter with `is_even`: [1, 4, 9, 16, 25] -> [4, 16]
    vector_filter(vec, is_even, NULL);
    for (int pos = 0; pos != 2; ++pos) {
        const int *item = (int *)vector_get(vec, pos).value.element;
        printf("%d ", *item);
    }
    putchar('\n');

    // Reduce with `add`; the accumulator must be initialized by the caller:
    // [4, 16] -> 20
    int sum = 0;
    vector_reduce(vec, &sum, add, NULL);
    printf("%d\n", sum);

    // Release the vector
    vector_destroy(vec);

    return 0;
}
// Map callback: overwrite the int stored at `element` with its square.
// `env` is accepted to satisfy the callback signature and is not used.
void square(void *element, void *env) {
    (void)env;

    int *slot = element;
    const int n = *slot;
    *slot = n * n;
}
// Filter callback: returns 1 when the int stored at `element` is even and
// 0 otherwise. `env` is accepted to satisfy the callback signature and is
// not used.
int is_even(const void *element, void *env) {
    (void)env;

    // Read through a const-qualified pointer so the parameter's const
    // qualifier is not cast away.
    const int value = *(const int *)element;

    return (value % 2) == 0;
}
// Reduce callback: adds the int stored at `element` to the int stored at
// `accumulator`. `env` is accepted to satisfy the callback signature and is
// not used.
void add(void *accumulator, const void *element, void *env) {
    (void)env;

    int *total = accumulator;
    // `element` is read-only: keep its const qualifier when dereferencing.
    *total += *(const int *)element;
}
```
## Sorting ## Sorting
As indicated in [its documentation](/docs/vector.md), the `Vector` data type As indicated in [its documentation](/docs/vector.md), the `Vector` data type
provides an efficient in-place sorting function called `vector_sort` that uses provides an efficient in-place sorting function called `vector_sort` that uses
a builtin implementation of the [Quicksort algorithm](https://en.wikipedia.org/wiki/Quicksort). This method requires a user-defined comparison procedure which allows the a builtin implementation of the [Quicksort algorithm](https://en.wikipedia.org/wiki/Quicksort). This method requires a user-defined comparison procedure which allows the
caller to customize the sorting behavior. The comparison procedure must adhere to the caller to customize the sorting behavior.
following specification:
The comparison procedure must adhere to the following specification:
1. Must return `vector_order_t`, which is defined as follows: 1. Must return `vector_order_t`, which is defined as follows:
@@ -107,7 +173,7 @@ typedef enum {
and indicates the ordering relationship between any two elements. and indicates the ordering relationship between any two elements.
2. Must accept two `const void*` parameters representing two elements to compare; 2. Must accept two `const void*` parameters representing two elements to compare;
3. Must be self-contained and handle all its own resources. 3. Must be self-contained and handle all its resources. Additionally, it's responsible for ensuring its operations don't cause any undefined behavior.
Let's look at some examples. For instance, let's say that we want to sort an array Let's look at some examples. For instance, let's say that we want to sort an array
of integers in ascending and descending order: of integers in ascending and descending order:
@@ -117,8 +183,8 @@ of integers in ascending and descending order:
#include "src/vector.h" #include "src/vector.h"
vector_order_t cmp_int_asc(const void *x, const void *y) { vector_order_t cmp_int_asc(const void *x, const void *y) {
int x_int = *(const int*)x; const int x_int = *(const int*)x;
int y_int = *(const int*)y; const int y_int = *(const int*)y;
if (x_int < y_int) return VECTOR_ORDER_LT; if (x_int < y_int) return VECTOR_ORDER_LT;
if (x_int > y_int) return VECTOR_ORDER_GT; if (x_int > y_int) return VECTOR_ORDER_GT;