Compare commits

6 Commits

Author SHA1 Message Date
40d343c02b Updated documentation
All checks were successful
clang-build / clang-build (push) Successful in 36s
gcc-build / gcc-build (push) Successful in 18s
clang-build / clang-build (pull_request) Successful in 36s
gcc-build / gcc-build (pull_request) Successful in 21s
2026-02-26 09:46:48 +01:00
eb670e26a5 Improved bigint_printf method
All checks were successful
clang-build / clang-build (push) Successful in 37s
gcc-build / gcc-build (push) Successful in 18s
2026-02-26 09:36:46 +01:00
a02f2dff40 Added Knuth's "Algorithm D" from TAOCP "Seminumerical algorithms"
All checks were successful
clang-build / clang-build (push) Successful in 41s
gcc-build / gcc-build (push) Successful in 19s
2026-02-25 17:13:49 +01:00
ea9ef9de4b Updated documentation
All checks were successful
clang-build / clang-build (push) Successful in 36s
gcc-build / gcc-build (push) Successful in 8s
2026-01-23 17:06:13 +01:00
dd6e7a9c9e Updated documentation 2026-01-12 11:58:32 +01:00
6cd90467c6 General refactoring 2026-01-07 11:08:53 +01:00
13 changed files with 1589 additions and 1365 deletions

View File

@@ -52,7 +52,7 @@ $(OBJ_DIR):
mkdir -p $(OBJ_DIR) mkdir -p $(OBJ_DIR)
# Benchmark rules # Benchmark rules
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/bigint.o
$(CC) $(BENCH_FLAGS) -o $@ $^ $(CC) $(BENCH_FLAGS) -o $@ $^
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR) $(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)

View File

@@ -2,15 +2,15 @@
<h1>Datum</h1> <h1>Datum</h1>
<h6><i>Collection of dynamic and generic data structures.</i></h6> <h6><i>Collection of dynamic and generic data structures.</i></h6>
[![](https://github.com/ceticamarco/datum/actions/workflows/gcc-build.yml/badge.svg)](https://github.com/ceticamarco/datum/actions/workflows/gcc-build.yml) ![](https://git.marcocetica.com/marco/datum/actions/workflows/gcc-build.yml/badge.svg)
[![](https://github.com/ceticamarco/datum/actions/workflows/clang-build.yml/badge.svg)](https://github.com/ceticamarco/datum/actions/workflows/clang-build.yml) ![](https://git.marcocetica.com/marco/datum/actions/workflows/clang-build.yml/badge.svg)
</div> </div>
Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond
the standard library. It currently features: the standard library. It currently features:
- [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types; - [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types;
- [**Map**](/docs/map.md): an associative array that handles generic heterogenous data types; - [**Map**](/docs/map.md): an associative array of generic heterogenous data types;
- [**BigInt**](/docs/bigint.md): a data type for arbitrarily large integers. - [**BigInt**](/docs/bigint.md): a data type for arbitrarily large integers.
## Usage ## Usage
@@ -22,63 +22,35 @@ At its simplest, you can use this library as follows:
#include <stdio.h> #include <stdio.h>
#include "src/vector.h" #include "src/vector.h"
/* vector_order_t cmp_asc(const void *a, const void *b);
* Compile with: gcc main.c src/vector.c
* Output: First element: 1
* Head of vector: 16, size is now: 1
*/
// Callback functions
vector_order_t cmp_int_asc(const void *x, const void *y);
void square(void *element, void *env);
int is_even(const void *element, void *env); int is_even(const void *element, void *env);
void add(void *accumulator, const void *element, void *env);
/* Compile with: gcc main.c src/vector.c
* Output: '2 4'
*/
int main(void) { int main(void) {
// Create an integer vector of initial capacity equal to 5 vector_t *vec = vector_new(5, sizeof(int)).value.vector; // Create a vector of integers
vector_t *vec = vector_new(5, sizeof(int)).value.vector;
// Add some elements int nums[] = {5, 4, 1, 2, 3}; // Push some elements
vector_push(vec, &(int){1}); // Equivalent as below for (int idx = 0; idx < 5; idx++) { vector_push(vec, &nums[idx]); }
int nums[] = {5, 2, 4, 3};
for (int idx = 0; idx < 4; idx++) { vector_push(vec, &nums[idx]); }
// Sort array in ascending order: [1, 2, 3, 4, 5] vector_sort(vec, cmp_asc); // Sort vector
vector_sort(vec, cmp_int_asc); vector_filter(vec, is_even, NULL); // Filter even elements
// Print 1st element for (int idx = 0; idx < 2; idx++) {
const int first = *(int*)vector_get(vec, 0).value.element; printf("%d ", *(int *)vector_get(vec, idx).value.element);
printf("First element: %d\n", first); }
putchar('\n');
int sum = 0;
vector_map(vec, square, NULL); // Square elements: [1, 2, 3, 4, 5] -> [1, 4, 9, 16, 25]
vector_filter(vec, is_even, NULL); // Filter even elements: [1, 4, 9, 16, 25] -> [4, 16]
vector_reduce(vec, &sum, add, NULL); // Sum elements: [4, 16] -> 20
// Pop second element using LIFO policy
const int head = *(int*)vector_pop(vec).value.element;
printf("Head of vector: %d, size is now: %zu\n", head, vector_size(vec));
// Remove vector from memory
vector_destroy(vec);
vector_destroy(vec); // Remove vector from memory
return 0; return 0;
} }
vector_order_t cmp_int_asc(const void *x, const void *y) { vector_order_t cmp_asc(const void *a, const void *b) {
int x_int = *(const int*)x; const int x = *(int *)a, y = *(int *)b;
int y_int = *(const int*)y;
if (x_int < y_int) return VECTOR_ORDER_LT; if (x < y) return VECTOR_ORDER_LT;
if (x_int > y_int) return VECTOR_ORDER_GT; return (x > y) ? VECTOR_ORDER_GT : VECTOR_ORDER_EQ;
return VECTOR_ORDER_EQ;
}
void square(void *element, void *env) {
(void)(env);
int *value = (int*)element;
*value = (*value) * (*value);
} }
int is_even(const void *element, void *env) { int is_even(const void *element, void *env) {
@@ -87,11 +59,6 @@ int is_even(const void *element, void *env) {
return (value % 2) == 0; return (value % 2) == 0;
} }
void add(void *accumulator, const void *element, void *env) {
(void)(env);
*(int*)accumulator += *(int*)element;
}
``` ```
### `Map` usage ### `Map` usage
@@ -143,9 +110,9 @@ int main(void) {
#include "src/bigint.h" #include "src/bigint.h"
/* /*
* Compile with: gcc -O3 main.c src/bigint.c src/vector.c * Compile with: clang -O3 fact.c src/bigint.c src/vector.c -o fact
* Output: 20000! = 1819206320230345134827641... * Output: 20000! = 1819206320230345134827641...
* Time: 4.01s user 0.00s system 99% cpu 4.021 total * Time: 1.49s user 0.00s system 99% cpu 1.501 total
*/ */
int main(void) { int main(void) {
const int n = 20000; const int n = 20000;
@@ -178,14 +145,11 @@ This will compile the library as well as the `usage.c` file, the unit tests and
> [!NOTE] > [!NOTE]
> This project is primarily developed for learning purposes and was not created with industrial > This project is primarily developed for learning purposes and was not created with industrial
> or production use in mind. As such, it is not intended to compete with any existing C library. > or production use in mind. As such, it is not intended to compete with any existing C library such as the
> In particular, the big number implementation does not aim to match the design, the maturity and
> the performance of established solutions such as the
> GNU Multiple Precision Arithmetic Library (GMP). > GNU Multiple Precision Arithmetic Library (GMP).
## Documentation ## Documentation
For additional details about this library (internal design, memory For additional details about this library (internal design, memory management, data ownership, etc.) go to the [docs folder](/docs).
management, data ownership, etc.) go to the [docs folder](/docs).
## Unit tests ## Unit tests
Datum provides some unit tests for `Vector`, `Map` and `BigInt`. To run them, you can issue the following commands: Datum provides some unit tests for `Vector`, `Map` and `BigInt`. To run them, you can issue the following commands:
@@ -201,9 +165,11 @@ $ ./test_bigint
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector` and the `Map` data structures. You can run it by issuing the following command: Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector` and the `Map` data structures. You can run it by issuing the following command:
```sh ```sh
$ make clean all CC=clang
$ ./benchmark_datum $ ./benchmark_datum
Computing Vector average time...average time: 18 ms Computing Vector average time...average time: 8 ms
Computing Map average time...average time: 31 ms Computing Map average time...average time: 53 ms
Computing BigInt average time...average time: 76 ms
``` ```

View File

@@ -6,6 +6,7 @@
#include "../src/vector.h" #include "../src/vector.h"
#include "../src/map.h" #include "../src/map.h"
#include "../src/bigint.h"
typedef void (*test_fn_t)(size_t iterations); typedef void (*test_fn_t)(size_t iterations);
@@ -22,11 +23,6 @@ void test_vector(size_t iterations) {
sum += *val; sum += *val;
} }
// Another trick to prevent compiler optimization
if (sum == 0xB00B5) {
printf("sum = %llu\n", (unsigned long long)sum);
}
vector_destroy(vec); vector_destroy(vec);
} }
@@ -53,32 +49,99 @@ void test_map(size_t iterations) {
// Cleanup values // Cleanup values
for (size_t idx = 0; idx < map->capacity; idx++) { for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED) { snprintf(key, sizeof(key), "key_%zu", idx);
int *val = (int*)map->elements[idx].value;
int *val = (int *)map_get(map, key).value.element;
free(val); free(val);
}
map_remove(map, key);
} }
map_destroy(map); map_destroy(map);
} }
long long benchmark(test_fn_t fun, size_t iterations, size_t runs) { void test_bigint(size_t iterations) {
long long total = 0; volatile uint64_t accumulator = 0;
for (size_t idx = 0; idx < runs; idx++) {
clock_t start = clock();
fun(iterations);
clock_t end = clock();
total += (long long)((end - start) * 1000 / CLOCKS_PER_SEC); for (size_t idx = 1; idx <= iterations; idx++) {
long long a_val = (long long)idx * 123456789LL;
long long b_val = (long long)idx * 17777LL;
bigint_result_t a_res = bigint_from_int(a_val);
bigint_result_t b_res = bigint_from_int(b_val);
if (a_res.status != BIGINT_OK || b_res.status != BIGINT_OK) {
bigint_destroy(a_res.value.number);
bigint_destroy(b_res.value.number);
continue;
} }
return total / runs; bigint_t *a = a_res.value.number;
bigint_t *b = b_res.value.number;
// Addition
bigint_result_t add_res = bigint_add(a, b);
if (add_res.status == BIGINT_OK) {
vector_result_t v = vector_get(add_res.value.number->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(add_res.value.number);
}
// Subtraction
bigint_result_t sub_res = bigint_sub(a, b);
if (sub_res.status == BIGINT_OK) {
vector_result_t v = vector_get(sub_res.value.number->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(sub_res.value.number);
}
// Multiplication
bigint_result_t mul_res = bigint_prod(a, b);
if (mul_res.status == BIGINT_OK) {
vector_result_t v = vector_get(mul_res.value.number->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(mul_res.value.number);
}
// Division
bigint_result_t div_res = bigint_divmod(a, b);
if (div_res.status == BIGINT_OK) {
vector_result_t v = vector_get(div_res.value.division.quotient->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(div_res.value.division.quotient);
bigint_destroy(div_res.value.division.remainder);
}
bigint_destroy(a); bigint_destroy(b);
}
}
/**
 * now_ns
 *
 * Reads the CLOCK_MONOTONIC clock through clock_gettime and returns
 * the reading expressed in nanoseconds (0 if the clock cannot be read)
 */
static inline uint64_t now_ns(void) {
    struct timespec ts = {0};

    // Without this check a failed call would leave 'ts' unset
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0;
    }

    // tv_nsec is a signed type: convert explicitly before mixing with uint64_t
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}
long long benchmark(test_fn_t fun, size_t iterations, size_t runs) {
long long total = 0;
for (size_t idx = 0; idx < runs; idx++) {
uint64_t start = now_ns();
fun(iterations);
uint64_t end = now_ns();
total += (end - start);
}
return (long long)(total / runs / 1000000);
} }
int main(void) { int main(void) {
// Do a warmup run // Do a warmup run
test_vector(1000); test_vector(1000);
test_map(1000); test_map(1000);
test_bigint(1000);
printf("Computing Vector average time..."); printf("Computing Vector average time...");
fflush(stdout); fflush(stdout);
@@ -88,5 +151,9 @@ int main(void) {
fflush(stdout); fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30)); printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
printf("Computing BigInt average time...");
fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_bigint, 1e5, 30));
return 0; return 0;
} }

View File

@@ -33,20 +33,21 @@ and the boolean `is_negative` variable denotes its sign.
The `BigInt` data structure supports the following methods: The `BigInt` data structure supports the following methods:
- `bigint_result_t bigint_from_int(value)`: create a big integer from a primitive `int` type; - `bigint_result_t bigint_from_int(value)`: creates a big integer from a primitive `int` type;
- `bigint_result_t bigint_from_string(string_num)`: create a big integer from a C string; - `bigint_result_t bigint_from_string(string_num)`: creates a big integer from a C string;
- `bigint_result_t bigint_to_string(number)`: convert a big integer to a C string; - `bigint_result_t bigint_to_string(number)`: converts a big integer to a C string;
- `bigint_result_t bigint_clone(number)`: clone a big integer; - `bigint_result_t bigint_clone(number)`: clones a big integer;
- `bigint_result_t bigint_compare(x, y)`: compare two big integers, returning either `-1`, `0` or `1` if the first is less than, equal to or greater than the second, respectively; - `bigint_result_t bigint_compare(x, y)`: compares two big integers, returning either `-1`, `0` or `1` if the first is less than, equal to or greater than the second, respectively;
- `bigint_result_t bigint_add(x, y)`: add two big integers together in $\mathcal{O}(n)$; - `bigint_result_t bigint_add(x, y)`: adds two big integers together in $\mathcal{O}(n)$;
- `bigint_result_t bigint_sub(x, y)`: subtract two big integers in $\mathcal{O}(n)$; - `bigint_result_t bigint_sub(x, y)`: subtracts two big integers in $\mathcal{O}(n)$;
- `bigint_result_t bigint_prod(x, y)`: multiply two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$; - `bigint_result_t bigint_prod(x, y)`: multiplies two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;
- `bigint_result_t bigint_divmod(x, y)`: divide two big integers using *long division* algorithm in $\mathcal{O}(n^2)$, returning both the quotient and the remainder; - `bigint_result_t bigint_divmod(x, y)`: divides two big integers using _Knuth's Algorithm D_ in $\mathcal{O}(n \times m)$ where $n$ and $m$ are the number of base-10^9
- `bigint_result_t bigint_mod(x, y)`: computes modulo of two big integers using *long division* algorithm in $\mathcal{O}(n^2)$; parts/limbs in the divisor and the quotient, respectively. This method returns both the quotient and the remainder;
- `bigint_result_t bigint_destroy(number)`: delete the big number; - `bigint_result_t bigint_mod(x, y)`: calls `bigint_divmod`, discards the quotient and yields the remainder;
- `bigint_result_t bigint_destroy(number)`: deletes the big number;
- `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters. - `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
As you can see by the previous function signatures, methods that operate on the As you can see from the previous function signatures, methods that operate on the
`BigInt` data type return a custom type called `bigint_result_t` which is defined as `BigInt` data type return a custom type called `bigint_result_t` which is defined as
follows: follows:
@@ -80,7 +81,7 @@ by setting the `status` field and by providing a descriptive message on the `mes
field. If the operation was successful (that is, `status == BIGINT_OK`), you can either field. If the operation was successful (that is, `status == BIGINT_OK`), you can either
move on with the rest of the program or read the returned value from the sum data type. move on with the rest of the program or read the returned value from the sum data type.
Of course, you can choose to ignore the return value (if you're brave enough :D) as Of course, you can choose to ignore the return value (if you're brave enough :D) as
illustrated in the first part of the README. illustrated on the first part of the README.
The sum data type (i.e., the `value` union) defines four different variables. Each The sum data type (i.e., the `value` union) defines four different variables. Each
of them has a unique scope as described below: of them has a unique scope as described below:
@@ -90,12 +91,3 @@ of them has an unique scope as described below:
- `compare_status`: result of `bigint_compare`; - `compare_status`: result of `bigint_compare`;
- `string_num`: result of `bigint_to_string`. - `string_num`: result of `bigint_to_string`.
> [!IMPORTANT]
> Currently, the division implementation employs a quadratic-time algorithm derived from the conventional _"grade school"_ long-division method.
> This approach performs adequately for integers of modest size (up to approximately 200 digits) but becomes highly inefficient when handling
> substantially larger integers (~1500 digits).
>
> Improving the efficiency of this algorithm would require further research into advanced
> numerical algorithms, which is something that I am currently not inclined to pursue.

View File

@@ -5,7 +5,7 @@ aspects (internal design, memory layout, etc.) of the `Map` data structure.
`Map` is a hash table that uses open addressing with linear probing for collision `Map` is a hash table that uses open addressing with linear probing for collision
resolution and the [FNV-1a algorithm](https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function) as its hashing function. Resizing is performed resolution and the [FNV-1a algorithm](https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function) as its hashing function. Resizing is performed
automatically by doubling the capacity when the load factor exceeds 75%. Internally, automatically by doubling the capacity when the load factor exceeds 75%. Internally,
this data structure is represented by the following two structures: this data structure is represented by the following two layouts:
```c ```c
typedef struct { typedef struct {
@@ -37,16 +37,16 @@ free them before removing the keys or destroying the map.
The `Map` data structure supports the following methods: The `Map` data structure supports the following methods:
- `map_result_t map_new()`: initialize a new map; - `map_result_t map_new()`: initializes a new map;
- `map_result_t map_add(map, key, value)`: add a `(key, value)` pair to the map; - `map_result_t map_add(map, key, value)`: adds a `(key, value)` pair to the map;
- `map_result_t map_get(map, key)`: retrieve the value indexed by `key` if it exists; - `map_result_t map_get(map, key)`: retrieves the value indexed by `key` if it exists;
- `map_result_t map_remove(map, key)`: remove a key from the map if it exists; - `map_result_t map_remove(map, key)`: removes a key from the map if it exists;
- `map_result_t map_clear(map)`: reset the map state; - `map_result_t map_clear(map)`: resets the map state;
- `map_result_t map_destroy(map)`: delete the map; - `map_result_t map_destroy(map)`: deletes the map;
- `size_t map_size(map)`: returns map size (i.e., the number of elements); - `size_t map_size(map)`: returns map size (i.e., the number of elements);
- `size_t map_capacity(map)`: returns map capacity (i.e., map total size). - `size_t map_capacity(map)`: returns map capacity (i.e., map total size).
As you can see by the previous function signatures, most methods that operate As you can see from the previous function signatures, most methods that operate
on the `Map` data type return a custom type called `map_result_t` which is on the `Map` data type return a custom type called `map_result_t` which is
defined as follows: defined as follows:
@@ -73,4 +73,4 @@ Each method that returns such type indicates whether the operation was successfu
the `status` field and by providing a descriptive message on the `message` field. If the operation was the `status` field and by providing a descriptive message on the `message` field. If the operation was
successful (that is, `status == MAP_OK`), you can either move on with the rest of the program or read successful (that is, `status == MAP_OK`), you can either move on with the rest of the program or read
the returned value from the sum data type. Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated the returned value from the sum data type. Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated
in the first part of the README. on the first part of the README.

View File

@@ -5,7 +5,7 @@ aspects (internal design, memory layout, etc.) of the `Vector` data structure.
`Vector` is a dynamic array with generic data type support; this means that you can store `Vector` is a dynamic array with generic data type support; this means that you can store
any kind of homogenous value on this data structure. Resizing is performed automatically any kind of homogenous value on this data structure. Resizing is performed automatically
by increasing the capacity by 1.5 times when the array becomes full. Internally, this by increasing the capacity by 1.5 times when the array becomes full. Internally, this
data structure is represented by the following structure: data structure is represented by the following layout:
```c ```c
typedef struct { typedef struct {
@@ -25,21 +25,21 @@ deletion.
At the time being, `Vector` supports the following methods: At the time being, `Vector` supports the following methods:
- `vector_result_t vector_new(size, data_size)`: create a new vector; - `vector_result_t vector_new(size, data_size)`: creates a new vector;
- `vector_result_t vector_push(vector, value)`: add a new value to the vector; - `vector_result_t vector_push(vector, value)`: adds a new value to the vector;
- `vector_result_t vector_set(vector, index, value)`: update the value of a given index if it exists; - `vector_result_t vector_set(vector, index, value)`: updates the value of a given index if it exists;
- `vector_result_t vector_get(vector, index)`: return the value indexed by `index` if it exists; - `vector_result_t vector_get(vector, index)`: returns the value indexed by `index` if it exists;
- `map_result_t vector_sort(map, cmp)`: sort array using `cmp` function; - `vector_result_t vector_sort(vector, cmp)`: sorts vector using `cmp` function;
- `vector_result_t vector_pop(vector)`: pop last element from the vector following the LIFO policy; - `vector_result_t vector_pop(vector)`: pops last element from the vector following the LIFO policy;
- `vector_result_t vector_map(vector, callback, env)`: apply `callback` function to vector (in-place); - `vector_result_t vector_map(vector, callback, env)`: applies `callback` function to vector (in-place);
- `vector_result_t vector_filter(vector, callback, env)`: filter vector using `callback` (in-place); - `vector_result_t vector_filter(vector, callback, env)`: filters vector using `callback` (in-place);
- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: fold/reduce vector using `callback`; - `vector_result_t vector_reduce(vector, accumulator, callback, env)`: folds/reduces vector using `callback`;
- `vector_result_t vector_clear(vector)`: logically reset the vector. That is, new pushes will overwrite the memory; - `vector_result_t vector_clear(vector)`: resets the vector logically. That is, new pushes will overwrite the memory;
- `vector_result_t vector_destroy(vector)`: delete the vector; - `vector_result_t vector_destroy(vector)`: deletes the vector;
- `size_t vector_size(vector)`: return vector size (i.e., the number of elements); - `size_t vector_size(vector)`: returns vector size (i.e., the number of elements);
- `size_t vector_capacity(vector)`: return vector capacity (i.e., vector total size). - `size_t vector_capacity(vector)`: returns vector capacity (i.e., vector total size).
As you can see by the previous function signatures, most methods that operate As you can see from the previous function signatures, most methods that operate
on the `Vector` data type return a custom type called `vector_result_t` which is on the `Vector` data type return a custom type called `vector_result_t` which is
defined as follows: defined as follows:
@@ -66,7 +66,7 @@ Each method that returns such type indicates whether the operation was successfu
by setting the `status` field and by providing a descriptive message on the `message` by setting the `status` field and by providing a descriptive message on the `message`
field. If the operation was successful (that is, `status == VECTOR_OK`), you can either field. If the operation was successful (that is, `status == VECTOR_OK`), you can either
move on with the rest of the program or read the returned value from the sum data type. Of course, you can choose to move on with the rest of the program or read the returned value from the sum data type. Of course, you can choose to
ignore the return value (if you're brave enough :D) as illustrated in the first part of the README. ignore the return value (if you're brave enough :D) as illustrated on the first part of the README.
## Functional methods ## Functional methods
`Vector` provides three functional methods called `map`, `filter` and `reduce` which allow the caller to apply a computation to the vector, `Vector` provides three functional methods called `map`, `filter` and `reduce` which allow the caller to apply a computation to the vector,
@@ -85,14 +85,80 @@ In particular, you should be aware of the following design choices:
- The `vector_reduce` callback method requires the caller to initialize an _"accumulator"_ variable before calling this method; - The `vector_reduce` callback method requires the caller to initialize an _"accumulator"_ variable before calling this method;
- The `vector_filter` callback method is expected to return non-zero to keep the element and zero to filter it out. - The `vector_filter` callback method is expected to return non-zero to keep the element and zero to filter it out.
- The `env` argument is an optional parameter to pass the external environment to the callback function. It is used to mock the behavior of closures, where - The `env` argument is an optional parameter to pass the external environment to the callback function. It is used to mock the behavior of closures, where
the lexical environment is captured when the closure is created. the lexical environment is captured when the closure is created;
- Callback functions must be self-contained and handle all their resources. Additionally, they are responsible for ensuring their operations
don't cause any undefined behavior.
Let's look at an example:
```c
#include <stdio.h>
#include "src/vector.h"
// Callback functions
void square(void *element, void *env);
int is_even(const void *element, void *env);
void add(void *accumulator, const void *element, void *env);
/*
 * Walks through the three functional methods (map, filter and reduce)
 * on a small integer vector, printing the vector after each in-place
 * step and the final sum. Return values are deliberately ignored to
 * keep the example short.
 */
int main(void) {
    // Create an integer vector of initial capacity equal to 5
    vector_t *vec = vector_new(5, sizeof(int)).value.vector;

    int nums[] = {1, 2, 3, 4, 5};
    const size_t nums_len = sizeof(nums) / sizeof(nums[0]);
    for (size_t idx = 0; idx < nums_len; idx++) {
        vector_push(vec, &nums[idx]);
    }

    // Square elements: [1, 2, 3, 4, 5] -> [1, 4, 9, 16, 25]
    vector_map(vec, square, NULL);
    // Iterate up to the current size rather than a hard-coded bound
    for (size_t idx = 0; idx < vector_size(vec); idx++) {
        printf("%d ", *(int *)vector_get(vec, idx).value.element);
    }
    putchar('\n');

    // Filter even elements: [1, 4, 9, 16, 25] -> [4, 16]
    vector_filter(vec, is_even, NULL);
    // The filter changes the number of elements, so query the size again
    for (size_t idx = 0; idx < vector_size(vec); idx++) {
        printf("%d ", *(int *)vector_get(vec, idx).value.element);
    }
    putchar('\n');

    // Sum elements: [4, 16] -> 20
    int sum = 0;
    vector_reduce(vec, &sum, add, NULL);
    printf("%d\n", sum);

    vector_destroy(vec);

    return 0;
}
/* Map callback: replaces the integer pointed to by @element with its square. */
void square(void *element, void *env) {
    (void)env; // No external environment is needed

    int *slot = element;
    const int current = *slot;

    *slot = current * current;
}
/*
 * Filter callback: returns non-zero when the integer pointed to by
 * @element is even, zero otherwise.
 */
int is_even(const void *element, void *env) {
    (void)(env);
    // Keep the const qualifier when reading the element
    const int value = *(const int *)element;

    return (value % 2) == 0;
}
/*
 * Reduce callback: adds the integer pointed to by @element to the
 * integer pointed to by @accumulator.
 */
void add(void *accumulator, const void *element, void *env) {
    (void)(env);
    // Only the accumulator is written; the element is read through a const pointer
    *(int *)accumulator += *(const int *)element;
}
```
## Sorting ## Sorting
As indicated in [its documentation](/docs/vector.md), the `Vector` data type As indicated in [its documentation](/docs/vector.md), the `Vector` data type
provides an efficient in-place sorting function called `vector_sort` that uses provides an efficient in-place sorting function called `vector_sort` that uses
a builtin implementation of the [Quicksort algorithm](https://en.wikipedia.org/wiki/Quicksort). This method requires a user-defined comparison procedure which allows the a builtin implementation of the [Quicksort algorithm](https://en.wikipedia.org/wiki/Quicksort). This method requires a user-defined comparison procedure which allows the
caller to customize the sorting behavior. The comparison procedure must adhere to the caller to customize the sorting behavior.
following specification:
The comparison procedure must adhere to the following specification:
1. Must return `vector_order_t`, which is defined as follows: 1. Must return `vector_order_t`, which is defined as follows:
@@ -107,7 +173,7 @@ typedef enum {
and indicates the ordering relationship between any two elements. and indicates the ordering relationship between any two elements.
2. Must accept two `const void*` parameters representing two elements to compare; 2. Must accept two `const void*` parameters representing two elements to compare;
3. Must be self-contained and handle all its own resources. 3. Must be self-contained and handle all its resources. Additionally, it's responsible for ensuring its operations don't cause any undefined behavior.
Let's look at some examples. For instance, let's say that we want to sort an array Let's look at some examples. For instance, let's say that we want to sort an array
of integers in ascending and descending order: of integers in ascending and descending order:
@@ -117,8 +183,8 @@ of integers in ascending and descending order:
#include "src/vector.h" #include "src/vector.h"
vector_order_t cmp_int_asc(const void *x, const void *y) { vector_order_t cmp_int_asc(const void *x, const void *y) {
int x_int = *(const int*)x; const int x_int = *(const int*)x;
int y_int = *(const int*)y; const int y_int = *(const int*)y;
if (x_int < y_int) return VECTOR_ORDER_LT; if (x_int < y_int) return VECTOR_ORDER_LT;
if (x_int > y_int) return VECTOR_ORDER_GT; if (x_int > y_int) return VECTOR_ORDER_GT;

File diff suppressed because it is too large Load Diff

View File

@@ -10,19 +10,13 @@
#include "map.h" #include "map.h"
// Internal methods
static uint64_t hash_key(const char *key);
static size_t map_insert_index(const map_t *map, const char *key);
static size_t map_find_index(const map_t *map, const char *key);
static map_result_t map_resize(map_t *map);
/** /**
* hash_key * hash_key
* @key: The input string for the hash function * @key: The input string for the hash function
* *
* Returns the digest of @key using the Fowler-Noll-Vo hashing algorithm * Returns the digest of @key using the Fowler-Noll-Vo hashing algorithm
*/ */
uint64_t hash_key(const char *key) { static uint64_t hash_key(const char *key) {
uint64_t hash = FNV_OFFSET_BASIS_64; uint64_t hash = FNV_OFFSET_BASIS_64;
while (*key) { while (*key) {
@@ -33,43 +27,6 @@ uint64_t hash_key(const char *key) {
return hash; return hash;
} }
/**
 * map_new
 *
 *  Allocates an empty hash map: the map header first, then a zero-filled
 *  bucket array of INITIAL_CAP slots. If the bucket array cannot be
 *  allocated, the header is released before returning.
 *
 *  Returns a map_result_t data type containing a new hash map
 */
map_result_t map_new(void) {
    map_result_t result = {0};

    map_t *fresh = malloc(sizeof(*fresh));
    if (!fresh) {
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map");

        return result;
    }

    // calloc zero-fills the bucket array
    fresh->elements = calloc(INITIAL_CAP, sizeof(map_element_t));
    if (!fresh->elements) {
        // Do not leak the header when the bucket array cannot be allocated
        free(fresh);
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map elements");

        return result;
    }

    // Bookkeeping for an empty map
    fresh->tombstone_count = 0;
    fresh->size = 0;
    fresh->capacity = INITIAL_CAP;

    result.value.map = fresh;
    result.status = MAP_OK;
    SET_MSG(result, "Map successfully created");

    return result;
}
/** /**
* map_insert_index * map_insert_index
* @map: a non-null map * @map: a non-null map
@@ -80,7 +37,7 @@ map_result_t map_new(void) {
* *
* Returns the index of available slot or SIZE_MAX otherwise * Returns the index of available slot or SIZE_MAX otherwise
*/ */
size_t map_insert_index(const map_t *map, const char *key) { static size_t map_insert_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key); const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity; size_t idx = key_digest % map->capacity;
size_t delete_tracker = map->capacity; // Fallback index size_t delete_tracker = map->capacity; // Fallback index
@@ -113,7 +70,7 @@ size_t map_insert_index(const map_t *map, const char *key) {
* *
* Returns a map_result_t data type containing the status * Returns a map_result_t data type containing the status
*/ */
map_result_t map_resize(map_t *map) { static map_result_t map_resize(map_t *map) {
map_result_t result = {0}; map_result_t result = {0};
const size_t old_capacity = map->capacity; const size_t old_capacity = map->capacity;
@@ -174,6 +131,43 @@ map_result_t map_resize(map_t *map) {
return result; return result;
} }
/**
 * map_new
 *
 *  Allocates an empty hash map: the map header first, then a zero-filled
 *  bucket array of INITIAL_CAP slots. If the bucket array cannot be
 *  allocated, the header is released before returning.
 *
 *  Returns a map_result_t data type containing a new hash map
 */
map_result_t map_new(void) {
    map_result_t result = {0};

    map_t *fresh = malloc(sizeof(*fresh));
    if (!fresh) {
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map");

        return result;
    }

    // calloc zero-fills the bucket array
    fresh->elements = calloc(INITIAL_CAP, sizeof(map_element_t));
    if (!fresh->elements) {
        // Do not leak the header when the bucket array cannot be allocated
        free(fresh);
        result.status = MAP_ERR_ALLOCATE;
        SET_MSG(result, "Failed to allocate memory for map elements");

        return result;
    }

    // Bookkeeping for an empty map
    fresh->tombstone_count = 0;
    fresh->size = 0;
    fresh->capacity = INITIAL_CAP;

    result.value.map = fresh;
    result.status = MAP_OK;
    SET_MSG(result, "Map successfully created");

    return result;
}
/** /**
* map_add * map_add
* @map: a non-null map * @map: a non-null map

View File

@@ -9,11 +9,112 @@
#include "vector.h" #include "vector.h"
// Internal methods /**
static vector_result_t vector_resize(vector_t *vector); * vector_resize
static void swap(void *x, void *y, size_t size); * @vector: a non-null vector
static size_t partition(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp); *
static void quicksort(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp); * Increases the size of @vector
*
* Returns a vector_result_t data type containing the status
*/
static vector_result_t vector_resize(vector_t *vector) {
    vector_result_t result = {0};

    const size_t old_capacity = vector->capacity;

    // Guard against arithmetic overflow in two places: doubling the capacity
    // must not wrap around SIZE_MAX, and the byte count handed to realloc
    // (new_capacity * data_size) must be representable as a size_t.
    // The first test short-circuits, so new_capacity is only trusted when
    // the doubling itself is known to be safe.
    // NOTE(review): assumes data_size is non-zero (presumably enforced when
    // the vector is created) - confirm against vector_new
    const size_t new_capacity = old_capacity > 0 ? old_capacity * 2 : 1;
    if (old_capacity > SIZE_MAX / 2 || new_capacity > SIZE_MAX / vector->data_size) {
        result.status = VECTOR_ERR_OVERFLOW;
        SET_MSG(result, "Exceeded maximum size while resizing vector");

        return result;
    }

    // Keep the old buffer reachable until realloc has succeeded
    void *new_elements = realloc(vector->elements, new_capacity * vector->data_size);
    if (new_elements == NULL) {
        result.status = VECTOR_ERR_ALLOCATE;
        SET_MSG(result, "Failed to reallocate memory for vector");

        return result;
    }

    vector->elements = new_elements;
    vector->capacity = new_capacity;

    result.status = VECTOR_OK;
    SET_MSG(result, "Vector successfully resized");

    return result;
}
/**
 * swap
 *  @x: first element
 *  @y: second element
 *  @size: size in bytes of each element
 *
 *  Swaps the @size bytes stored at @x with the @size bytes stored at @y.
 *  The two regions must either be the same object or not overlap at all.
 */
static void swap(void *x, void *y, size_t size) {
    // Swapping an element with itself is a no-op. Returning early also
    // avoids calling memcpy on identical (overlapping) regions, which is
    // undefined behavior
    if (x == y || size == 0) {
        return;
    }

    uint8_t *lhs = x;
    uint8_t *rhs = y;
    uint8_t chunk[64];

    // Exchange the elements through a fixed-size buffer so that stack usage
    // does not grow with the element size (no variable-length array)
    while (size > 0) {
        const size_t step = size < sizeof(chunk) ? size : sizeof(chunk);

        memcpy(chunk, lhs, step);
        memcpy(lhs, rhs, step);
        memcpy(rhs, chunk, step);

        lhs += step;
        rhs += step;
        size -= step;
    }
}
/**
 * partition
 *  @base: the array/partition
 *  @low: lower index
 *  @high: higher index
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Rearranges the elements in [@low, @high] around the element stored at
 *  @high: every element that compares less than or equal to it is moved
 *  in front of it, the others end up after it.
 *
 *  Returns the pivot index
 */
static size_t partition(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    uint8_t *const bytes = (uint8_t*)base;
    uint8_t *const pivot_slot = bytes + (high * size);

    // Next free position of the "less than or equal" region
    size_t boundary = low;

    for (size_t cursor = low; cursor < high; cursor++) {
        uint8_t *const candidate = bytes + (cursor * size);
        const vector_order_t order = cmp(candidate, pivot_slot);

        // Elements greater than the pivot stay where they are
        if (order != VECTOR_ORDER_LT && order != VECTOR_ORDER_EQ) {
            continue;
        }

        swap(bytes + (boundary * size), candidate, size);
        boundary++;
    }

    // Move the pivot between the two regions
    swap(bytes + (boundary * size), pivot_slot, size);

    return boundary;
}
/**
 * quicksort
 *  @base: the base array/partition
 *  @low: lower index
 *  @high: higher index
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Sorts the elements in [@low, @high] using the Quicksort algorithm.
 *  The left partition is handled recursively, the right one by looping.
 */
static void quicksort(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    while (low < high) {
        const size_t split = partition(base, low, high, size, cmp);

        // Indices are unsigned: split - 1 would wrap around when split is 0
        if (split != 0) {
            quicksort(base, low, split - 1, size, cmp);
        }

        // Continue with the right partition
        low = split + 1;
    }
}
/** /**
* vector_new * vector_new
@@ -61,112 +162,6 @@ vector_result_t vector_new(size_t size, size_t data_size) {
return result; return result;
} }
/**
 * vector_resize
 *  @vector: a non-null vector
 *
 *  Increases the size of @vector by doubling its capacity (an empty
 *  vector grows to a capacity of one element). On failure @vector is
 *  left untouched.
 *
 *  Returns a vector_result_t data type containing the status
 */
vector_result_t vector_resize(vector_t *vector) {
    vector_result_t result = {0};

    const size_t old_capacity = vector->capacity;

    // Guard against arithmetic overflow in two places: doubling the capacity
    // must not wrap around SIZE_MAX, and the byte count handed to realloc
    // (new_capacity * data_size) must be representable as a size_t.
    // The first test short-circuits, so new_capacity is only trusted when
    // the doubling itself is known to be safe.
    // NOTE(review): assumes data_size is non-zero (presumably enforced when
    // the vector is created) - confirm against vector_new
    const size_t new_capacity = old_capacity > 0 ? old_capacity * 2 : 1;
    if (old_capacity > SIZE_MAX / 2 || new_capacity > SIZE_MAX / vector->data_size) {
        result.status = VECTOR_ERR_OVERFLOW;
        SET_MSG(result, "Exceeded maximum size while resizing vector");

        return result;
    }

    // Keep the old buffer reachable until realloc has succeeded
    void *new_elements = realloc(vector->elements, new_capacity * vector->data_size);
    if (new_elements == NULL) {
        result.status = VECTOR_ERR_ALLOCATE;
        SET_MSG(result, "Failed to reallocate memory for vector");

        return result;
    }

    vector->elements = new_elements;
    vector->capacity = new_capacity;

    result.status = VECTOR_OK;
    SET_MSG(result, "Vector successfully resized");

    return result;
}
/**
 * swap
 *  @x: first element
 *  @y: second element
 *  @size: size in bytes of each element
 *
 *  Swaps the @size bytes stored at @x with the @size bytes stored at @y.
 *  The two regions must either be the same object or not overlap at all.
 */
void swap(void *x, void *y, size_t size) {
    // Swapping an element with itself is a no-op. Returning early also
    // avoids calling memcpy on identical (overlapping) regions, which is
    // undefined behavior
    if (x == y || size == 0) {
        return;
    }

    uint8_t *lhs = x;
    uint8_t *rhs = y;
    uint8_t chunk[64];

    // Exchange the elements through a fixed-size buffer so that stack usage
    // does not grow with the element size (no variable-length array)
    while (size > 0) {
        const size_t step = size < sizeof(chunk) ? size : sizeof(chunk);

        memcpy(chunk, lhs, step);
        memcpy(lhs, rhs, step);
        memcpy(rhs, chunk, step);

        lhs += step;
        rhs += step;
        size -= step;
    }
}
/**
 * partition
 *  @base: the array/partition
 *  @low: lower index
 *  @high: higher index
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Rearranges the elements in [@low, @high] around the element stored at
 *  @high: every element that compares less than or equal to it is moved
 *  in front of it, the others end up after it.
 *
 *  Returns the pivot index
 */
size_t partition(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    uint8_t *const bytes = (uint8_t*)base;
    uint8_t *const pivot_slot = bytes + (high * size);

    // Next free position of the "less than or equal" region
    size_t boundary = low;

    for (size_t cursor = low; cursor < high; cursor++) {
        uint8_t *const candidate = bytes + (cursor * size);
        const vector_order_t order = cmp(candidate, pivot_slot);

        // Elements greater than the pivot stay where they are
        if (order != VECTOR_ORDER_LT && order != VECTOR_ORDER_EQ) {
            continue;
        }

        swap(bytes + (boundary * size), candidate, size);
        boundary++;
    }

    // Move the pivot between the two regions
    swap(bytes + (boundary * size), pivot_slot, size);

    return boundary;
}
/**
 * quicksort
 *  @base: the base array/partition
 *  @low: lower index
 *  @high: higher index
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Sorts the elements in [@low, @high] using the Quicksort algorithm.
 *  The left partition is handled recursively, the right one by looping.
 */
void quicksort(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    while (low < high) {
        const size_t split = partition(base, low, high, size, cmp);

        // Indices are unsigned: split - 1 would wrap around when split is 0
        if (split != 0) {
            quicksort(base, low, split - 1, size, cmp);
        }

        // Continue with the right partition
        low = split + 1;
    }
}
/** /**
* vector_push * vector_push
* @vector: a non-null vector * @vector: a non-null vector

View File

@@ -213,8 +213,8 @@ void test_bigint_prod_neg(void) {
bigint_destroy(prod.value.number); bigint_destroy(prod.value.number);
} }
// Test division between big numbers // Test division between big numbers where divisor is a single limb big number
void test_bigint_div(void) { void test_bigint_div_single_limb(void) {
bigint_result_t x = bigint_from_int(100); bigint_result_t x = bigint_from_int(100);
bigint_result_t y = bigint_from_int(2); bigint_result_t y = bigint_from_int(2);
@@ -229,11 +229,33 @@ void test_bigint_div(void) {
bigint_eq(quotient, "50"); bigint_eq(quotient, "50");
bigint_eq(remainder, "0"); bigint_eq(remainder, "0");
bigint_destroy(quotient); bigint_destroy(quotient); bigint_destroy(remainder);
bigint_destroy(remainder); bigint_destroy(x.value.number); bigint_destroy(y.value.number);
}
bigint_destroy(x.value.number); // Test division between big numbers using Knuth's algorithm
bigint_destroy(y.value.number); void test_bigint_div_knuth(void) {
// (1...9) x 8
const char *x_origin = "123456789123456789123456789123456789123456789123456789123456789123456789";
// (9...1) x 5
const char *y_origin = "987654321987654321987654321987654321987654321";
bigint_result_t x = bigint_from_string(x_origin);
bigint_result_t y = bigint_from_string(y_origin);
assert(x.status == BIGINT_OK && y.status == BIGINT_OK);
bigint_result_t div = bigint_divmod(x.value.number, y.value.number);
assert(div.status == BIGINT_OK);
bigint_t* const quotient = div.value.division.quotient;
bigint_t* const remainder = div.value.division.remainder;
bigint_eq(quotient, "124999998860937500014238281");
bigint_eq(remainder, "246737799246737799370194588370194588370194588");
bigint_destroy(quotient); bigint_destroy(remainder);
bigint_destroy(x.value.number); bigint_destroy(y.value.number);
} }
// Test division between big numbers with negative dividend // Test division between big numbers with negative dividend
@@ -262,7 +284,7 @@ void test_bigint_div_dividend(void) {
// Test division between big numbers with negative divisor // Test division between big numbers with negative divisor
// This library follows C-style division such that sign(remainder) = sign(dividend) // This library follows C-style division such that sign(remainder) = sign(dividend)
void test_bigint_div_divisor(void) { void test_bigint_div_neg_divisor(void) {
bigint_result_t x = bigint_from_int(13); bigint_result_t x = bigint_from_int(13);
bigint_result_t y = bigint_from_int(-4); bigint_result_t y = bigint_from_int(-4);
@@ -405,9 +427,10 @@ int main(void) {
TEST(bigint_very_large_prod); TEST(bigint_very_large_prod);
TEST(bigint_prod_mixed); TEST(bigint_prod_mixed);
TEST(bigint_prod_neg); TEST(bigint_prod_neg);
TEST(bigint_div); TEST(bigint_div_single_limb);
TEST(bigint_div_knuth);
TEST(bigint_div_dividend); TEST(bigint_div_dividend);
TEST(bigint_div_divisor); TEST(bigint_div_neg_divisor);
TEST(bigint_div_neg); TEST(bigint_div_neg);
TEST(bigint_div_by_zero); TEST(bigint_div_by_zero);
TEST(bigint_clone); TEST(bigint_clone);

View File

@@ -495,7 +495,7 @@ int bigint_usage(void) {
// Print result // Print result
bigint_printf("multiplication result = %B\n", prod); bigint_printf("multiplication result = %B\n", prod);
bigint_t *a = bigint_from_string(x_origin).value.number; bigint_t *a = bigint_from_string(large_x).value.number;
bigint_t *b = bigint_from_string(y_origin).value.number; bigint_t *b = bigint_from_string(y_origin).value.number;
// Divide two big integers // Divide two big integers