Compare commits

..

11 Commits

17 changed files with 2891 additions and 1215 deletions

View File

@@ -16,7 +16,7 @@ jobs:
- name: Run unit tests - name: Run unit tests
run: | run: |
./test_vector && ./test_map && ./test_bigint ./test_vector && ./test_map && ./test_bigint && ./test_string
- name: Run benchmarks - name: Run benchmarks
run: | run: |

View File

@@ -13,7 +13,7 @@ jobs:
- name: Run unit tests - name: Run unit tests
run: | run: |
./test_vector && ./test_map && ./test_bigint ./test_vector && ./test_map && ./test_bigint && ./test_string
- name: Run benchmarks - name: Run benchmarks
run: | run: |

View File

@@ -1,7 +1,7 @@
CC = gcc CC = gcc
CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \ CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \
-fsanitize=address -fsanitize=undefined -fstack-clash-protection \ -fsanitize=address -fsanitize=undefined -fstack-clash-protection \
-Wwrite-strings -g -std=c99 -Wwrite-strings -g -std=c99
BENCH_FLAGS = -Wall -Wextra -Werror -O3 BENCH_FLAGS = -Wall -Wextra -Werror -O3
@@ -17,14 +17,15 @@ TARGET = usage
TEST_V_TARGET = test_vector TEST_V_TARGET = test_vector
TEST_M_TARGET = test_map TEST_M_TARGET = test_map
TEST_B_TARGET = test_bigint TEST_B_TARGET = test_bigint
TEST_S_TARGET = test_string
BENCH_TARGET = benchmark_datum BENCH_TARGET = benchmark_datum
LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/string.o
PROG_OBJS = $(OBJ_DIR)/usage.o PROG_OBJS = $(OBJ_DIR)/usage.o
.PHONY: all clean .PHONY: all clean
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET) all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(TEST_S_TARGET) $(BENCH_TARGET)
bench: $(BENCH_TARGET) bench: $(BENCH_TARGET)
$(TARGET): $(PROG_OBJS) $(LIB_OBJS) $(TARGET): $(PROG_OBJS) $(LIB_OBJS)
@@ -39,6 +40,9 @@ $(TEST_M_TARGET): $(OBJ_DIR)/test_map.o $(OBJ_DIR)/map.o
$(TEST_B_TARGET): $(OBJ_DIR)/test_bigint.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/vector.o $(TEST_B_TARGET): $(OBJ_DIR)/test_bigint.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/vector.o
$(CC) $(CFLAGS) -o $@ $^ $(CC) $(CFLAGS) -o $@ $^
$(TEST_S_TARGET): $(OBJ_DIR)/test_string.o $(OBJ_DIR)/string.o
$(CC) $(CFLAGS) -o $@ $^
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(OBJ_DIR) $(OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(OBJ_DIR)
$(CC) $(CFLAGS) -c -o $@ $< $(CC) $(CFLAGS) -c -o $@ $<
@@ -52,7 +56,7 @@ $(OBJ_DIR):
mkdir -p $(OBJ_DIR) mkdir -p $(OBJ_DIR)
# Benchmark rules # Benchmark rules
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/string.o
$(CC) $(BENCH_FLAGS) -o $@ $^ $(CC) $(BENCH_FLAGS) -o $@ $^
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR) $(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)
@@ -65,4 +69,4 @@ $(BENCH_OBJ_DIR):
mkdir -p $(BENCH_OBJ_DIR) mkdir -p $(BENCH_OBJ_DIR)
clean: clean:
rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET) rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(TEST_S_TARGET) $(BENCH_TARGET)

View File

@@ -2,8 +2,9 @@
<h1>Datum</h1> <h1>Datum</h1>
<h6><i>Collection of dynamic and generic data structures.</i></h6> <h6><i>Collection of dynamic and generic data structures.</i></h6>
[![](https://github.com/ceticamarco/datum/actions/workflows/gcc-build.yml/badge.svg)](https://github.com/ceticamarco/datum/actions/workflows/gcc-build.yml) ![](https://git.marcocetica.com/marco/datum/actions/workflows/gcc-build.yml/badge.svg)
[![](https://github.com/ceticamarco/datum/actions/workflows/clang-build.yml/badge.svg)](https://github.com/ceticamarco/datum/actions/workflows/clang-build.yml) ![](https://git.marcocetica.com/marco/datum/actions/workflows/clang-build.yml/badge.svg)
</div> </div>
Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond
@@ -11,7 +12,8 @@ the standard library. It currently features:
- [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types; - [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types;
- [**Map**](/docs/map.md): an associative array that handles generic heterogenous data types; - [**Map**](/docs/map.md): an associative array that handles generic heterogenous data types;
- [**BigInt**](/docs/bigint.md): a data type for arbitrary large integers. - [**BigInt**](/docs/bigint.md): a data type for arbitrary large integers;
- [**String**](/docs/string.md): an immutable string type with partial UTF-8 support.
## Usage ## Usage
At its simplest, you can use this library as follows: At its simplest, you can use this library as follows:
@@ -167,6 +169,39 @@ int main(void) {
} }
``` ```
### `String` usage:
```c
#include <stdio.h>
#include "src/string.h"
/*
* Compile with: gcc -O3 main.c src/string.c
* Output: Final string: "Hello,World,😀" Splitted: ["Hello" "World" "😀" ]
*/
int main(void) {
string_t *x = string_new(" Hello, ").value.string;
string_t *x_trm = string_trim(x).value.string;
string_t *y = string_new("😀,dlroW").value.string;
string_t *y_rev = string_reverse(y).value.string;
string_t *str = string_concat(x_trm, y_rev).value.string;
string_t **strings = string_split(str, ",").value.split.strings;
printf("Final string: \"%s\" Splitted: [", str->data);
for (int idx = 0; idx < 3; idx++) { printf("\"%s\" ", strings[idx]->data); }
printf("]\n");
string_split_destroy(strings, 3); string_destroy(str);
string_destroy(x); string_destroy(y);
string_destroy(x_trm); string_destroy(y_rev);
return 0;
}
```
For a more exhaustive example, refer to the `usage.c` file. There, you will find a program with proper error management For a more exhaustive example, refer to the `usage.c` file. There, you will find a program with proper error management
and a sample usage for every available method. To run it, first issue the following command: and a sample usage for every available method. To run it, first issue the following command:
@@ -198,12 +233,14 @@ $ ./test_bigint
``` ```
## Benchmark ## Benchmark
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stress the `Vector` and the `Map` data structures. You can run it by issuing the following command: Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stress the `Vector`, `Map` and the `String` data structures.
You can run it by issuing the following command:
```sh ```sh
$ ./benchmark_datum $ ./benchmark_datum
Computing Vector average time...average time: 18 ms Computing Vector average time...average time: 19 ms
Computing Map average time...average time: 31 ms Computing Map average time...average time: 55 ms
Computing String average time...average time: 24 ms
``` ```

View File

@@ -1,3 +1,5 @@
#define _POSIX_C_SOURCE 200809L
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
@@ -6,6 +8,7 @@
#include "../src/vector.h" #include "../src/vector.h"
#include "../src/map.h" #include "../src/map.h"
#include "../src/string.h"
typedef void (*test_fn_t)(size_t iterations); typedef void (*test_fn_t)(size_t iterations);
@@ -13,20 +16,15 @@ void test_vector(size_t iterations) {
vector_t *vec = vector_new(16, sizeof(int)).value.vector; vector_t *vec = vector_new(16, sizeof(int)).value.vector;
for (size_t idx = 0; idx < iterations; idx++) { for (size_t idx = 0; idx < iterations; idx++) {
vector_push(vec, &idx); vector_push(vec, &(int){idx});
} }
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum volatile uint64_t sum = 0;
for (size_t idx = 0; idx < iterations; idx++) { for (size_t idx = 0; idx < iterations; idx++) {
const int *val = (int*)vector_get(vec, idx).value.element; const int *val = (int*)vector_get(vec, idx).value.element;
sum += *val; sum += *val;
} }
// Another trick to prevent compiler optimization
if (sum == 0xB00B5) {
printf("sum = %llu\n", (unsigned long long)sum);
}
vector_destroy(vec); vector_destroy(vec);
} }
@@ -43,7 +41,7 @@ void test_map(size_t iterations) {
map_add(map, key, (void*)value); map_add(map, key, (void*)value);
} }
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum volatile uint64_t sum = 0;
for (size_t idx = 0; idx < iterations; idx++) { for (size_t idx = 0; idx < iterations; idx++) {
snprintf(key, sizeof(key), "key_%zu", idx); snprintf(key, sizeof(key), "key_%zu", idx);
@@ -53,32 +51,68 @@ void test_map(size_t iterations) {
// Cleanup values // Cleanup values
for (size_t idx = 0; idx < map->capacity; idx++) { for (size_t idx = 0; idx < map->capacity; idx++) {
if (map->elements[idx].state == ENTRY_OCCUPIED) { snprintf(key, sizeof(key), "key_%zu", idx);
int *val = (int*)map->elements[idx].value;
free(val); int *val = (int*)map_get(map, key).value.element;
} free(val);
map_remove(map, key);
} }
map_destroy(map); map_destroy(map);
} }
/*
 * Benchmark body for the String type: on every iteration it builds two
 * strings, concatenates them, upper-cases the result, searches a needle in
 * it and finally releases every intermediate string.
 */
void test_string(size_t iterations) {
    // Volatile accumulator so the work above cannot be optimized away
    volatile size_t total_len = 0;

    size_t round = 0;
    while (round < iterations) {
        string_t *left = string_new("hello").value.string;
        string_t *right = string_new(" World").value.string;
        string_t *joined = string_concat(left, right).value.string;
        string_t *shouted = string_to_upper(joined).value.string;

        total_len += string_size(shouted);

        string_t *pattern = string_new("WORLD").value.string;
        const int64_t hit = string_contains(shouted, pattern).value.idx;
        if (hit >= 0) {
            total_len += hit;
        }

        string_destroy(left);
        string_destroy(right);
        string_destroy(joined);
        string_destroy(shouted);
        string_destroy(pattern);

        round++;
    }
}
// Read CLOCK_MONOTONIC and express the reading as a single nanosecond count
static inline uint64_t now_ns(void) {
    struct timespec stamp;
    clock_gettime(CLOCK_MONOTONIC, &stamp);

    const uint64_t whole_seconds_ns = (uint64_t)stamp.tv_sec * 1000000000ULL;
    return whole_seconds_ns + stamp.tv_nsec;
}
long long benchmark(test_fn_t fun, size_t iterations, size_t runs) { long long benchmark(test_fn_t fun, size_t iterations, size_t runs) {
long long total = 0; long long total = 0;
for (size_t idx = 0; idx < runs; idx++) {
clock_t start = clock();
fun(iterations);
clock_t end = clock();
total += (long long)((end - start) * 1000 / CLOCKS_PER_SEC); for (size_t idx = 0; idx < runs; idx++) {
uint64_t start = now_ns();
fun(iterations);
uint64_t end = now_ns();
total += (end - start);
} }
return total / runs; return (long long)(total / runs / 1000000);
} }
int main(void) { int main(void) {
// Do a warmup run // Do a warmup run
test_vector(1000); test_vector(1000);
test_map(1000); test_map(1000);
test_string(1000);
printf("Computing Vector average time..."); printf("Computing Vector average time...");
fflush(stdout); fflush(stdout);
@@ -88,5 +122,9 @@ int main(void) {
fflush(stdout); fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30)); printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
printf("Computing String average time...");
fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_string, 1e5, 30));
return 0; return 0;
} }

View File

@@ -7,4 +7,5 @@ At the time being, this documentation includes the following pages:
- [vector.md](vector.md): vector documentation; - [vector.md](vector.md): vector documentation;
- [map.md](map.md): map documentation; - [map.md](map.md): map documentation;
- [bigint.md](bigint.md): bigint documentation. - [bigint.md](bigint.md): bigint documentation;
- [string.md](string.md): string documentation.

View File

@@ -46,7 +46,7 @@ The `BigInt` data structure supports the following methods:
- `bigint_result_t bigint_destroy(number)`: delete the big number; - `bigint_result_t bigint_destroy(number)`: delete the big number;
- `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters. - `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
As you can see by the previous function signatures, methods that operate on the As you can see from the previous function signatures, methods that operate on the
`BigInt` data type return a custom type called `bigint_result_t` which is defined as `BigInt` data type return a custom type called `bigint_result_t` which is defined as
follows: follows:
@@ -80,7 +80,7 @@ by setting the `status` field and by providing a descriptive message on the `mes
field. If the operation was successful (that is, `status == BIGINT_OK`), you can either field. If the operation was successful (that is, `status == BIGINT_OK`), you can either
move on with the rest of the program or read the returned value from the sum data type. move on with the rest of the program or read the returned value from the sum data type.
Of course, you can choose to ignore the return value (if you're brave enough :D) as Of course, you can choose to ignore the return value (if you're brave enough :D) as
illustrated in the first part of the README. illustrated on the first part of the README.
The sum data type (i.e., the `value` union) defines four different variables. Each The sum data type (i.e., the `value` union) defines four different variables. Each
of them has an unique scope as described below: of them has an unique scope as described below:

View File

@@ -5,7 +5,7 @@ aspects (internal design, memory layout, etc.) of the `Map` data structure.
`Map` is an hash table that uses open addressing with linear probing for collision `Map` is an hash table that uses open addressing with linear probing for collision
resolution and the [FNV-1a algorithm](https://en.wikipedia.org/wiki/FowlerNollVo_hash_function) as its hashing function. Resizing is performed resolution and the [FNV-1a algorithm](https://en.wikipedia.org/wiki/FowlerNollVo_hash_function) as its hashing function. Resizing is performed
automatically by doubling the capacity when the load factor exceeds 75%. Internally, automatically by doubling the capacity when the load factor exceeds 75%. Internally,
this data structure is represented by the following two structures: this data structure is represented by the following two layouts:
```c ```c
typedef struct { typedef struct {
@@ -46,7 +46,7 @@ The `Map` data structure supports the following methods:
- `size_t map_size(map)`: returns map size (i.e., the number of elements); - `size_t map_size(map)`: returns map size (i.e., the number of elements);
- `size_t map_capacity(map)`: returns map capacity (i.e., map total size). - `size_t map_capacity(map)`: returns map capacity (i.e., map total size).
As you can see by the previous function signatures, most methods that operate As you can see from the previous function signatures, most methods that operate
on the `Map` data type return a custom type called `map_result_t` which is on the `Map` data type return a custom type called `map_result_t` which is
defined as follows: defined as follows:
@@ -73,4 +73,4 @@ Each method that returns such type indicates whether the operation was successfu
the `status` field and by providing a descriptive message on the `message` field. If the operation was the `status` field and by providing a descriptive message on the `message` field. If the operation was
successful (that is, `status == MAP_OK`), you can either move on with the rest of the program or read successful (that is, `status == MAP_OK`), you can either move on with the rest of the program or read
the returned value from the sum data type. Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated the returned value from the sum data type. Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated
in the first part of the README. on the first part of the README.

96
docs/string.md Normal file
View File

@@ -0,0 +1,96 @@
# String Technical Details
In this document you can find a quick overview of the technical
aspects (internal design, memory layout, etc.) of the `String` data structure.
`String` is an immutable string data type with partial UTF-8 support.
This means that methods return a new string instance rather than modifying the string in-place.
Internally, this data structure is represented by the following layout:
```c
typedef struct {
char *data;
size_t byte_size;
size_t byte_capacity;
size_t char_count;
} string_t;
```
where the `data` variable represents the actual string (represented as a pointer to `char`),
the `byte_size` variable indicates the actual size (in bytes) of the string, the
`byte_capacity` variable represents the total amount of allocated memory (in bytes) and the
`char_count` variable represents the number of logical characters, that is the number of
symbols.
As mentioned earlier, this library provides partial UTF-8 support. It is able to recognize
UTF-8 byte sequences as individual Unicode code points, which allows it to correctly distinguish
between byte length and character count. It fully supports Unicode symbols and emojis, while
remaining backward compatible with ASCII strings.
However, this data structure does not support localization. In particular, it does not perform
locale-aware conversion; for instance, uppercase/lowercase transformations are limited to ASCII
characters only. As a result, the German scharfes S (`ß`) is not converted to `SS`, the Spanish
`Ñ` is not converted to `ñ` and the Italian `é` (and its variants) is not treated as a single
symbol, but rather as a base letter combined with an accent.
At the time being, `String` supports the following methods:
- `string_result_t string_new(c_str)`: create a new string;
- `string_result_t string_clone(str)`: clone an existing string;
- `string_result_t string_concat(x, y)`: concatenate two strings together;
- `string_result_t string_contains(haystack, needle)`: search whether the `haystack` string contains `needle`;
- `string_result_t string_slice(str, start, end)`: return a slice (a new string) from `str` between `start` and `end` indices (inclusive);
- `string_result_t string_eq(x, y, case_sensitive)`: check whether `x` and `y` are equal;
- `string_result_t string_get_at(str, position)`: get the UTF-8 symbol indexed by `position` from `str`;
- `string_result_t string_set_at(str, position, utf8_char)`: write a UTF-8 symbol into `str` at index `position`;
- `string_result_t string_to_lower(str)`: convert a string to lowercase;
- `string_result_t string_to_upper(str)`: convert a string to uppercase;
- `string_result_t string_reverse(str)`: reverse a string;
- `string_result_t string_trim(str)`: remove leading and trailing white space from a string;
- `string_result_t string_split(str, delim)`: split a string into an array of `string_t` by specifying a separator;
- `string_result_t string_destroy(str)`: remove a string from memory;
- `string_result_t string_split_destroy(split, count)`: remove an array of strings from memory;
- `size_t string_size(str)`: return string character count.
As you can see from the previous function signatures, most methods that operate on the `String`
data type return a custom type called `string_result_t` which is defined as follows:
```c
typedef enum {
STRING_OK = 0x0,
STRING_ERR_ALLOCATE,
STRING_ERR_INVALID,
STRING_ERR_INVALID_UTF8,
STRING_ERR_OVERFLOW
} string_status_t;
typedef struct {
string_status_t status;
uint8_t message[RESULT_MSG_SIZE];
union {
string_t *string; // For new, clone, slice, reverse, trim
char *symbol; // For get_at
int64_t idx; // For contains
bool is_equ; // For comparison
struct { // For split
string_t **strings;
size_t count;
} split;
} value;
} string_result_t;
```
Each method that returns such a type indicates whether the operation was successful or not
by setting the `status` field and by providing a descriptive message in the `message`
field. If the operation was successful (that is, `status == STRING_OK`), you can either
move on with the rest of your program or read the returned value from the sum data type.
Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated
in the first part of the README.
The sum data type (i.e., the `value` union) defines five different variables.
Each of them has a unique scope as described below:
- `string`: result of `new`, `clone`, `slice`, `reverse` and `trim` functions;
- `symbol`: result of `get_at` function;
- `idx`: result of `contains` function;
- `is_equ`: result of `eq` function. It's true when two strings are equal, false otherwise;
- `split`: result of `split` function. It contains an array of `string_t` and its number of elements.

View File

@@ -5,7 +5,7 @@ aspects (internal design, memory layout, etc.) of the `Vector` data structure.
`Vector` is a dynamic array with generic data type support; this means that you can store `Vector` is a dynamic array with generic data type support; this means that you can store
any kind of homogenous value on this data structure. Resizing is performed automatically any kind of homogenous value on this data structure. Resizing is performed automatically
by increasing the capacity by 1.5 times when the array becomes full. Internally, this by increasing the capacity by 1.5 times when the array becomes full. Internally, this
data structure is represented by the following structure: data structure is represented by the following layout:
```c ```c
typedef struct { typedef struct {
@@ -39,7 +39,7 @@ At the time being, `Vector` supports the following methods:
- `size_t vector_size(vector)`: return vector size (i.e., the number of elements); - `size_t vector_size(vector)`: return vector size (i.e., the number of elements);
- `size_t vector_capacity(vector)`: return vector capacity (i.e., vector total size). - `size_t vector_capacity(vector)`: return vector capacity (i.e., vector total size).
As you can see by the previous function signatures, most methods that operate As you can see from the previous function signatures, most methods that operate
on the `Vector` data type return a custom type called `vector_result_t` which is on the `Vector` data type return a custom type called `vector_result_t` which is
defined as follows: defined as follows:
@@ -66,7 +66,7 @@ Each method that returns such type indicates whether the operation was successfu
by setting the `status` field and by providing a descriptive message on the `message` by setting the `status` field and by providing a descriptive message on the `message`
field. If the operation was successful (that is, `status == VECTOR_OK`), you can either field. If the operation was successful (that is, `status == VECTOR_OK`), you can either
move on with the rest of the program or read the returned value from the sum data type. Of course, you can choose to move on with the rest of the program or read the returned value from the sum data type. Of course, you can choose to
ignore the return value (if you're brave enough :D) as illustrated in the first part of the README. ignore the return value (if you're brave enough :D) as illustrated on the first part of the README.
## Functional methods ## Functional methods
`Vector` provides three functional methods called `map`, `filter` and `reduce` which allow the caller to apply a computation to the vector, `Vector` provides three functional methods called `map`, `filter` and `reduce` which allow the caller to apply a computation to the vector,

File diff suppressed because it is too large Load Diff

View File

@@ -11,10 +11,6 @@
#include "map.h" #include "map.h"
// Internal methods // Internal methods
static uint64_t hash_key(const char *key);
static size_t map_insert_index(const map_t *map, const char *key);
static size_t map_find_index(const map_t *map, const char *key);
static map_result_t map_resize(map_t *map);
/** /**
* hash_key * hash_key
@@ -22,7 +18,7 @@ static map_result_t map_resize(map_t *map);
* *
* Returns the digest of @key using the Fowler-Noll-Vo hashing algorithm * Returns the digest of @key using the Fowler-Noll-Vo hashing algorithm
*/ */
uint64_t hash_key(const char *key) { static uint64_t hash_key(const char *key) {
uint64_t hash = FNV_OFFSET_BASIS_64; uint64_t hash = FNV_OFFSET_BASIS_64;
while (*key) { while (*key) {
@@ -33,43 +29,6 @@ uint64_t hash_key(const char *key) {
return hash; return hash;
} }
/**
* map_new
*
* Returns a map_result_t data type containing a new hash map
*/
map_result_t map_new(void) {
map_result_t result = {0};
map_t *map = malloc(sizeof(map_t));
if (map == NULL) {
result.status = MAP_ERR_ALLOCATE;
SET_MSG(result, "Failed to allocate memory for map");
return result;
}
map->elements = calloc(INITIAL_CAP, sizeof(map_element_t));
if (map->elements == NULL) {
free(map);
result.status = MAP_ERR_ALLOCATE;
SET_MSG(result, "Failed to allocate memory for map elements");
return result;
}
// Initialize map
map->capacity = INITIAL_CAP;
map->size = 0;
map->tombstone_count = 0;
result.status = MAP_OK;
SET_MSG(result, "Map successfully created");
result.value.map = map;
return result;
}
/** /**
* map_insert_index * map_insert_index
* @map: a non-null map * @map: a non-null map
@@ -80,7 +39,7 @@ map_result_t map_new(void) {
* *
* Returns the index of available slot or SIZE_MAX otherwise * Returns the index of available slot or SIZE_MAX otherwise
*/ */
size_t map_insert_index(const map_t *map, const char *key) { static size_t map_insert_index(const map_t *map, const char *key) {
const uint64_t key_digest = hash_key(key); const uint64_t key_digest = hash_key(key);
size_t idx = key_digest % map->capacity; size_t idx = key_digest % map->capacity;
size_t delete_tracker = map->capacity; // Fallback index size_t delete_tracker = map->capacity; // Fallback index
@@ -113,7 +72,7 @@ size_t map_insert_index(const map_t *map, const char *key) {
* *
* Returns a a map_result_t data type containing the status * Returns a a map_result_t data type containing the status
*/ */
map_result_t map_resize(map_t *map) { static map_result_t map_resize(map_t *map) {
map_result_t result = {0}; map_result_t result = {0};
const size_t old_capacity = map->capacity; const size_t old_capacity = map->capacity;
@@ -174,6 +133,43 @@ map_result_t map_resize(map_t *map) {
return result; return result;
} }
/**
* map_new
*
* Returns a map_result_t data type containing a new hash map
*/
map_result_t map_new(void) {
map_result_t result = {0};
map_t *map = malloc(sizeof(map_t));
if (map == NULL) {
result.status = MAP_ERR_ALLOCATE;
SET_MSG(result, "Failed to allocate memory for map");
return result;
}
map->elements = calloc(INITIAL_CAP, sizeof(map_element_t));
if (map->elements == NULL) {
free(map);
result.status = MAP_ERR_ALLOCATE;
SET_MSG(result, "Failed to allocate memory for map elements");
return result;
}
// Initialize map
map->capacity = INITIAL_CAP;
map->size = 0;
map->tombstone_count = 0;
result.status = MAP_OK;
SET_MSG(result, "Map successfully created");
result.value.map = map;
return result;
}
/** /**
* map_add * map_add
* @map: a non-null map * @map: a non-null map

934
src/string.c Normal file
View File

@@ -0,0 +1,934 @@
/*
 * SET_MSG
 *  @result: a result structure exposing a 'message' buffer
 *  @msg: a NUL-terminated C-string
 *
 *  Writes @msg into the 'message' field of @result through snprintf, which
 *  truncates the text to RESULT_MSG_SIZE bytes (terminator included).
 *  Both arguments are parenthesized so that compound expressions
 *  (e.g. a conditional) are cast as a whole.
 */
#define SET_MSG(result, msg) \
    do { \
        snprintf((char *)(result).message, RESULT_MSG_SIZE, "%s", (const char *)(msg)); \
    } while (0)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "string.h"
// Tell whether a byte is one of the six ASCII white space characters
static inline bool is_space(unsigned char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case '\v':
            return true;
        default:
            return false;
    }
}
// Map a UTF-8 lead byte to the length (in bytes) of the sequence it starts.
// Continuation bytes (0x80-0xBF) and bytes above 0xF7 yield -1
static inline int utf8_char_len(unsigned char byte) {
    if (byte < 0x80) {
        return 1; // 0xxxxxxx
    }
    if (byte >= 0xC0 && byte < 0xE0) {
        return 2; // 110xxxxx
    }
    if (byte >= 0xE0 && byte < 0xF0) {
        return 3; // 1110xxxx
    }
    if (byte >= 0xF0 && byte < 0xF8) {
        return 4; // 11110xxx
    }

    return -1;
}
// Validate a single UTF-8 symbol: @utf8_char must be a NUL-terminated
// C-string made of exactly one well-formed sequence (lead byte followed by
// the expected number of continuation bytes). On success the sequence length
// is stored in @out_len (when non-NULL) and true is returned.
static bool utf8_is_char_valid(const char *utf8_char, int *out_len) {
    if (utf8_char == NULL) {
        return false;
    }

    // An empty string holds no symbol. Rejecting it here also prevents the
    // terminator check below from reading one byte past the end of the input
    if (utf8_char[0] == '\0') {
        return false;
    }

    // The length must stay signed: utf8_char_len reports an invalid lead
    // byte as -1, which an unsigned variable would turn into a huge value
    const int len = utf8_char_len((unsigned char)utf8_char[0]);
    if (len <= 0) {
        return false;
    }

    // Every trailing byte must look like 10xxxxxx. A premature terminator
    // fails this test as well, so the scan never runs past the string
    for (int idx = 1; idx < len; idx++) {
        if (((unsigned char)utf8_char[idx] & 0xC0) != 0x80) {
            return false;
        }
    }

    // Nothing may follow the symbol
    if (utf8_char[len] != '\0') {
        return false;
    }

    if (out_len) {
        *out_len = len;
    }

    return true;
}
// Validate a NUL-terminated UTF-8 string and measure its byte length and
// character count in one pass. Returns false (leaving the output parameters
// untouched) as soon as an invalid lead byte, a truncated sequence or a
// malformed continuation byte is found.
static bool utf8_scan(const char *str, size_t *out_byte_size, size_t *out_char_count) {
    size_t b_size = 0;
    size_t c_count = 0;
    const unsigned char *p = (const unsigned char *)str;

    while (p[b_size] != '\0') {
        // The length must stay signed: utf8_char_len reports an invalid lead
        // byte as -1, which an unsigned variable would turn into a huge value
        const int len = utf8_char_len(p[b_size]);
        if (len <= 0) {
            return false;
        }

        // Each trailing byte must exist and look like 10xxxxxx
        for (int idx = 1; idx < len; idx++) {
            const unsigned char next = p[b_size + (size_t)idx];
            if (next == '\0' || (next & 0xC0) != 0x80) {
                return false;
            }
        }

        b_size += (size_t)len;
        c_count++;
    }

    *out_byte_size = b_size;
    *out_char_count = c_count;

    return true;
}
// Turn the UTF-8 sequence starting at @str into its code point. The sequence
// length reported by utf8_char_len is stored in @char_len; when that length
// is not in the 1-4 range the returned code point is 0
static uint32_t utf8_decode(const char *str, int *char_len) {
    const unsigned char lead = (unsigned char)*str;
    const int width = utf8_char_len(lead);

    *char_len = width;

    if (width == 1) {
        return lead;
    }

    if (width == 2) {
        return ((lead & 0x1F) << 6) |
               (str[1] & 0x3F);
    }

    if (width == 3) {
        return ((lead & 0x0F) << 12) |
               ((str[1] & 0x3F) << 6) |
               (str[2] & 0x3F);
    }

    if (width == 4) {
        return ((lead & 0x07) << 18) |
               ((str[1] & 0x3F) << 12) |
               ((str[2] & 0x3F) << 6) |
               (str[3] & 0x3F);
    }

    return 0;
}
// Write @codepoint into @out as a UTF-8 sequence (no terminator is appended).
// Returns the number of bytes written, or 0 when @codepoint is above 0x10FFFF
static int utf8_encode(uint32_t codepoint, char *out) {
    // Lead-byte marker, indexed by sequence width
    static const unsigned char lead_marker[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };

    int width;
    if (codepoint < 0x80) {
        width = 1;
    } else if (codepoint < 0x800) {
        width = 2;
    } else if (codepoint < 0x10000) {
        width = 3;
    } else if (codepoint < 0x110000) {
        width = 4;
    } else {
        return 0;
    }

    // Fill continuation bytes from the tail, six payload bits at a time
    uint32_t bits = codepoint;
    for (int pos = width - 1; pos > 0; pos--) {
        out[pos] = (char)(0x80 | (bits & 0x3F));
        bits >>= 6;
    }

    // Whatever is left belongs to the lead byte
    out[0] = (char)(lead_marker[width] | bits);

    return width;
}
/**
 * string_new
 *  @c_str: a C-string
 *
 *  Builds a String out of @c_str: the input is validated and measured with
 *  utf8_scan, then copied (terminator included) into a freshly allocated buffer
 *
 *  Returns a string_result_t containing a new String data type
 */
string_result_t string_new(const char *c_str) {
    string_result_t result = {0};
    size_t byte_len = 0;
    size_t symbol_count = 0;

    if (c_str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid null input string");

        return result;
    }

    if (!utf8_scan(c_str, &byte_len, &symbol_count)) {
        result.status = STRING_ERR_INVALID_UTF8;
        SET_MSG(result, "Malformed UTF-8 sequence");

        return result;
    }

    // Allocate the descriptor first and the payload only if that succeeded
    string_t *instance = malloc(sizeof(*instance));
    char *payload = (instance != NULL) ? malloc(byte_len + 1) : NULL;
    if (payload == NULL) {
        free(instance); // no-op when the descriptor itself could not be allocated
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(payload, c_str, byte_len + 1);

    instance->data = payload;
    instance->byte_size = byte_len;
    instance->byte_capacity = byte_len + 1;
    instance->char_count = symbol_count;

    result.status = STRING_OK;
    SET_MSG(result, "String successfully created");
    result.value.string = instance;

    return result;
}
/**
 * string_clone
 *  @str: a non-null string
 *
 *  Creates an independent duplicate of @str (descriptor and character data)
 *
 *  Returns a string_result_t containing the copied string
 */
string_result_t string_clone(const string_t *str) {
    string_result_t result = {0};

    if (!str) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    const size_t payload_size = str->byte_size + 1; // data plus terminator

    // Allocate the descriptor first and the payload only if that succeeded
    string_t *duplicate = malloc(sizeof(*duplicate));
    char *payload = (duplicate != NULL) ? malloc(payload_size) : NULL;
    if (payload == NULL) {
        free(duplicate); // no-op when the descriptor itself could not be allocated
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(payload, str->data, payload_size);

    duplicate->data = payload;
    duplicate->byte_size = str->byte_size;
    duplicate->byte_capacity = payload_size;
    duplicate->char_count = str->char_count;

    result.status = STRING_OK;
    result.value.string = duplicate;
    SET_MSG(result, "String successfully copied");

    return result;
}
/**
 * string_concat
 *  @x: a non-null string
 *  @y: a non-null string
 *
 *  Joins the bytes of @x and @y in a temporary buffer and hands that
 *  buffer to string_new to build the resulting String
 *
 *  Returns a string_result_t containing the new string
 */
string_result_t string_concat(const string_t *x, const string_t *y) {
    string_result_t result = {0};

    if (!x || !y) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid strings");

        return result;
    }

    // Refuse operands whose combined size (plus terminator) does not fit a size_t
    if (x->byte_size > SIZE_MAX - y->byte_size - 1) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Concatenation exceeds size limits");

        return result;
    }

    const size_t joined_size = x->byte_size + y->byte_size;
    char *scratch = malloc(joined_size + 1);
    if (!scratch) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    // Append both operands back to back, then terminate the buffer
    char *cursor = scratch;
    memcpy(cursor, x->data, x->byte_size);
    cursor += x->byte_size;
    memcpy(cursor, y->data, y->byte_size);
    cursor += y->byte_size;
    *cursor = '\0';

    // string_new copies the buffer, so the scratch area can be released
    result = string_new(scratch);
    free(scratch);

    return result;
}
/**
 * string_contains
 *  @haystack: a non-null string
 *  @needle: a non-null, non-empty string
 *
 *  Searches @haystack for the first occurrence of @needle
 *
 *  Returns a string_result_t whose idx is the symbol (not byte) index of
 *  the match, or -1 with STRING_OK when @needle does not occur
 */
string_result_t string_contains(const string_t *haystack, const string_t *needle) {
    string_result_t result = {
        .status = STRING_OK,
        .value.idx = -1
    };

    if (haystack == NULL || needle == NULL || needle->byte_size == 0) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid substrings");

        return result;
    }

    const char *match = strstr(haystack->data, needle->data);
    if (match == NULL) {
        SET_MSG(result, "Substring not found");

        return result;
    }

    // Translate the byte position of the match into a symbol index
    size_t symbols_before = 0;
    for (const char *cur = haystack->data; cur < match; cur += utf8_char_len((unsigned char)*cur)) {
        symbols_before++;
    }

    result.value.idx = (int64_t)symbols_before;
    SET_MSG(result, "Substring found");

    return result;
}
/**
 * string_slice
 *  @str: a non-null string
 *  @start: the lower bound (inclusive), as a symbol index
 *  @end: the upper bound (inclusive), as a symbol index
 *
 *  Extracts a slice from @str between @start and @end (inclusive).
 *  Bounds are expressed in symbols; the matching byte range is found by
 *  walking the UTF-8 sequence lengths
 *
 *  Returns a string_result_t data type containing the slice, or
 *  STRING_ERR_OVERFLOW when @start > @end or @end is past the last symbol
 */
string_result_t string_slice(const string_t *str, size_t start, size_t end) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    if (start > end || end >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

        return result;
    }

    // Byte offset of the first symbol of the slice
    size_t start_byte_offset = 0;
    for (size_t idx = 0; idx < start; idx++) {
        start_byte_offset += utf8_char_len((unsigned char)str->data[start_byte_offset]);
    }

    // Byte offset one past the last symbol of the slice
    size_t end_byte_offset = start_byte_offset;
    for (size_t idx = start; idx <= end; idx++) {
        end_byte_offset += utf8_char_len((unsigned char)str->data[end_byte_offset]);
    }

    const size_t slice_byte_size = end_byte_offset - start_byte_offset;

    string_t *slice = malloc(sizeof(string_t));
    if (slice == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    slice->data = malloc(slice_byte_size + 1);
    if (slice->data == NULL) {
        // Release the header too, otherwise it leaks on this path
        free(slice);
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(slice->data, str->data + start_byte_offset, slice_byte_size);
    slice->data[slice_byte_size] = '\0';

    slice->byte_size = slice_byte_size;
    slice->byte_capacity = slice_byte_size + 1;
    slice->char_count = end - start + 1;

    result.status = STRING_OK;
    result.value.string = slice;
    SET_MSG(result, "String sliced successfully");

    return result;
}
/**
 * string_eq
 *  @x: a non-null string
 *  @y: a non-null string
 *  @case_sensitive: boolean value for case sensitive comparison
 *
 *  Compares two Strings. The case-insensitive mode folds only the ASCII
 *  letters 'A'..'Z'; every other code point is compared as is
 *
 *  Note: strings with a different symbol count are reported with
 *  STRING_ERR_INVALID (and is_equ == false), not with STRING_OK
 *
 *  Returns a string_result_t containing the comparison result
 */
string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive) {
    string_result_t result = {
        .status = STRING_OK,
        .value.is_equ = false
    };

    if (x == NULL || y == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid strings");

        return result;
    }

    if (x->char_count != y->char_count) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Strings differ in length");

        return result;
    }

    if (case_sensitive) {
        result.value.is_equ = (strcmp(x->data, y->data) == 0);
    } else {
        const char *p1 = x->data, *p2 = y->data;
        bool same = true;

        while (same && *p1 && *p2) {
            int l1, l2;

            const uint32_t codepoint1 = utf8_decode(p1, &l1);
            const uint32_t codepoint2 = utf8_decode(p2, &l2);

            const uint32_t c1 = (codepoint1 >= 'A' && codepoint1 <= 'Z') ? codepoint1 + 32 : codepoint1;
            const uint32_t c2 = (codepoint2 >= 'A' && codepoint2 <= 'Z') ? codepoint2 + 32 : codepoint2;

            if (c1 != c2) {
                // Leave the loop instead of returning, so that a mismatch
                // still gets the completion message set below
                same = false;
            } else {
                p1 += l1;
                p2 += l2;
            }
        }

        // Equal only if no symbol differed and both strings ended together
        result.value.is_equ = same && (*p1 == *p2);
    }

    SET_MSG(result, "Comparison completed successfully");

    return result;
}
/**
 * string_get_at
 *  @str: a non-null string
 *  @position: the position of the symbol to read
 *
 *  Reads the symbol stored at index @position of @str. The symbol is
 *  copied into a newly allocated, NUL-terminated buffer that the caller
 *  has to release with free()
 *
 *  Returns a string_result_t containing the symbol as a C string
 */
string_result_t string_get_at(const string_t *str, size_t position) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    if (position >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

        return result;
    }

    // Skip the symbols that precede the requested one
    const char *cursor = str->data;
    size_t to_skip = position;
    while (to_skip > 0) {
        cursor += utf8_char_len((unsigned char)*cursor);
        to_skip--;
    }

    int width = utf8_char_len((unsigned char)*cursor);

    char *symbol = malloc(width + 1);
    if (symbol == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(symbol, cursor, width);
    symbol[width] = '\0';

    result.value.symbol = symbol;
    result.status = STRING_OK;
    SET_MSG(result, "Symbol successfully retrieved");

    return result;
}
/**
 * string_set_at
 *  @str: a non-null string
 *  @position: the position to write into
 *  @utf8_char: an UTF8 symbol
 *
 *  Produces a new String equal to @str with the symbol at @position
 *  replaced by @utf8_char. @str itself is left untouched; the replacement
 *  may have a different byte width than the symbol it replaces
 *
 *  Returns a string_result_t data type
 */
string_result_t string_set_at(const string_t *str, size_t position, const char *utf8_char) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    int new_char_bytes;
    if (utf8_is_char_valid(utf8_char, &new_char_bytes) == 0) {
        result.status = STRING_ERR_INVALID_UTF8;
        SET_MSG(result, "Invalid UTF-8 character");

        return result;
    }

    if (position >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

        return result;
    }

    // Walk to the symbol being replaced
    const char *target = str->data;
    for (size_t seen = 0; seen != position; seen++) {
        target += utf8_char_len((unsigned char)*target);
    }

    // Layout of the result: [head][new symbol][tail]
    const size_t head_len = target - str->data;
    const int replaced_len = utf8_char_len((unsigned char)*target);
    const size_t tail_len = str->byte_size - head_len - replaced_len;
    const size_t total_len = head_len + new_char_bytes + tail_len;

    string_t *updated = malloc(sizeof *updated);
    if (updated == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    char *bytes = malloc(total_len + 1);
    updated->data = bytes;
    if (bytes == NULL) {
        free(updated);
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    char *out = bytes;
    memcpy(out, str->data, head_len);
    out += head_len;
    memcpy(out, utf8_char, new_char_bytes);
    out += new_char_bytes;
    memcpy(out, target + replaced_len, tail_len);
    bytes[total_len] = '\0';

    updated->byte_size = total_len;
    updated->byte_capacity = total_len + 1;
    updated->char_count = str->char_count;

    result.status = STRING_OK;
    result.value.string = updated;
    SET_MSG(result, "Symbol successfully set");

    return result;
}
/**
 * string_to_lower
 *  @str: a non-null string
 *
 *  Converts a String to lowercase. Only the ASCII letters 'A'..'Z' are
 *  converted; every other code point is copied unchanged
 *
 *  Returns a string_result_t containing a new string. If building the new
 *  string fails, the error reported by string_new is returned as is
 */
string_result_t string_to_lower(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    char *buf = malloc(str->byte_capacity);
    if (buf == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    const char *src = str->data;
    char *dst = buf;

    while (*src) {
        int len;
        uint32_t codepoint = utf8_decode(src, &len);
        uint32_t lower = (codepoint >= 'A' && codepoint <= 'Z') ? codepoint + 32 : codepoint;
        dst += utf8_encode(lower, dst);
        src += len;
    }
    *dst = '\0';

    result = string_new(buf);
    free(buf);

    // Do not mask a string_new failure behind a forced STRING_OK
    if (result.status != STRING_OK) {
        return result;
    }

    SET_MSG(result, "String successfully converted to lowercase");

    return result;
}
/**
 * string_to_upper
 *  @str: a non-null string
 *
 *  Converts a String to uppercase. Only the ASCII letters 'a'..'z' are
 *  converted; every other code point is copied unchanged
 *
 *  Returns a string_result_t containing a new string. If building the new
 *  string fails, the error reported by string_new is returned as is
 */
string_result_t string_to_upper(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    char *buf = malloc(str->byte_capacity);
    if (buf == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    const char *src = str->data;
    char *dst = buf;

    while (*src) {
        int len;
        uint32_t codepoint = utf8_decode(src, &len);
        uint32_t upper = (codepoint >= 'a' && codepoint <= 'z') ? codepoint - 32 : codepoint;
        dst += utf8_encode(upper, dst);
        src += len;
    }
    *dst = '\0';

    result = string_new(buf);
    free(buf);

    // Do not mask a string_new failure behind a forced STRING_OK
    if (result.status != STRING_OK) {
        return result;
    }

    SET_MSG(result, "String successfully converted to uppercase");

    return result;
}
/**
* string_reverse
* @str: a non-null string
*
* Reverses @str
*
* Returns a new string_result_t containing the reversed string
*/
string_result_t string_reverse(const string_t *str) {
string_result_t result = {0};
if (str == NULL) {
result.status = STRING_ERR_INVALID;
SET_MSG(result, "Invalid string");
return result;
}
char *buf = malloc(str->byte_capacity);
if (buf == NULL) {
result.status = STRING_ERR_ALLOCATE;
SET_MSG(result, "Cannot allocate memory");
return result;
}
const char **pos = malloc(str->char_count * sizeof(char *));
if (pos == NULL) {
result.status = STRING_ERR_ALLOCATE;
SET_MSG(result, "Cannot allocate memory");
return result;
}
const char *ptr = str->data;
for (size_t idx = 0; idx < str->char_count; idx++) {
pos[idx] = ptr;
ptr += utf8_char_len((unsigned char)*ptr);
}
char *dst = buf;
for (int64_t idx = (int64_t)str->char_count - 1; idx >= 0; idx--) {
int len = utf8_char_len((unsigned char)*pos[idx]);
memcpy(dst, pos[idx], len);
dst += len;
}
*dst = '\0';
free(pos);
result = string_new(buf);
free(buf);
SET_MSG(result, "String successfully reversed");
return result;
}
/**
 * string_trim
 *  @str: a non-null string
 *
 *  Trims leading and trailing whitespace (as classified by is_space)
 *  from @str. A string made only of whitespace yields an empty string
 *
 *  Returns a string_result_t containing the trimmed string. If building
 *  the new string fails, the error reported by string_new is returned as is
 */
string_result_t string_trim(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    const char *start = str->data;
    while (*start && is_space((unsigned char)*start)) {
        start++;
    }

    // Nothing but whitespace (or an empty input)
    if (*start == '\0') {
        return string_new("");
    }

    // At least one non-space byte exists at 'start', so this loop stops there at the latest
    const char *end = str->data + str->byte_size - 1;
    while (end > start && is_space((unsigned char)*end)) {
        end--;
    }

    const size_t len = (end - start) + 1;

    char *trimmed = malloc(len + 1);
    if (trimmed == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(trimmed, start, len);
    trimmed[len] = '\0';

    result = string_new(trimmed);
    free(trimmed);

    // Do not mask a string_new failure behind a forced STRING_OK
    if (result.status != STRING_OK) {
        return result;
    }

    SET_MSG(result, "String successfully trimmed");

    return result;
}
/**
 * string_split
 *  @str: a non-null string
 *  @delim: a non-null, non-empty delimiter string
 *
 *  Splits @str by @delim. N occurrences of the delimiter produce N + 1
 *  parts; parts may be empty Strings
 *
 *  Returns a string_result_t containing an array of String pointers and
 *  its length. On failure every part created so far is destroyed
 */
string_result_t string_split(const string_t *str, const char *delim) {
    string_result_t result = {0};
    string_result_t tmp_res = {0};

    if (str == NULL || delim == NULL || delim[0] == '\0') {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid strings");

        return result;
    }

    const char *ptr = str->data;
    const size_t delim_len = strlen(delim);

    // First pass: count the parts so the array can be sized up front
    size_t count = 1;
    while ((ptr = strstr(ptr, delim))) {
        count++;
        ptr += delim_len;
    }

    string_t **string_array = malloc(count * sizeof(string_t *));
    if (string_array == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    // Second pass: materialize each part that is followed by a delimiter
    const char *start = str->data;
    size_t idx = 0;

    while ((ptr = strstr(start, delim))) {
        const size_t part_len = ptr - start;

        // string_new expects a NUL-terminated input, hence the temporary copy
        char *tmp = malloc(part_len + 1);
        if (tmp == NULL) {
            result.status = STRING_ERR_ALLOCATE;
            SET_MSG(result, "Cannot allocate memory");

            goto cleanup;
        }

        memcpy(tmp, start, part_len);
        tmp[part_len] = '\0';

        tmp_res = string_new(tmp);
        free(tmp);
        if (tmp_res.status != STRING_OK) { result = tmp_res; goto cleanup; }

        string_array[idx++] = tmp_res.value.string;
        start = ptr + delim_len;
    }

    // Whatever follows the last delimiter is the final part
    tmp_res = string_new(start);
    if (tmp_res.status != STRING_OK) { result = tmp_res; goto cleanup; }

    string_array[idx] = tmp_res.value.string;

    result.status = STRING_OK;
    result.value.split.strings = string_array;
    result.value.split.count = count;
    SET_MSG(result, "String successfully split");

    return result;

cleanup:
    // Only the first idx entries have been filled in
    for (size_t j = 0; j < idx; j++) {
        string_destroy(string_array[j]);
    }
    free(string_array);

    return result;
}
/**
 * string_destroy
 *  @str: a non-null string
 *
 *  Releases the byte buffer of @str and then @str itself
 *
 *  Returns a string_result_t data type (STRING_ERR_INVALID on NULL input)
 */
string_result_t string_destroy(string_t *str) {
    string_result_t result = {0};

    if (str != NULL) {
        free(str->data);
        free(str);

        result.status = STRING_OK;
        SET_MSG(result, "String successfully deleted");
    } else {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");
    }

    return result;
}
/**
 * string_split_destroy
 *  @split: an array of pointers of String
 *  @count: the number of elements
 *
 *  Destroys every String referenced by @split and then the array itself.
 *  The result of each individual string_destroy call is not inspected
 *
 *  Returns a string_result_t data type (STRING_ERR_INVALID on NULL input)
 */
string_result_t string_split_destroy(string_t **split, size_t count) {
    string_result_t result = {0};

    if (split != NULL) {
        size_t idx = 0;
        while (idx < count) {
            string_destroy(split[idx]);
            idx++;
        }
        free(split);

        result.status = STRING_OK;
        SET_MSG(result, "Array of strings successfully deleted");
    } else {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");
    }

    return result;
}

70
src/string.h Normal file
View File

@@ -0,0 +1,70 @@
#ifndef STRING_H
#define STRING_H
#define RESULT_MSG_SIZE 64
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
// Outcome of a String API call, stored in string_result_t.status
typedef enum {
// The call succeeded
STRING_OK = 0x0,
// A memory allocation failed
STRING_ERR_ALLOCATE,
// An argument was rejected (e.g. a NULL string)
STRING_ERR_INVALID,
// The supplied symbol is not a valid UTF-8 character
STRING_ERR_INVALID_UTF8,
// An index is out of bounds or a size computation would overflow
STRING_ERR_OVERFLOW
} string_status_t;
// UTF-8 string: sizes are tracked both in bytes and in symbols
typedef struct {
// NUL-terminated byte buffer owned by the string
char *data;
size_t byte_size; // Size in bytes excluding the NUL terminator
size_t byte_capacity; // Total allocated memory in bytes (terminator included)
size_t char_count; // Number of UTF-8 symbols (what string_size reports)
} string_t;
// Value returned by every String API: a status, a human readable message
// and a payload. Which union member is meaningful depends on the API that
// produced the result
typedef struct {
string_status_t status;
// Description of the outcome, at most RESULT_MSG_SIZE bytes
uint8_t message[RESULT_MSG_SIZE];
union {
string_t *string; // For new, clone, concat, slice, set_at, to_lower, to_upper, reverse, trim
char *symbol; // For get_at (heap buffer, released by the caller with free)
int64_t idx; // For contains (symbol index, -1 when not found)
bool is_equ; // For comparison
struct { // For split
string_t **strings;
size_t count;
} split;
} value;
} string_result_t;
#ifdef __cplusplus
extern "C" {
#endif
// Public APIs
// Every function reports its outcome through string_result_t.status

// Construction: each call returns a newly allocated String
string_result_t string_new(const char *c_str);
string_result_t string_clone(const string_t *str);
string_result_t string_concat(const string_t *x, const string_t *y);
// Queries: indexes and bounds are expressed in symbols, not bytes
string_result_t string_contains(const string_t *haystack, const string_t *needle);
string_result_t string_slice(const string_t *str, size_t start, size_t end);
string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive);
string_result_t string_get_at(const string_t *str, size_t position);
// Transformations: the input is const, the outcome is a new String
string_result_t string_set_at(const string_t *str, size_t position, const char *utf8_char);
string_result_t string_to_lower(const string_t *str);
string_result_t string_to_upper(const string_t *str);
string_result_t string_reverse(const string_t *str);
string_result_t string_trim(const string_t *str);
string_result_t string_split(const string_t *str, const char *delim);
// Deallocation
string_result_t string_destroy(string_t *str);
string_result_t string_split_destroy(string_t **split, size_t count);
// Inline methods

// Number of symbols stored in str; a NULL string counts as empty
static inline size_t string_size(const string_t *str) {
    if (str == NULL) {
        return 0;
    }

    return str->char_count;
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -10,10 +10,112 @@
#include "vector.h" #include "vector.h"
// Internal methods // Internal methods
static vector_result_t vector_resize(vector_t *vector); /**
static void swap(void *x, void *y, size_t size); * vector_resize
static size_t partition(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp); * @vector: a non-null vector
static void quicksort(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp); *
* Increases the size of @vector
*
* Returns a vector_result_t data type containing the status
*/
static vector_result_t vector_resize(vector_t *vector) {
    // Grows the backing array: capacity doubles, or becomes 1 when it was 0.
    // On failure the vector is left untouched.
    // NOTE(review): assumes vector->data_size is never 0 (it is used as a
    // divisor below) - presumably enforced at creation time, confirm
    vector_result_t result = {0};

    const size_t old_capacity = vector->capacity;

    // Check for integer overflow: first on the doubling itself (it would
    // wrap around and silently shrink the vector), then on the byte count
    // handed to realloc
    if (old_capacity > SIZE_MAX / 2 ||
        (old_capacity > 0 ? old_capacity * 2 : 1) > SIZE_MAX / vector->data_size) {
        result.status = VECTOR_ERR_OVERFLOW;
        SET_MSG(result, "Exceeded maximum size while resizing vector");

        return result;
    }

    const size_t new_capacity = old_capacity > 0 ? old_capacity * 2 : 1;

    // Keep the old pointer until realloc succeeds so nothing is lost on failure
    void *new_elements = realloc(vector->elements, new_capacity * vector->data_size);
    if (new_elements == NULL) {
        result.status = VECTOR_ERR_ALLOCATE;
        SET_MSG(result, "Failed to reallocate memory for vector");

        return result;
    }

    vector->elements = new_elements;
    vector->capacity = new_capacity;

    result.status = VECTOR_OK;
    SET_MSG(result, "Vector successfully resized");

    return result;
}
/**
 * swap
 *  @x: first element
 *  @y: second element
 *  @size: size in bytes of each element
 *
 *  Swaps @x and @y. The two elements must either be the same object or
 *  not overlap at all
 */
static void swap(void *x, void *y, size_t size) {
    // Swapping an element with itself is a no-op; it is also skipped
    // because memcpy must not be given overlapping regions
    if (x == y || size == 0) {
        return;
    }

    uint8_t *lhs = x;
    uint8_t *rhs = y;

    // Exchange through a small fixed buffer: a variable length array sized
    // by the caller could exhaust the stack for large elements
    uint8_t chunk[64];

    while (size > 0) {
        const size_t step = size < sizeof(chunk) ? size : sizeof(chunk);

        memcpy(chunk, lhs, step);
        memcpy(lhs, rhs, step);
        memcpy(rhs, chunk, step);

        lhs += step;
        rhs += step;
        size -= step;
    }
}
/**
 * partition
 *  @base: the array/partition
 *  @low: lower index
 *  @high: higher index (its element is used as the pivot)
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Rearranges the elements in [@low, @high] so that everything comparing
 *  less than or equal to the pivot ends up before it
 *
 *  Returns the pivot index
 */
static size_t partition(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    uint8_t *bytes = (uint8_t*)base;
    void *pivot_slot = bytes + (high * size);

    // Next free position of the "not greater than the pivot" region
    size_t boundary = low;

    for (size_t scan = low; scan < high; scan++) {
        void *candidate = bytes + (scan * size);
        const vector_order_t order = cmp(candidate, pivot_slot);

        if (order != VECTOR_ORDER_LT && order != VECTOR_ORDER_EQ) {
            continue;
        }

        swap(bytes + (boundary * size), candidate, size);
        boundary++;
    }

    // Drop the pivot right after the region built above
    swap(bytes + (boundary * size), pivot_slot, size);

    return boundary;
}
/**
 * quicksort
 *  @base: the base array/partition
 *  @low: lower index
 *  @high: higher index
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Sorts an array/partition using the Quicksort algorithm. Only the
 *  smaller side of each split is handled recursively; the larger one is
 *  processed by the loop, which keeps the recursion depth logarithmic
 *  even on already sorted input
 */
static void quicksort(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    while (low < high) {
        const size_t pivot = partition(base, low, high, size, cmp);

        if (pivot - low < high - pivot) {
            // Left side is the smaller one; it may be empty (pivot == low),
            // in which case pivot - 1 must not be computed
            if (pivot > low) {
                quicksort(base, low, pivot - 1, size, cmp);
            }
            low = pivot + 1;
        } else {
            quicksort(base, pivot + 1, high, size, cmp);

            // Guard against unsigned wrap-around of pivot - 1
            if (pivot == 0) {
                break;
            }
            high = pivot - 1;
        }
    }
}
/** /**
* vector_new * vector_new
@@ -61,112 +163,6 @@ vector_result_t vector_new(size_t size, size_t data_size) {
return result; return result;
} }
/**
 * vector_resize
 *  @vector: a non-null vector
 *
 *  Increases the capacity of @vector: it doubles, or becomes 1 when it
 *  was 0. On failure the vector is left untouched
 *
 *  Returns a vector_result_t data type containing the status
 */
vector_result_t vector_resize(vector_t *vector) {
    // NOTE(review): assumes vector->data_size is never 0 (it is used as a
    // divisor below) - presumably enforced at creation time, confirm
    vector_result_t result = {0};

    const size_t old_capacity = vector->capacity;

    // Check for integer overflow: first on the doubling itself (it would
    // wrap around and silently shrink the vector), then on the byte count
    // handed to realloc
    if (old_capacity > SIZE_MAX / 2 ||
        (old_capacity > 0 ? old_capacity * 2 : 1) > SIZE_MAX / vector->data_size) {
        result.status = VECTOR_ERR_OVERFLOW;
        SET_MSG(result, "Exceeded maximum size while resizing vector");

        return result;
    }

    const size_t new_capacity = old_capacity > 0 ? old_capacity * 2 : 1;

    // Keep the old pointer until realloc succeeds so nothing is lost on failure
    void *new_elements = realloc(vector->elements, new_capacity * vector->data_size);
    if (new_elements == NULL) {
        result.status = VECTOR_ERR_ALLOCATE;
        SET_MSG(result, "Failed to reallocate memory for vector");

        return result;
    }

    vector->elements = new_elements;
    vector->capacity = new_capacity;

    result.status = VECTOR_OK;
    SET_MSG(result, "Vector successfully resized");

    return result;
}
/**
 * swap
 *  @x: first element
 *  @y: second element
 *  @size: size in bytes of each element
 *
 *  Swaps @x and @y. The two elements must either be the same object or
 *  not overlap at all
 */
void swap(void *x, void *y, size_t size) {
    // Swapping an element with itself is a no-op; it is also skipped
    // because memcpy must not be given overlapping regions
    if (x == y || size == 0) {
        return;
    }

    uint8_t *lhs = x;
    uint8_t *rhs = y;

    // Exchange through a small fixed buffer: a variable length array sized
    // by the caller could exhaust the stack for large elements
    uint8_t chunk[64];

    while (size > 0) {
        const size_t step = size < sizeof(chunk) ? size : sizeof(chunk);

        memcpy(chunk, lhs, step);
        memcpy(lhs, rhs, step);
        memcpy(rhs, chunk, step);

        lhs += step;
        rhs += step;
        size -= step;
    }
}
/**
 * partition
 *  @base: the array/partition
 *  @low: lower index
 *  @high: higher index (its element is used as the pivot)
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Rearranges the elements in [@low, @high] so that everything comparing
 *  less than or equal to the pivot ends up before it
 *
 *  Returns the pivot index
 */
size_t partition(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    uint8_t *bytes = (uint8_t*)base;
    void *pivot_slot = bytes + (high * size);

    // Next free position of the "not greater than the pivot" region
    size_t boundary = low;

    for (size_t scan = low; scan < high; scan++) {
        void *candidate = bytes + (scan * size);
        const vector_order_t order = cmp(candidate, pivot_slot);

        if (order != VECTOR_ORDER_LT && order != VECTOR_ORDER_EQ) {
            continue;
        }

        swap(bytes + (boundary * size), candidate, size);
        boundary++;
    }

    // Drop the pivot right after the region built above
    swap(bytes + (boundary * size), pivot_slot, size);

    return boundary;
}
/**
 * quicksort
 *  @base: the base array/partition
 *  @low: lower index
 *  @high: higher index
 *  @size: data size
 *  @cmp: comparison function
 *
 *  Sorts an array/partition using the Quicksort algorithm. Only the
 *  smaller side of each split is handled recursively; the larger one is
 *  processed by the loop, which keeps the recursion depth logarithmic
 *  even on already sorted input
 */
void quicksort(void *base, size_t low, size_t high, size_t size, vector_cmp_fn cmp) {
    while (low < high) {
        const size_t pivot = partition(base, low, high, size, cmp);

        if (pivot - low < high - pivot) {
            // Left side is the smaller one; it may be empty (pivot == low),
            // in which case pivot - 1 must not be computed
            if (pivot > low) {
                quicksort(base, low, pivot - 1, size, cmp);
            }
            low = pivot + 1;
        } else {
            quicksort(base, pivot + 1, high, size, cmp);

            // Guard against unsigned wrap-around of pivot - 1
            if (pivot == 0) {
                break;
            }
            high = pivot - 1;
        }
    }
}
/** /**
* vector_push * vector_push
* @vector: a non-null vector * @vector: a non-null vector

329
tests/test_string.c Normal file
View File

@@ -0,0 +1,329 @@
/*
* Unit tests for String data type
*/
// Runs the test function test_<NAME>, printing its name before the call
// and " PASSED" once it returns. NAME is pasted after the test_ prefix,
// so TEST(string_new) invokes test_string_new(). The do/while(0) wrapper
// lets the macro be used like a single statement.
#define TEST(NAME) do { \
printf("Running test_%s...", #NAME); \
test_##NAME(); \
printf(" PASSED\n"); \
} while(0)
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "../src/string.h"
// Test string creation
void test_string_new(void) {
string_result_t res = string_new("hello");
assert(res.status == STRING_OK);
assert(res.value.string != NULL);
assert(strcmp(res.value.string->data, "hello") == 0);
assert(string_size(res.value.string) == 5);
assert(res.value.string->byte_size == 5);
string_destroy(res.value.string);
}
// Test empty string
void test_string_new_empty(void) {
string_result_t res = string_new("");
assert(res.status == STRING_OK);
assert(string_size(res.value.string) == 0);
assert(res.value.string->byte_size == 0);
assert(res.value.string->data[0] == '\0');
string_destroy(res.value.string);
}
// A clone has the same content as its source but lives at another address
void test_string_clone(void) {
    string_t *original = string_new("Original").value.string;

    const string_result_t cloned = string_clone(original);
    assert(cloned.status == STRING_OK);

    string_t *copy = cloned.value.string;
    assert(copy != original); // Different memory address
    assert(strcmp(copy->data, original->data) == 0);
    assert(copy->byte_size == original->byte_size);

    string_destroy(original);
    string_destroy(copy);
}
// Test string concatenation
void test_string_concat(void) {
string_t *str1 = string_new("Foo").value.string;
string_t *str2 = string_new(" Bar").value.string;
string_result_t res = string_concat(str1, str2);
assert(res.status == STRING_OK);
assert(strcmp(res.value.string->data, "Foo Bar") == 0);
assert(string_size(res.value.string) == 7);
string_destroy(str1);
string_destroy(str2);
string_destroy(res.value.string);
}
// Substring search reports symbol indexes, also across multi-byte symbols
void test_string_contains(void) {
    string_t *text = string_new("Hello 🌍 World").value.string;
    string_t *ascii_needle = string_new("World").value.string;
    string_t *emoji_needle = string_new("🌍").value.string;
    string_t *absent_needle = string_new("not found").value.string;

    // "World" begins at symbol 8
    const string_result_t ascii_hit = string_contains(text, ascii_needle);
    assert(ascii_hit.status == STRING_OK);
    assert(ascii_hit.value.idx == 8);

    // The emoji sits at symbol 6
    const string_result_t emoji_hit = string_contains(text, emoji_needle);
    assert(emoji_hit.status == STRING_OK);
    assert(emoji_hit.value.idx == 6);

    // A missing needle is reported with index -1
    const string_result_t miss = string_contains(text, absent_needle);
    assert(miss.status == STRING_OK);
    assert(miss.value.idx == -1);

    string_destroy(text);
    string_destroy(ascii_needle);
    string_destroy(emoji_needle);
    string_destroy(absent_needle);
}
// Slicing works on symbol indexes and rejects invalid bounds
void test_string_slice(void) {
    // Plain ASCII input
    string_t *ascii = string_new("foobar").value.string;
    const string_result_t ascii_slice = string_slice(ascii, 2, 4);
    assert(ascii_slice.status == STRING_OK);
    assert(strcmp(ascii_slice.value.string->data, "oba") == 0);
    assert(ascii_slice.value.string->char_count == 3);

    // A single multi-byte symbol
    string_t *mixed = string_new("AB😆🌍").value.string;
    const string_result_t emoji_slice = string_slice(mixed, 2, 2);
    assert(emoji_slice.status == STRING_OK);
    assert(strcmp(emoji_slice.value.string->data, "😆") == 0);
    assert(emoji_slice.value.string->byte_size == 4); // emoji = 4 bytes

    // ASCII symbols followed by a multi-byte one
    const string_result_t mixed_slice = string_slice(mixed, 0, 2);
    assert(mixed_slice.status == STRING_OK);
    assert(strcmp(mixed_slice.value.string->data, "AB😆") == 0);

    // Reversed bounds, then an upper bound past the end
    string_result_t rejected = string_slice(ascii, 5, 2);
    assert(rejected.status == STRING_ERR_OVERFLOW);

    rejected = string_slice(ascii, 1, 50);
    assert(rejected.status == STRING_ERR_OVERFLOW);

    string_destroy(ascii);
    string_destroy(mixed);
    string_destroy(ascii_slice.value.string);
    string_destroy(emoji_slice.value.string);
    string_destroy(mixed_slice.value.string);
}
// Test case-insensitive and sensitive comparison
void test_string_eq(void) {
string_t *str1 = string_new("Foo").value.string;
string_t *str2 = string_new("foo").value.string;
// Case sensitive comparison should be false
assert(string_eq(str1, str2, true).value.is_equ == false);
// Case insensitive comparison should be true
assert(string_eq(str1, str2, false).value.is_equ == true);
string_destroy(str1);
string_destroy(str2);
}
// Test string reverse using UTF-8 symbols: the symbol order is reversed
// while the bytes of the multi-byte emoji stay in their original order
void test_string_reverse_utf8(void) {
    string_t *str = string_new("A🌍Z").value.string;

    string_result_t res = string_reverse(str);

    assert(res.status == STRING_OK);
    assert(string_size(res.value.string) == 3);
    assert(strcmp(res.value.string->data, "Z🌍A") == 0);
    // Reversal must not alter the byte length: Z (1B) + emoji (4B) + A (1B)
    assert(res.value.string->byte_size == 6);

    string_destroy(str);
    string_destroy(res.value.string);
}
// Reading a symbol by index returns the whole multi-byte sequence
void test_string_get_at(void) {
    string_t *str = string_new("AB😆🌍").value.string;

    // 😆 is at index 2
    const string_result_t third = string_get_at(str, 2);
    assert(third.status == STRING_OK);
    assert(strcmp(third.value.symbol, "😆") == 0);
    free(third.value.symbol);

    // 🌍 is at index 3
    const string_result_t fourth = string_get_at(str, 3);
    assert(fourth.status == STRING_OK);
    assert(strcmp(fourth.value.symbol, "🌍") == 0);
    free(fourth.value.symbol);

    string_destroy(str);
}
// Reading past the last symbol is reported as an overflow
void test_string_get_at_overflow(void) {
    string_t *str = string_new("ABC").value.string;

    const string_result_t out_of_range = string_get_at(str, 50);
    assert(out_of_range.status == STRING_ERR_OVERFLOW);

    string_destroy(str);
}
// Replacing a one-byte symbol with a four-byte one grows the byte size only
void test_string_set_at(void) {
    string_t *source = string_new("ABC").value.string;

    // Replace 'B' with emoji
    const string_result_t replaced = string_set_at(source, 1, "😆");
    string_t *altered = replaced.value.string;

    assert(replaced.status == STRING_OK);
    assert(strcmp(altered->data, "A😆C") == 0);
    assert(string_size(altered) == 3);
    assert(altered->byte_size == 6); // that is: A (1B) + emoji (4B) + C (1B)

    string_destroy(source);
    string_destroy(altered);
}
// Malformed UTF-8 replacement symbols are rejected
void test_string_set_at_invalid_utf8(void) {
    string_t *str = string_new("ABC").value.string;

    const char * const invalid_symbols[] = { "\xFF", "\x80" };
    const size_t total = sizeof(invalid_symbols) / sizeof(invalid_symbols[0]);

    for (size_t idx = 0; idx < total; idx++) {
        const string_result_t res = string_set_at(str, 1, invalid_symbols[idx]);
        assert(res.status == STRING_ERR_INVALID_UTF8);
    }

    string_destroy(str);
}
// Writing past the last symbol is reported as an overflow
void test_string_set_at_overflow(void) {
    string_t *str = string_new("ABC").value.string;

    const string_result_t out_of_range = string_set_at(str, 10, "a");
    assert(out_of_range.status == STRING_ERR_OVERFLOW);

    string_destroy(str);
}
// Mixed-case ASCII input is converted to lowercase
void test_string_to_lower(void) {
    string_t *mixed_case = string_new("AbC").value.string;

    const string_result_t lowered = string_to_lower(mixed_case);
    assert(lowered.status == STRING_OK);
    assert(strcmp(lowered.value.string->data, "abc") == 0);

    string_destroy(mixed_case);
    string_destroy(lowered.value.string);
}
// Mixed-case ASCII input is converted to uppercase
void test_string_to_upper(void) {
    string_t *mixed_case = string_new("aBc").value.string;

    const string_result_t raised = string_to_upper(mixed_case);
    assert(raised.status == STRING_OK);
    assert(strcmp(raised.value.string->data, "ABC") == 0);

    string_destroy(mixed_case);
    string_destroy(raised.value.string);
}
// Surrounding whitespace is removed, inner whitespace is kept
void test_string_trim(void) {
    string_t *padded = string_new(" \t Foo Bar \n ").value.string;

    const string_result_t trimmed = string_trim(padded);
    assert(trimmed.status == STRING_OK);
    assert(strcmp(trimmed.value.string->data, "Foo Bar") == 0);

    string_destroy(padded);
    string_destroy(trimmed.value.string);
}
// Splitting on a delimiter yields the parts in their original order
void test_string_split(void) {
    string_t *csv = string_new("Red,Green,Blue").value.string;

    const string_result_t split = string_split(csv, ",");
    assert(split.status == STRING_OK);
    assert(split.value.split.count == 3);

    const size_t total = split.value.split.count;
    string_t **parts = split.value.split.strings;
    const char *expected[] = { "Red", "Green", "Blue" };

    size_t idx = 0;
    while (idx < total) {
        assert(strcmp(parts[idx]->data, expected[idx]) == 0);
        idx++;
    }

    string_split_destroy(parts, total);
    string_destroy(csv);
}
// Destroying a valid String succeeds, destroying NULL is rejected
void test_string_destroy(void) {
    string_t *victim = string_new("delete me").value.string;

    const string_result_t destroyed = string_destroy(victim);
    assert(destroyed.status == STRING_OK);

    const string_result_t rejected = string_destroy(NULL);
    assert(rejected.status == STRING_ERR_INVALID);
}
// Runs every String unit test in sequence; a failing assert aborts the run
int main(void) {
    fputs("=== Running String unit tests ===\n\n", stdout);

    // Construction
    TEST(string_new);
    TEST(string_new_empty);
    TEST(string_clone);
    TEST(string_concat);

    // Queries
    TEST(string_contains);
    TEST(string_slice);
    TEST(string_eq);
    TEST(string_reverse_utf8);

    // Symbol access
    TEST(string_get_at);
    TEST(string_get_at_overflow);
    TEST(string_set_at);
    TEST(string_set_at_overflow);
    TEST(string_set_at_invalid_utf8);

    // Transformations and teardown
    TEST(string_to_lower);
    TEST(string_to_upper);
    TEST(string_trim);
    TEST(string_split);
    TEST(string_destroy);

    fputs("\n=== All tests passed! ===\n", stdout);

    return 0;
}

185
usage.c
View File

@@ -25,10 +25,12 @@
#include "src/vector.h" #include "src/vector.h"
#include "src/map.h" #include "src/map.h"
#include "src/bigint.h" #include "src/bigint.h"
#include "src/string.h"
static int vector_usage(void); static int vector_usage(void);
static int map_usage(void); static int map_usage(void);
static int bigint_usage(void); static int bigint_usage(void);
static int string_usage(void);
static vector_order_t cmp_int_asc(const void *x, const void *y); static vector_order_t cmp_int_asc(const void *x, const void *y);
static vector_order_t cmp_int_desc(const void *x, const void *y); static vector_order_t cmp_int_desc(const void *x, const void *y);
@@ -52,6 +54,11 @@ int main(void) {
st = bigint_usage(); st = bigint_usage();
if (st) { return st; } if (st) { return st; }
SEP(50);
st = string_usage();
if (st) { return st; }
return 0; return 0;
} }
@@ -524,3 +531,181 @@ int bigint_usage(void) {
return 0; return 0;
} }
/*
 * Walks through the string API: creation, cloning, concatenation, substring
 * search, slicing, comparison, case conversion, reversal, character get/set,
 * trimming and splitting. Each step prints its result to stdout.
 *
 * Returns 0 when every step succeeds, 1 as soon as any library call reports a
 * status other than STRING_OK. Every string still owned at that point is
 * released on the way out, so a failing step does not leak.
 */
int string_usage(void) {
    // Pessimistic default: only flipped to 0 once every step has succeeded
    int status = 1;
    // One result slot reused for every call, so the error path never jumps
    // over an initialised declaration
    string_result_t res;

    // Long-lived strings are tracked here (NULL == not owned) so the single
    // cleanup path below can release whatever is still alive
    string_t *str1 = NULL;
    string_t *suffix = NULL;
    string_t *concat_str = NULL;
    string_t *haystack = NULL;
    string_t *needle = NULL;
    string_t *compare = NULL;
    string_t *to_trim = NULL;
    string_t *to_split = NULL;

    // Create a new string
    res = string_new("Hello, ");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    str1 = res.value.string;
    printf("Created string: \"%s\"\n", str1->data);
    printf("Character count: %zu (%zu actual bytes)\n", string_size(str1), str1->byte_size);

    // Clone it; the clone is only printed, so it is released right away
    res = string_clone(str1);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Cloned string: \"%s\"\n\n", res.value.string->data);
    string_destroy(res.value.string);

    // Concatenation of strings
    res = string_new("World! 🦜");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    suffix = res.value.string;
    printf("Created another string: \"%s\"\n", suffix->data);
    printf("Character count: %zu (%zu actual bytes)\n\n", string_size(suffix), suffix->byte_size);

    res = string_concat(str1, suffix);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    concat_str = res.value.string;
    string_destroy(suffix);
    suffix = NULL; // Already released: keep the cleanup path from touching it again
    printf("Concatenation result: \"%s\"\n\n", concat_str->data);

    // String contains
    res = string_new("The quick brown fox jumps over the lazy dog.");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    haystack = res.value.string;

    res = string_new("brown fox");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    needle = res.value.string;

    res = string_contains(haystack, needle);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    if (res.value.idx != -1) {
        // NOTE(review): idx is compared against -1, so it looks like a signed
        // type. The cast keeps the %zu argument well-typed either way; it is
        // only reached when a match was reported.
        printf("Substring found. Starting at index %zu\n\n", (size_t)res.value.idx);
    }
    string_destroy(haystack);
    haystack = NULL;
    string_destroy(needle);
    needle = NULL;

    // String slicing
    res = string_slice(concat_str, 7, 14);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Slice of string: \"%s\"\n\n", res.value.string->data);
    string_destroy(res.value.string);

    // String equality
    res = string_new("hello, World! 🦜");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    compare = res.value.string;

    res = string_eq(concat_str, compare, true);
    if (res.status != STRING_OK) {
        // is_equ is meaningless if the comparison itself failed
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    if (res.value.is_equ) {
        printf("The two strings are equal\n\n");
    } else {
        printf("The two strings are not equal\n\n");
    }
    string_destroy(compare);
    compare = NULL;

    // Uppercase string
    res = string_to_upper(concat_str);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Uppercase: \"%s\"\n", res.value.string->data);
    string_destroy(res.value.string);

    // Lowercase string
    res = string_to_lower(concat_str);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Lowercase: \"%s\"\n\n", res.value.string->data);
    string_destroy(res.value.string);

    // Reverse string
    res = string_reverse(concat_str);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Reversed: \"%s\"\n\n", res.value.string->data);
    string_destroy(res.value.string);

    // Change first character of the string
    res = string_set_at(concat_str, 0, "J");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Updated string: \"%s\"\n\n", res.value.string->data);
    string_destroy(res.value.string);

    // Get character from string (the emoji)
    res = string_get_at(concat_str, 14);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Extracted symbol: \"%s\"\n", res.value.symbol);
    free(res.value.symbol);

    // Trim string
    res = string_new(" foo ");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    to_trim = res.value.string;

    res = string_trim(to_trim);
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Trimmed string: \"%s\"\n\n", res.value.string->data);
    string_destroy(to_trim);
    to_trim = NULL;
    string_destroy(res.value.string);

    // Split string
    res = string_new("foo/bar/biz");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    to_split = res.value.string;

    res = string_split(to_split, "/");
    if (res.status != STRING_OK) {
        printf("Error: %s\n", res.message);
        goto cleanup;
    }
    printf("Original string: \"%s\"\nSplitted string: ", to_split->data);
    for (size_t idx = 0; idx < res.value.split.count; idx++) {
        printf("\"%s\" ", res.value.split.strings[idx]->data);
    }
    printf("\n");
    string_split_destroy(res.value.split.strings, res.value.split.count);

    status = 0;

cleanup:
    // Shared exit path: release only what is still owned (non-NULL)
    if (to_split != NULL) { string_destroy(to_split); }
    if (to_trim != NULL) { string_destroy(to_trim); }
    if (compare != NULL) { string_destroy(compare); }
    if (needle != NULL) { string_destroy(needle); }
    if (haystack != NULL) { string_destroy(haystack); }
    if (suffix != NULL) { string_destroy(suffix); }
    if (concat_str != NULL) { string_destroy(concat_str); }
    if (str1 != NULL) { string_destroy(str1); }

    return status;
}