Compare commits

12 Commits

Author SHA1 Message Date
7d95b32e52 Updated documentation
All checks were successful
clang-build / clang-build (push) Successful in 45s
gcc-build / gcc-build (push) Successful in 56s
2026-01-23 17:06:13 +01:00
78909ea0fe Migrating to Gitea CI
All checks were successful
clang-build / clang-build (push) Successful in 42s
gcc-build / gcc-build (push) Successful in 19s
2026-01-16 11:56:20 +01:00
912faca620 Added benchmark for String type 2026-01-12 15:58:42 +01:00
b1cca113ce Refactored string_set_at and added documentation 2026-01-12 14:16:07 +01:00
0f8378bf75 Completed unit tests for String type and added string_slice function 2026-01-09 12:14:30 +01:00
44e3dfa58d Added unit tests for String data type and updated CI 2026-01-08 16:29:34 +01:00
3b7e2dabc9 Fixed various bugs and added usage for String data type 2026-01-08 15:22:47 +01:00
d87ddaf45c Added string_{clone,trim,split,destroy,split_destroy} functions 2026-01-08 11:04:10 +01:00
4220229aa8 Added string_{to_lower,to_upper,reverse} functions 2026-01-07 16:07:58 +01:00
c7e2ca273e General refactoring 2026-01-07 11:08:53 +01:00
35f72ba139 Added string_{new,concat,substring,eq,get_at,set_at} implementations. 2026-01-07 09:58:51 +01:00
36616c30b5 Added String data type and its features 2025-12-23 15:48:20 +01:00
18 changed files with 1903 additions and 419 deletions

View File

@@ -16,7 +16,7 @@ jobs:
- name: Run unit tests
run: |
./test_vector && ./test_map && ./test_bigint
./test_vector && ./test_map && ./test_bigint && ./test_string
- name: Run benchmarks
run: |

View File

@@ -13,7 +13,7 @@ jobs:
- name: Run unit tests
run: |
./test_vector && ./test_map && ./test_bigint
./test_vector && ./test_map && ./test_bigint && ./test_string
- name: Run benchmarks
run: |

View File

@@ -17,14 +17,15 @@ TARGET = usage
TEST_V_TARGET = test_vector
TEST_M_TARGET = test_map
TEST_B_TARGET = test_bigint
TEST_S_TARGET = test_string
BENCH_TARGET = benchmark_datum
LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o
LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/string.o
PROG_OBJS = $(OBJ_DIR)/usage.o
.PHONY: all clean
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET)
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(TEST_S_TARGET) $(BENCH_TARGET)
bench: $(BENCH_TARGET)
$(TARGET): $(PROG_OBJS) $(LIB_OBJS)
@@ -39,6 +40,9 @@ $(TEST_M_TARGET): $(OBJ_DIR)/test_map.o $(OBJ_DIR)/map.o
$(TEST_B_TARGET): $(OBJ_DIR)/test_bigint.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/vector.o
$(CC) $(CFLAGS) -o $@ $^
$(TEST_S_TARGET): $(OBJ_DIR)/test_string.o $(OBJ_DIR)/string.o
$(CC) $(CFLAGS) -o $@ $^
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(OBJ_DIR)
$(CC) $(CFLAGS) -c -o $@ $<
@@ -52,7 +56,7 @@ $(OBJ_DIR):
mkdir -p $(OBJ_DIR)
# Benchmark rules
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/bigint.o
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/string.o
$(CC) $(BENCH_FLAGS) -o $@ $^
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)
@@ -65,4 +69,4 @@ $(BENCH_OBJ_DIR):
mkdir -p $(BENCH_OBJ_DIR)
clean:
rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET)
rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(TEST_S_TARGET) $(BENCH_TARGET)

View File

@@ -4,14 +4,16 @@
![](https://git.marcocetica.com/marco/datum/actions/workflows/gcc-build.yml/badge.svg)
![](https://git.marcocetica.com/marco/datum/actions/workflows/clang-build.yml/badge.svg)
</div>
Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond
the standard library. It currently features:
- [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types;
- [**Map**](/docs/map.md): an associative array of generic heterogenous data types;
- [**BigInt**](/docs/bigint.md): a data type for arbitrary large integers.
- [**Map**](/docs/map.md): an associative array that handles generic heterogenous data types;
- [**BigInt**](/docs/bigint.md): a data type for arbitrary large integers;
- [**String**](/docs/string.md): an immutable string type with partial UTF-8 support.
## Usage
At its simplest, you can use this library as follows:
@@ -110,9 +112,9 @@ int main(void) {
#include "src/bigint.h"
/*
* Compile with: clang -O3 fact.c src/bigint.c src/vector.c -o fact
* Compile with: gcc -O3 main.c src/bigint.c src/vector.c
* Output: 20000! = 1819206320230345134827641...
* Time: 1.49s user 0.00s system 99% cpu 1.501 total
* Time: 4.01s user 0.00s system 99% cpu 4.021 total
*/
int main(void) {
const int n = 20000;
@@ -134,6 +136,39 @@ int main(void) {
}
```
### `String` usage:
```c
#include <stdio.h>
#include "src/string.h"
/*
* Compile with: gcc main.c src/string.c
* Output: Final string: "Hello,World,😀" Splitted: ["Hello" "World" "😀" ]
*/
// Minimal String demo. Every call returns a string_result_t; the status field is
// deliberately not checked here to keep the sample short (usage.c shows proper
// error management).
int main(void) {
// String methods return new instances instead of modifying their argument, so the
// source string and the trimmed/reversed copy are separate objects that must
// each be destroyed.
string_t *x = string_new(" Hello, ").value.string;
string_t *x_trm = string_trim(x).value.string;
string_t *y = string_new("😀,dlroW").value.string;
string_t *y_rev = string_reverse(y).value.string;
string_t *str = string_concat(x_trm, y_rev).value.string;
// Split on "," and keep only the array of parts from the result.
// NOTE(review): the number of parts is presumably also available as
// .value.split.count; this sample ignores it.
string_t **strings = string_split(str, ",").value.split.strings;
printf("Final string: \"%s\" Splitted: [", str->data);
// 3 is hard-coded: it is the number of parts shown in the expected output for
// this particular input.
for (int idx = 0; idx < 3; idx++) { printf("\"%s\" ", strings[idx]->data); }
printf("]\n");
// Release the split array (same hard-coded count) and every string created above.
string_split_destroy(strings, 3); string_destroy(str);
string_destroy(x); string_destroy(y);
string_destroy(x_trm); string_destroy(y_rev);
return 0;
}
```
For a more exhaustive example, refer to the `usage.c` file. There, you will find a program with proper error management
and a sample usage for every available method. To run it, first issue the following command:
@@ -145,7 +180,9 @@ This will compile the library as well as the `usage.c` file, the unit tests and
> [!NOTE]
> This project is primarily developed for learning purposes and was not created with industrial
> or production use in mind. As such, it is not intended to compete with any existing C library such as the
> or production use in mind. As such, it is not intended to compete with any existing C library.
> In particular, the big number implementation does not aim to match the design, the maturity and
> the performance of established solutions such as the
> GNU Multiple Precision Arithmetic Library (GMP).
## Documentation
@@ -162,14 +199,14 @@ $ ./test_bigint
```
## Benchmark
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stress the `Vector` and the `Map` data structures. You can run it by issuing the following command:
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector`, `Map` and `String` data structures.
You can run it by issuing the following command:
```sh
$ make clean all CC=clang
$ ./benchmark_datum
omputing Vector average time...average time: 8 ms
Computing Map average time...average time: 53 ms
Computing BigInt average time...average time: 76 ms
Computing Vector average time...average time: 19 ms
Computing Map average time...average time: 55 ms
Computing String average time...average time: 24 ms
```

View File

@@ -1,3 +1,5 @@
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -6,7 +8,7 @@
#include "../src/vector.h"
#include "../src/map.h"
#include "../src/bigint.h"
#include "../src/string.h"
typedef void (*test_fn_t)(size_t iterations);
@@ -14,10 +16,10 @@ void test_vector(size_t iterations) {
vector_t *vec = vector_new(16, sizeof(int)).value.vector;
for (size_t idx = 0; idx < iterations; idx++) {
vector_push(vec, &idx);
vector_push(vec, &(int){idx});
}
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum
volatile uint64_t sum = 0;
for (size_t idx = 0; idx < iterations; idx++) {
const int *val = (int*)vector_get(vec, idx).value.element;
sum += *val;
@@ -39,7 +41,7 @@ void test_map(size_t iterations) {
map_add(map, key, (void*)value);
}
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum
volatile uint64_t sum = 0;
for (size_t idx = 0; idx < iterations; idx++) {
snprintf(key, sizeof(key), "key_%zu", idx);
@@ -51,7 +53,7 @@ void test_map(size_t iterations) {
for (size_t idx = 0; idx < map->capacity; idx++) {
snprintf(key, sizeof(key), "key_%zu", idx);
int *val = (int *)map_get(map, key).value.element;
int *val = (int*)map_get(map, key).value.element;
free(val);
map_remove(map, key);
@@ -60,59 +62,28 @@ void test_map(size_t iterations) {
map_destroy(map);
}
void test_bigint(size_t iterations) {
volatile uint64_t accumulator = 0;
void test_string(size_t iterations) {
volatile size_t total_len = 0;
for (size_t idx = 1; idx <= iterations; idx++) {
long long a_val = (long long)idx * 123456789LL;
long long b_val = (long long)idx * 17777LL;
for (size_t idx = 0; idx < iterations; idx++) {
string_t *str1 = string_new("hello").value.string;
string_t *str2 = string_new(" World").value.string;
bigint_result_t a_res = bigint_from_int(a_val);
bigint_result_t b_res = bigint_from_int(b_val);
string_result_t concat = string_concat(str1, str2);
string_result_t upper = string_to_upper(concat.value.string);
total_len += string_size(upper.value.string);
string_result_t needle = string_new("WORLD");
string_result_t contains = string_contains(upper.value.string, needle.value.string);
if (a_res.status != BIGINT_OK || b_res.status != BIGINT_OK) {
bigint_destroy(a_res.value.number);
bigint_destroy(b_res.value.number);
continue;
if (contains.value.idx >= 0) {
total_len += contains.value.idx;
}
bigint_t *a = a_res.value.number;
bigint_t *b = b_res.value.number;
// Addition
bigint_result_t add_res = bigint_add(a, b);
if (add_res.status == BIGINT_OK) {
vector_result_t v = vector_get(add_res.value.number->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(add_res.value.number);
}
// Substraction
bigint_result_t sub_res = bigint_sub(a, b);
if (sub_res.status == BIGINT_OK) {
vector_result_t v = vector_get(sub_res.value.number->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(sub_res.value.number);
}
// Multiplication
bigint_result_t mul_res = bigint_prod(a, b);
if (mul_res.status == BIGINT_OK) {
vector_result_t v = vector_get(mul_res.value.number->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(mul_res.value.number);
}
// Division
bigint_result_t div_res = bigint_divmod(a, b);
if (div_res.status == BIGINT_OK) {
vector_result_t v = vector_get(div_res.value.division.quotient->digits, 0);
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
bigint_destroy(div_res.value.division.quotient);
bigint_destroy(div_res.value.division.remainder);
}
bigint_destroy(a); bigint_destroy(b);
string_destroy(str1);
string_destroy(str2);
string_destroy(concat.value.string);
string_destroy(upper.value.string);
string_destroy(needle.value.string);
}
}
@@ -141,7 +112,7 @@ int main(void) {
// Do a warmup run
test_vector(1000);
test_map(1000);
test_bigint(1000);
test_string(1000);
printf("Computing Vector average time...");
fflush(stdout);
@@ -151,9 +122,9 @@ int main(void) {
fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
printf("Computing BigInt average time...");
printf("Computing String average time...");
fflush(stdout);
printf("average time: %lld ms\n", benchmark(test_bigint, 1e5, 30));
printf("average time: %lld ms\n", benchmark(test_string, 1e5, 30));
return 0;
}

View File

@@ -7,4 +7,5 @@ At the time being, this documentation includes the following pages:
- [vector.md](vector.md): vector documentation;
- [map.md](map.md): map documentation;
- [bigint.md](bigint.md): bigint documentation.
- [bigint.md](bigint.md): bigint documentation;
- [string.md](string.md): string documentation.

View File

@@ -33,18 +33,17 @@ and the boolean `is_negative` variable denotes its sign.
The `BigInt` data structure supports the following methods:
- `bigint_result_t bigint_from_int(value)`: creates a big integer from a primitive `int` type;
- `bigint_result_t bigint_from_string(string_num)`: creates a big integer from a C string;
- `bigint_result_t bigint_to_string(number)`: converts a big integer to a C string;
- `bigint_result_t bigint_clone(number)`: clones a big integer;
- `bigint_result_t bigint_compare(x, y)`: compares two big integers, returning either `-1`, `0` or `1` if the first is less than, equal than or greater than the second, respectively;
- `bigint_result_t bigint_add(x, y)`: adds two big integers together in $\mathcal{O}(n)$;
- `bigint_result_t bigint_sub(x, y)`: subtracts two big integers in $\mathcal{O}(n)$;
- `bigint_result_t bigint_prod(x, y)`: multiplies two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;
- `bigint_result_t bigint_divmod(x, y)`: divides two big integers using _Knuth's Algorithm D_ in $\mathcal{O}(n \times m)$ where $n$ and $m$ are the number of base-10^9
parts/limbs in the divisor and the quotient, respectively. This method returns both the quotient and the remainder;
- `bigint_result_t bigint_mod(x, y)`: calls `bigint_divmod`, discards the quotient and yields the remainder;
- `bigint_result_t bigint_destroy(number)`: deletes the big number;
- `bigint_result_t bigint_from_int(value)`: create a big integer from a primitive `int` type;
- `bigint_result_t bigint_from_string(string_num)`: create a big integer from a C string;
- `bigint_result_t bigint_to_string(number)`: convert a big integer to a C string;
- `bigint_result_t bigint_clone(number)`: clone a big integer;
- `bigint_result_t bigint_compare(x, y)`: compare two big integers, returning either `-1`, `0` or `1` if the first is less than, equal to or greater than the second, respectively;
- `bigint_result_t bigint_add(x, y)`: add two big integers together in $\mathcal{O}(n)$;
- `bigint_result_t bigint_sub(x, y)`: subtract two big integers in $\mathcal{O}(n)$;
- `bigint_result_t bigint_prod(x, y)`: multiply two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;
- `bigint_result_t bigint_divmod(x, y)`: divide two big integers using the *long division* algorithm in $\mathcal{O}(n^2)$, returning both the quotient and the remainder;
- `bigint_result_t bigint_mod(x, y)`: compute the modulo of two big integers using the *long division* algorithm in $\mathcal{O}(n^2)$;
- `bigint_result_t bigint_destroy(number)`: delete the big number;
- `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
As you can see from the previous function signatures, methods that operate on the
@@ -91,3 +90,12 @@ of them has an unique scope as described below:
- `compare_status`: result of `bigint_compare`;
- `string_num`: result of `bigint_to_string`.
> [!IMPORTANT]
> Currently, the division implementation employs a quadratic-time algorithm derived from the conventional _"grade school"_ long-division method.
> This approach performs adequately for integers of modest size (up to approximately 200 digits) but becomes highly inefficient when handling
> substantially larger integers (~1500 digits).
>
> Improving the efficiency of this algorithm would require further research into advanced
> numerical algorithms, which is something that I am currently not inclined to pursue.

View File

@@ -37,12 +37,12 @@ free them before removing the keys or destroying the map.
The `Map` data structure supports the following methods:
- `map_result_t map_new()`: initializes a new map;
- `map_result_t map_add(map, key, value)`: adds a `(key, value)` pair to the map;
- `map_result_t map_get(map, key)`: retrieves a values indexed by `key` if it exists;
- `map_result_t map_remove(map, key)`: removes a key from the map if it exists;
- `map_result_t map_clear(map)`: resets the map state;
- `map_result_t map_destroy(map)`: deletes the map;
- `map_result_t map_new()`: initialize a new map;
- `map_result_t map_add(map, key, value)`: add a `(key, value)` pair to the map;
- `map_result_t map_get(map, key)`: retrieve the value indexed by `key` if it exists;
- `map_result_t map_remove(map, key)`: remove a key from the map if it exists;
- `map_result_t map_clear(map)`: reset the map state;
- `map_result_t map_destroy(map)`: delete the map;
- `size_t map_size(map)`: returns map size (i.e., the number of elements);
- `size_t map_capacity(map)`: returns map capacity (i.e., map total size).

96
docs/string.md Normal file
View File

@@ -0,0 +1,96 @@
# String Technical Details
In this document you can find a quick overview of the technical
aspects (internal design, memory layout, etc.) of the `String` data structure.
`String` is an immutable string data type with partial UTF-8 support.
This means that methods return a new string instance rather than modifying the string in-place.
Internally, this data structure is represented by the following layout:
```c
typedef struct {
char *data;
size_t byte_size;
size_t byte_capacity;
size_t char_count;
} string_t;
```
where the `data` variable represents the actual string (represented as a pointer to `char`),
the `byte_size` variable indicates the actual size (in bytes) of the string, the
`byte_capacity` variable represents the total amount of allocated memory (in bytes) and the
`char_count` variable represents the number of logical characters, that is, the number of
symbols.
As mentioned earlier, this library provides partial UTF-8 support. It is able to recognize
UTF-8 byte sequences as individual Unicode code points, which allows it to correctly distinguish
between byte length and character count. It fully supports Unicode symbols and emojis, while
remaining backward compatible with ASCII strings.
However, this data structure does not support localization. In particular, it does not perform
locale-aware conversion; for instance, uppercase/lowercase transformations are limited to ASCII
characters only. As a result, the German scharfes S (`ß`) is not converted to `SS`, the Spanish
`Ñ` is not converted to `ñ` and the Italian `é` (and its variants) is not treated as a single
symbol, but rather as a base letter combined with an accent.
At the time being, `String` supports the following methods:
- `string_result_t string_new(c_str)`: create a new string;
- `string_result_t string_clone(str)`: clone an existing string;
- `string_result_t string_concat(x, y)`: concatenate two strings together;
- `string_result_t string_contains(haystack, needle)`: search whether the `haystack` string contains `needle`;
- `string_result_t string_slice(str, start, end)`: return a slice (a new string) from `str` between `start` and `end` indices (inclusive);
- `string_result_t string_eq(x, y, case_sensitive)`: check whether `x` and `y` are equal;
- `string_result_t string_get_at(str, position)`: get the UTF-8 symbol indexed by `position` from `str`;
- `string_result_t string_set_at(str, position, utf8_char)`: write a UTF-8 symbol into `str` at index `position`;
- `string_result_t string_to_lower(str)`: convert a string to lowercase;
- `string_result_t string_to_upper(str)`: convert a string to uppercase;
- `string_result_t string_reverse(str)`: reverse a string;
- `string_result_t string_trim(str)`: remove leading and trailing white space from a string;
- `string_result_t string_split(str, delim)`: split a string into an array of `string_t` by specifying a separator;
- `string_result_t string_destroy(str)`: remove a string from memory;
- `string_result_t string_split_destroy(split, count)`: remove an array of strings from memory;
- `size_t string_size(str)`: return string character count.
As you can see from the previous function signatures, most methods that operate on the `String`
data type return a custom type called `string_result_t` which is defined as follows:
```c
typedef enum {
STRING_OK = 0x0,
STRING_ERR_ALLOCATE,
STRING_ERR_INVALID,
STRING_ERR_INVALID_UTF8,
STRING_ERR_OVERFLOW
} string_status_t;
typedef struct {
string_status_t status;
uint8_t message[RESULT_MSG_SIZE];
union {
string_t *string; // For new, clone, slice, reverse, trim
char *symbol; // For get_at
int64_t idx; // For contains
bool is_equ; // For comparison
struct { // For split
string_t **strings;
size_t count;
} split;
} value;
} string_result_t;
```
Each method that returns such a type indicates whether the operation was successful or not
by setting the `status` field and by providing a descriptive message in the `message`
field. If the operation was successful (that is, `status == STRING_OK`) you can either
move on with the rest of your program or read the returned value from the sum data type.
Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated
in the first part of the README.
The sum data type (i.e., the `value` union) defines five different variables.
Each of them has a unique scope as described below:
- `string`: result of `new`, `clone`, `slice`, `reverse` and `trim` functions;
- `symbol`: result of `get_at` function;
- `idx`: result of `contains` function;
- `is_equ`: result of `eq` function. It's true when two strings are equal, false otherwise;
- `split`: result of `split` function. It contains an array of `string_t` and its number of elements.

View File

@@ -25,19 +25,19 @@ deletion.
At the time being, `Vector` supports the following methods:
- `vector_result_t vector_new(size, data_size)`: creates a new vector;
- `vector_result_t vector_push(vector, value)`: adds a new value to the vector;
- `vector_result_t vector_set(vector, index, value)`: updates the value of a given index if it exists;
- `vector_result_t vector_get(vector, index)`: returns the value indexed by `index` if it exists;
- `vector_result_t vector_sort(vector, cmp)`: sorts vector using `cmp` function;
- `vector_result_t vector_pop(vector)`: pops last element from the vector following the LIFO policy;
- `vector_result_t vector_map(vector, callback, env)`: applies `callback` function to vector (in-place);
- `vector_result_t vector_filter(vector, callback, env)`: filters vector using `callback` (in-place);
- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: folds/reduces vector using `callback`;
- `vector_result_t vector_clear(vector)`: resets the vector logically. That is, new pushes will overwrite the memory;
- `vector_result_t vector_destroy(vector)`: deletes the vector;
- `size_t vector_size(vector)`: returns vector size (i.e., the number of elements);
- `size_t vector_capacity(vector)`: returns vector capacity (i.e., vector total size).
- `vector_result_t vector_new(size, data_size)`: create a new vector;
- `vector_result_t vector_push(vector, value)`: add a new value to the vector;
- `vector_result_t vector_set(vector, index, value)`: update the value of a given index if it exists;
- `vector_result_t vector_get(vector, index)`: return the value indexed by `index` if it exists;
- `vector_result_t vector_sort(vector, cmp)`: sort vector using `cmp` function;
- `vector_result_t vector_pop(vector)`: pop last element from the vector following the LIFO policy;
- `vector_result_t vector_map(vector, callback, env)`: apply `callback` function to vector (in-place);
- `vector_result_t vector_filter(vector, callback, env)`: filter vector using `callback` (in-place);
- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: fold/reduce vector using `callback`;
- `vector_result_t vector_clear(vector)`: logically reset the vector. That is, new pushes will overwrite the memory;
- `vector_result_t vector_destroy(vector)`: delete the vector;
- `size_t vector_size(vector)`: return vector size (i.e., the number of elements);
- `size_t vector_capacity(vector)`: return vector capacity (i.e., vector total size).
As you can see from the previous function signatures, most methods that operate
on the `Vector` data type return a custom type called `vector_result_t` which is

View File

@@ -9,10 +9,6 @@
(result).message[RESULT_MSG_SIZE - 1] = '\0'; \
} while (0)
#define REMOVE(ptr) \
free(ptr); \
ptr = NULL
#define IS_DIGIT(c) ((c) >= '0') && ((c) <= '9')
#include <stdio.h>
@@ -23,6 +19,7 @@
#include "bigint.h"
#include "vector.h"
// Internal methods
/**
* bigint_trim_zeros
* @number: a non-null big integer
@@ -845,32 +842,30 @@ cleanup: // Destroy intermediate allocations on error
}
/**
* bigint_div
* @x: a non-null big integer acting as a dividend
* @y: a non-null big integer acting as a divisor
* bigint_div
* @x: a valid non-null big integer (dividend)
* @y: a valid non-null big integer (divisor)
*
* Computers the quotient floor (i.e., |X| / |Y|) using Knuth's Algorithm D
* Adaoted from p. 273 of Don Knuth's TAoCP Vol. 2
* The complexity is O(n * m) where 'n' and 'm' are the number of base-10^9
* "parts" (the limbs in the code below) in the divisor and the quotient, respectively.
* Computes division using long division algorithm in O(n^2)
*
* Returns a bigint_result_t containing the quotient.
* The called of this function will be responsible for applying the sign.
* Returns a bigint_result_t data type
*/
static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
bigint_result_t result = {0};
bigint_result_t tmp_res = {0};
bigint_t *quotient = NULL;
long long *u = NULL, *v = NULL, *q = NULL;
bigint_t *remainder = NULL;
bigint_t *abs_y = NULL;
if (x == NULL || y == NULL) {
result.status = BIGINT_ERR_INVALID;
SET_MSG(result, "Invalid big integers");
SET_MSG(result, "Invalid big numbers");
return result;
}
// Check for division by zero
const size_t y_size = vector_size(y->digits);
if (y_size == 0) {
result.status = BIGINT_ERR_DIV_BY_ZERO;
@@ -880,16 +875,16 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
}
if (y_size == 1) {
vector_result_t y0_res = vector_get(y->digits, 0);
if (y0_res.status != VECTOR_OK) {
vector_result_t y_val_res = vector_get(y->digits, 0);
if (y_val_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, y0_res.message);
COPY_MSG(result, y_val_res.message);
return result;
}
int *y0 = (int *)y0_res.value.element;
if (*y0 == 0) {
int *y_val = (int*)y_val_res.value.element;
if (*y_val == 0) {
result.status = BIGINT_ERR_DIV_BY_ZERO;
SET_MSG(result, "Cannot divide by zero");
@@ -897,230 +892,94 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
}
}
// If |x| < |y| then result is zero
tmp_res = bigint_compare_abs(x, y);
if (tmp_res.status != BIGINT_OK) {
return tmp_res;
}
if (tmp_res.status != BIGINT_OK) { return tmp_res; }
if (tmp_res.value.compare_status < 0) {
return bigint_from_int(0);
}
tmp_res = bigint_from_int(0);
if (tmp_res.status != BIGINT_OK) { return tmp_res; }
const size_t x_size = vector_size(x->digits);
const size_t n = y_size;
const long long BASE = (long long)BIGINT_BASE;
quotient = malloc(sizeof(bigint_t));
if (quotient == NULL) {
result.status = BIGINT_ERR_ALLOCATE;
SET_MSG(result, "Cannot allocate memory for big integer");
goto cleanup;
}
quotient->digits = NULL;
quotient->is_negative = false;
// Single-limb divisor case. Here, we scan using 64-bit arithmetic in O(n)
if (y_size == 1) {
vector_result_t y0_res = vector_get(y->digits, 0);
if (y0_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, y0_res.message);
goto cleanup;
}
long long divisor = *(int *)y0_res.value.element;
vector_result_t vec_res = vector_new(x_size, sizeof(int));
if (vec_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_ALLOCATE;
COPY_MSG(result, vec_res.message);
goto cleanup;
}
quotient->digits = vec_res.value.vector;
long long remainder = 0;
for (int idx = (int)x_size - 1; idx >= 0; idx--) {
vector_result_t xidx_res = vector_get(x->digits, idx);
if (xidx_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, xidx_res.message);
goto cleanup;
}
long long current = remainder * BASE + *(int *)xidx_res.value.element;
int q_idx = (int)(current / divisor);
remainder = current % divisor;
vector_result_t push_res = vector_push(quotient->digits, &q_idx);
if (push_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, push_res.message);
goto cleanup;
}
}
// Restore the LSB-first order
const size_t q_size = vector_size(quotient->digits);
for (size_t lo = 0, hi = q_size - 1; lo < hi; hi--) {
vector_result_t lr = vector_get(quotient->digits, lo);
vector_result_t hr = vector_get(quotient->digits, hi);
if (lr.status != VECTOR_OK || hr.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
SET_MSG(result, "Failed to reverse quotient digits");
goto cleanup;
}
int lower_val = *(int *)lr.value.element;
int higher_val = *(int *)hr.value.element;
vector_set(quotient->digits, lo, &higher_val);
vector_set(quotient->digits, hi, &lower_val);
}
bigint_result_t trim_res = bigint_trim_zeros(quotient);
if (trim_res.status != BIGINT_OK) { result = trim_res; goto cleanup; }
result.value.number = quotient;
result.value.number = tmp_res.value.number;
result.status = BIGINT_OK;
SET_MSG(result, "Division between big integers was successful");
return result;
}
/* General case using Knuth's Algorithm
* First, some definitions:
* index 0 -> least significant limb;
* n -> limb count of divisor y
* m -> limb count of quotient (x_size - n)
* u[0 ... m + n] -> working copy of the (scaled) dividend +1 sentinel limb
* v[0 ... n - 1] -> working copy of the (scaled) divisor
* q[0 ... m] -> output quotient limbs
*/
const size_t m = x_size - n;
// Initialize quotient and remainder
tmp_res = bigint_from_int(0);
if (tmp_res.status != BIGINT_OK) { return tmp_res; }
quotient = tmp_res.value.number;
u = calloc(m + n + 1, sizeof(long long));
v = calloc(n, sizeof(long long));
q = calloc(m + 1, sizeof(long long));
tmp_res = bigint_from_int(0);
if (tmp_res.status != BIGINT_OK) { bigint_destroy(quotient); return tmp_res; }
remainder = tmp_res.value.number;
if (u == NULL || v == NULL || q == NULL) {
result.status = BIGINT_ERR_ALLOCATE;
SET_MSG(result, "Cannot allocate scratch arrays for division");
// Create absolute value of y for later comparisons
tmp_res = bigint_clone(y);
if (tmp_res.status != BIGINT_OK) {
bigint_destroy(quotient);
bigint_destroy(remainder);
goto cleanup;
return tmp_res;
}
for (size_t idx = 0; idx < x_size; idx++) {
vector_result_t get_res = vector_get(x->digits, idx);
if (get_res.status != VECTOR_OK) {
abs_y = tmp_res.value.number;
abs_y->is_negative = false;
// Long division algorithm applied from MSB to LSB
const size_t x_size = vector_size(x->digits);
for (int idx = (int)x_size - 1; idx >= 0; idx--) {
// Shift remainder left by one base digit (multiplication by BASE)
tmp_res = bigint_shift_left(remainder, 1);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
bigint_t *shifted_remainder = tmp_res.value.number;
bigint_destroy(remainder);
remainder = shifted_remainder;
// Add current digit of 'x' to the least significant position of remainder
vector_result_t digit_res = vector_get(x->digits, idx);
if (digit_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, get_res.message);
COPY_MSG(result, digit_res.message);
goto cleanup;
}
u[idx] = *(int *)get_res.value.element;
}
int *x_digit = (int*)digit_res.value.element;
for (size_t idx = 0; idx < n; idx++) {
vector_result_t get_res = vector_get(y->digits, idx);
if (get_res.status != VECTOR_OK) {
vector_result_t set_res = vector_set(remainder->digits, 0, x_digit);
if (set_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, get_res.message);
COPY_MSG(result, set_res.message);
goto cleanup;
}
v[idx] = *(int *)get_res.value.element;
tmp_res = bigint_trim_zeros(remainder);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
// Count how many times 'y' fits into current remainder
size_t count = 0;
while (1) {
tmp_res = bigint_compare_abs(remainder, abs_y);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
if (tmp_res.value.compare_status < 0) { break; } // remainder < abs_y
// remainder = remainder - abs_y
tmp_res = bigint_sub_abs(remainder, abs_y);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
bigint_t *new_remainder = tmp_res.value.number;
bigint_destroy(remainder);
remainder = new_remainder;
count++;
}
// D1 (normalize): choose 'd' so that v[n - 1] >= BASE / 2 (after scaling)
const long long d = BASE / (v[n - 1] + 1);
long long carry = 0;
for (size_t idx = 0; idx < x_size; idx++) {
long long current = u[idx] * d + carry;
u[idx] = current % BASE;
carry = current / BASE;
}
u[x_size] = carry;
carry = 0;
for (size_t idx = 0; idx < n; idx++) {
long long current = v[idx] * d + carry;
v[idx] = current % BASE;
carry = current / BASE;
}
// D2-D6: the main loop. One iteration produces one quotient limb
for (long long j = (long long)m; j >= 0; j--) {
size_t jj = (size_t)j;
// D3: 2-by-1 trial quotient
long long two_limb = u[jj + n] * BASE + u[jj + n - 1];
long long q_hat = two_limb / v[n - 1];
long long r_hat = two_limb % v[n - 1];
while (q_hat >= BASE || ((n >= 2) && (q_hat * v[n - 2]) > (BASE * r_hat + u[jj + n - 2]))) {
q_hat--;
r_hat += v[n - 1];
if (r_hat >= BASE) { break; }
}
// D4: multiply-subtract u[j ... j + n] -= q_hat * v[0 ... n - 1]
long long borrow = 0;
for (size_t idx = 0; idx < n; idx++) {
long long product = q_hat * v[idx] + borrow;
borrow = product / BASE;
long long diff = u[jj + idx] - (product % BASE);
if (diff < 0) {
diff += BASE;
borrow++;
}
u[jj + idx] = diff;
}
u[jj + n] -= borrow;
// D5: store quotient digit
q[jj] = q_hat;
// D6: if 'u' went negative, add 'v' back once and decrement q[j]
if (u[jj + n] < 0) {
q[jj]--;
carry = 0;
for (size_t idx = 0; idx < n; idx++) {
long long sum = u[jj + idx] + v[idx] + carry;
u[jj + idx] = sum % BASE;
carry = sum / BASE;
}
u[jj + n] += carry;
}
}
// Delete working copy from memory
REMOVE(u); REMOVE(v);
// Build the bigint quotient from q[0 ... m] (index 0 = LSB)
vector_result_t vec_res = vector_new(m + 1, sizeof(int));
if (vec_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_ALLOCATE;
COPY_MSG(result, vec_res.message);
goto cleanup;
}
quotient->digits = vec_res.value.vector;
for (size_t idx = 0; idx <= m; idx++) {
int q_idx = (int)q[idx];
vector_result_t push_res = vector_push(quotient->digits, &q_idx);
// Add count to quotient digits
vector_result_t push_res = vector_push(quotient->digits, &count);
if (push_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, push_res.message);
@@ -1129,10 +988,34 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
}
}
REMOVE(q);
// Reverse quotient digits
const size_t q_size = vector_size(quotient->digits);
for (size_t idx = 0; idx < q_size / 2; idx++) {
vector_result_t left_res = vector_get(quotient->digits, idx);
vector_result_t right_res = vector_get(quotient->digits, q_size - 1 - idx);
bigint_result_t trim_res = bigint_trim_zeros(quotient);
if (trim_res.status != BIGINT_OK) { result = trim_res; goto cleanup; }
if (left_res.status != VECTOR_OK || right_res.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
SET_MSG(result, "Failed to access vector elements");
goto cleanup;
}
int *left = (int*)left_res.value.element;
int *right = (int*)right_res.value.element;
int temp = *left;
vector_set(quotient->digits, idx, right);
vector_set(quotient->digits, q_size - 1 - idx, &temp);
}
quotient->is_negative = (x->is_negative != y->is_negative);
tmp_res = bigint_trim_zeros(quotient);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
bigint_destroy(remainder);
bigint_destroy(abs_y);
result.value.number = quotient;
result.status = BIGINT_OK;
@@ -1141,20 +1024,20 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
return result;
cleanup:
free(u); free(v); free(q);
if (quotient) { bigint_destroy(quotient); }
if (remainder) { bigint_destroy(remainder); }
if (abs_y) { bigint_destroy(abs_y); }
return result;
}
/**
* bigint_from_int
* @value: an integer value
*
* Takes an integer and convert it to a big integer
*
* Returns a bigint_result_t data type containing a new big integer
* Returns a big_int_result_t data type containing a new big integer
*/
bigint_result_t bigint_from_int(long long value) {
bigint_result_t result = {0};
@@ -1672,14 +1555,14 @@ bigint_result_t bigint_prod(const bigint_t *x, const bigint_t *y) {
return result;
}
/**
* bigint_divmod
* @x: a valid non-null big integer
* @y: a valid non-null big integer
*
* Computes truncated division with remainder. That is:
* quotient = trunc(x / y) sign = sign(x) XOR sign(y)
* remainder = x - y * quotient sign = sign(x)
* Computes division with remainder
*
* Returns a bigint_result_t data type
*/
@@ -1687,6 +1570,7 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
bigint_result_t result = {0};
bigint_result_t tmp_res = {0};
// Intermediate results
bigint_t *quotient = NULL;
bigint_t *y_times_q = NULL;
bigint_t *remainder = NULL;
@@ -1698,10 +1582,11 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
return result;
}
// Check for division by zero
const size_t y_size = vector_size(y->digits);
if (y_size == 0) {
result.status = BIGINT_ERR_DIV_BY_ZERO;
SET_MSG(result, "Cannot divide by zero");
SET_MSG(result, "Division by zero");
return result;
}
@@ -1715,16 +1600,16 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
return result;
}
int *y_val = (int *)y_val_res.value.element;
int *y_val = (int*)y_val_res.value.element;
if (*y_val == 0) {
result.status = BIGINT_ERR_DIV_BY_ZERO;
SET_MSG(result, "Cannot divide by zero");
SET_MSG(result, "Division by zero");
return result;
}
}
// |x| < |y|: quotient is 0, remainder is x
// |x| < |y| then quotient is 0 and remainder is x
tmp_res = bigint_compare_abs(x, y);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
@@ -1739,7 +1624,6 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
result.value.division.quotient = quotient;
result.value.division.remainder = remainder;
result.status = BIGINT_OK;
SET_MSG(result, "Division between big integers was successful");
@@ -1750,10 +1634,7 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
quotient = tmp_res.value.number;
// Set quotient sign accordingly
quotient->is_negative = (x->is_negative != y->is_negative);
// Compute remainder using r = x - y * q
// Compute r = x - y * q
tmp_res = bigint_prod(y, quotient);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
y_times_q = tmp_res.value.number;
@@ -1762,24 +1643,13 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
remainder = tmp_res.value.number;
// Ensure that remainder has correct sign (i.e., same as dividend x)
// In C-style division, sign(remainder) = sign(dividend)
remainder->is_negative = x->is_negative;
tmp_res = bigint_trim_zeros(remainder);
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
// Set remainder sign accordingly
vector_result_t r0 = vector_get(remainder->digits, 0);
if (r0.status != VECTOR_OK) {
result.status = BIGINT_ERR_INVALID;
COPY_MSG(result, r0.message);
goto cleanup;
}
bool rem_is_zero = (vector_size(remainder->digits) == 1 && *(int *)r0.value.element == 0);
if (!rem_is_zero) {
remainder->is_negative = x->is_negative;
}
result.value.division.quotient = quotient;
result.value.division.remainder = remainder;
result.status = BIGINT_OK;
@@ -1883,15 +1753,11 @@ bigint_result_t bigint_printf(const char *format, ...) {
// Process string char by char
for (const char *p = format; *p != '\0'; p++) {
if (*p == '%' && *(p + 1) != '%') {
p++;
const char placeholder = *p;
switch (placeholder) {
case 'B': {
if (*p == '%' && *(p + 1) == 'B') {
// Process a big number
bigint_t *num = va_arg(args, bigint_t*);
if (num == NULL) {
for (const char *s = "<invalid big integer>"; *s != '\0'; s++) { putchar(*s); }
printf("<invalid string>");
} else {
bigint_result_t num_str_res = bigint_to_string(num);
if (num_str_res.status != BIGINT_OK) {
@@ -1899,21 +1765,28 @@ bigint_result_t bigint_printf(const char *format, ...) {
return num_str_res;
}
char *number_str = num_str_res.value.string_num;
for (const char *s = number_str; *s != '\0'; s++) { putchar(*s); }
char* const number_str = num_str_res.value.string_num;
printf("%s", number_str);
free(number_str);
}
break;
}
p++;
} else if (*p == '%' && *(p + 1) != '%') {
// Handle common printf placeholders
p++;
char placeholder = *p;
switch (placeholder) {
case 'd':
case 'i': {
int val = va_arg(args, int);
printf("%d", val);
break;
}
case 'u': {
unsigned int val = va_arg(args, unsigned int);
printf("%u", val);
break;
}
case 'l': {
@@ -1933,17 +1806,13 @@ bigint_result_t bigint_printf(const char *format, ...) {
break;
}
case 's': {
char* val = va_arg(args, char*);
if (val) {
for (const char *s = val; *s != '\0'; s++) { putchar(*s); }
} else {
for (const char *s = "<invalid string>"; *s != '\0'; s++) { putchar(*s); }
}
char *val = va_arg(args, char*);
printf("%s", val ? val : "<invalid string>");
break;
}
case 'c': {
int val = va_arg(args, int);
putchar(val);
printf("%c", val);
break;
}
case 'f': {
@@ -1952,7 +1821,7 @@ bigint_result_t bigint_printf(const char *format, ...) {
break;
}
case 'p': {
void* const val = va_arg(args, void*);
void *val = va_arg(args, void*);
printf("%p", val);
break;
}

View File

@@ -10,6 +10,8 @@
#include "map.h"
// Internal methods
/**
* hash_key
* @key: The input string for the hash function

934
src/string.c Normal file
View File

@@ -0,0 +1,934 @@
/*
 * SET_MSG: copy the C string `msg` into `(result).message`.
 * The text is truncated to fit RESULT_MSG_SIZE bytes and snprintf always
 * NUL-terminates the destination. `msg` is parenthesized so that an expression
 * argument is cast as a whole rather than only its first operand.
 */
#define SET_MSG(result, msg) \
    do { \
        snprintf((char *)(result).message, RESULT_MSG_SIZE, "%s", (const char *)(msg)); \
    } while (0)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "string.h"
// Return true when `c` is one of the six ASCII whitespace bytes:
// space, '\t', '\n', '\r', '\f' or '\v'
static inline bool is_space(unsigned char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case '\v':
            return true;
        default:
            return false;
    }
}
// Map the lead byte of a UTF-8 sequence to the sequence length in bytes (1-4).
// Returns -1 when `byte` cannot start a sequence (e.g. a continuation byte)
static inline int utf8_char_len(unsigned char byte) {
    if (byte < 0x80) { return 1; }           // 0xxxxxxx
    if ((byte >> 5) == 0x06) { return 2; }   // 110xxxxx
    if ((byte >> 4) == 0x0E) { return 3; }   // 1110xxxx
    if ((byte >> 3) == 0x1E) { return 4; }   // 11110xxx
    return -1;
}
// Check that `utf8_char` is a C string holding exactly one UTF-8 symbol:
// a valid lead byte, the matching number of continuation bytes, then the
// NUL terminator. NULL and the empty string are rejected.
// On success the symbol's byte length is written to `out_len` (if non-NULL).
static bool utf8_is_char_valid(const char *utf8_char, int *out_len) {
    // An empty string holds no symbol; rejecting it here also avoids
    // reading utf8_char[1], which lies past the terminator
    if (utf8_char == NULL || utf8_char[0] == '\0') {
        return false;
    }

    // Keep the length signed: utf8_char_len reports invalid lead bytes as -1
    const int len = utf8_char_len((unsigned char)utf8_char[0]);
    if (len <= 0) {
        return false;
    }

    // A NUL byte fails the continuation test, so a truncated sequence is
    // rejected before the loop can step past the terminator
    for (int idx = 1; idx < len; idx++) {
        if (((unsigned char)utf8_char[idx] & 0xC0) != 0x80) {
            return false;
        }
    }

    // Exactly one symbol is allowed: the terminator must follow immediately
    if (utf8_char[len] != '\0') {
        return false;
    }

    if (out_len != NULL) {
        *out_len = len;
    }

    return true;
}
// Validate a NUL-terminated UTF-8 string and, in the same pass, measure its
// byte length (terminator excluded) and its number of symbols.
// Returns false on an invalid lead byte or a missing/invalid continuation
// byte; the output parameters are written only on success.
static bool utf8_scan(const char *str, size_t *out_byte_size, size_t *out_char_count) {
    size_t b_size = 0;
    size_t c_count = 0;
    const unsigned char *p = (const unsigned char *)str;

    while (p[b_size] != '\0') {
        // Keep the length signed: utf8_char_len reports invalid lead bytes as -1
        const int len = utf8_char_len(p[b_size]);
        if (len <= 0) {
            return false;
        }

        for (int idx = 1; idx < len; idx++) {
            const unsigned char next = p[b_size + (size_t)idx];
            if (next == '\0' || (next & 0xC0) != 0x80) {
                return false;
            }
        }

        b_size += (size_t)len;
        c_count++;
    }

    *out_byte_size = b_size;
    *out_char_count = c_count;

    return true;
}
// Decode the UTF-8 symbol starting at `str` into its codepoint.
// The symbol's byte length (as reported by utf8_char_len for the lead byte)
// is stored in `char_len`; when the lead byte is invalid the function
// returns 0 and `char_len` receives the value utf8_char_len returned.
static uint32_t utf8_decode(const char *str, int *char_len) {
    const unsigned char lead = (unsigned char)*str;
    const int width = utf8_char_len(lead);

    *char_len = width;

    if (width == 1) {
        return lead;
    }

    if (width == 2) {
        return ((lead & 0x1F) << 6) |
               (str[1] & 0x3F);
    }

    if (width == 3) {
        return ((lead & 0x0F) << 12) |
               ((str[1] & 0x3F) << 6) |
               (str[2] & 0x3F);
    }

    if (width == 4) {
        return ((lead & 0x07) << 18) |
               ((str[1] & 0x3F) << 12) |
               ((str[2] & 0x3F) << 6) |
               (str[3] & 0x3F);
    }

    return 0;
}
// Encode `codepoint` as UTF-8 into `out` (no terminator is written).
// Returns the number of bytes produced (1-4), or 0 when the codepoint is
// above 0x10FFFF, in which case `out` is left untouched.
static int utf8_encode(uint32_t codepoint, char *out) {
    // Lead-byte marker indexed by sequence width (widths 2, 3 and 4)
    static const unsigned char lead_marker[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
    int width;

    if (codepoint <= 0x7F) {
        out[0] = (char)codepoint;
        return 1;
    } else if (codepoint <= 0x7FF) {
        width = 2;
    } else if (codepoint <= 0xFFFF) {
        width = 3;
    } else if (codepoint <= 0x10FFFF) {
        width = 4;
    } else {
        return 0;
    }

    // Fill continuation bytes from the tail, six payload bits at a time
    uint32_t remaining = codepoint;
    for (int slot = width - 1; slot > 0; slot--) {
        out[slot] = (char)(0x80 | (remaining & 0x3F));
        remaining >>= 6;
    }

    // Whatever payload is left belongs to the lead byte
    out[0] = (char)(lead_marker[width] | remaining);

    return width;
}
/**
 * string_new
 *  @c_str: a NUL-terminated C-string expected to contain valid UTF-8
 *
 *  Validates @c_str with utf8_scan and copies it, terminator included, into
 *  a newly allocated String whose byte size and symbol count are recorded.
 *
 *  Returns a string_result_t containing a new String data type. The status is
 *  STRING_ERR_INVALID for a NULL input, STRING_ERR_INVALID_UTF8 for a
 *  malformed sequence and STRING_ERR_ALLOCATE when an allocation fails
 */
string_result_t string_new(const char *c_str) {
    string_result_t result = {0};
    size_t byte_len;
    size_t symbol_count;

    if (c_str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid null input string");

        return result;
    }

    if (!utf8_scan(c_str, &byte_len, &symbol_count)) {
        result.status = STRING_ERR_INVALID_UTF8;
        SET_MSG(result, "Malformed UTF-8 sequence");

        return result;
    }

    // Allocate the descriptor first, the payload only if that succeeded
    string_t *created = malloc(sizeof *created);
    char *payload = (created != NULL) ? malloc(byte_len + 1) : NULL;
    if (payload == NULL) {
        free(created);
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    // byte_len excludes the terminator, hence the extra byte
    memcpy(payload, c_str, byte_len + 1);

    created->data = payload;
    created->byte_size = byte_len;
    created->byte_capacity = byte_len + 1;
    created->char_count = symbol_count;

    result.status = STRING_OK;
    SET_MSG(result, "String successfully created");
    result.value.string = created;

    return result;
}
/**
 * string_clone
 *  @str: a non-null string
 *
 *  Produces an independent deep copy of @str: the byte buffer (terminator
 *  included) is duplicated and the size/count fields are carried over.
 *
 *  Returns a string_result_t containing the copied string, or a
 *  STRING_ERR_INVALID / STRING_ERR_ALLOCATE status on failure
 */
string_result_t string_clone(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    const size_t bytes_with_nul = str->byte_size + 1;

    // Allocate the descriptor first, the payload only if that succeeded
    string_t *duplicate = malloc(sizeof *duplicate);
    char *payload = (duplicate != NULL) ? malloc(bytes_with_nul) : NULL;
    if (payload == NULL) {
        free(duplicate);
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(payload, str->data, bytes_with_nul);

    duplicate->data = payload;
    duplicate->byte_size = str->byte_size;
    duplicate->byte_capacity = bytes_with_nul;
    duplicate->char_count = str->char_count;

    result.status = STRING_OK;
    result.value.string = duplicate;
    SET_MSG(result, "String successfully copied");

    return result;
}
/**
 * string_concat
 *  @x: a non-null string
 *  @y: a non-null string
 *
 *  Builds a new String made of the bytes of @x followed by the bytes of @y.
 *  The joined bytes are assembled in a temporary buffer and handed to
 *  string_new, so the status and message of a successful call are those
 *  produced by string_new.
 *
 *  Returns a string_result_t containing the new string, or a
 *  STRING_ERR_INVALID / STRING_ERR_OVERFLOW / STRING_ERR_ALLOCATE status
 */
string_result_t string_concat(const string_t *x, const string_t *y) {
    string_result_t result = {0};

    if (x == NULL || y == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid strings");

        return result;
    }

    // Make sure both sizes plus the terminator fit in a size_t
    if (x->byte_size > SIZE_MAX - y->byte_size - 1) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Concatenation exceeds size limits");

        return result;
    }

    const size_t left_len = x->byte_size;
    const size_t right_len = y->byte_size;
    const size_t joined_len = left_len + right_len;

    char *scratch = malloc(joined_len + 1);
    if (scratch == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(scratch, x->data, left_len);
    memcpy(scratch + left_len, y->data, right_len);
    scratch[joined_len] = '\0';

    // string_new copies the buffer, so the scratch area can be released
    result = string_new(scratch);
    free(scratch);

    return result;
}
/**
 * string_contains
 *  @haystack: a non-null string
 *  @needle: a non-null, non-empty string
 *
 *  Searches @haystack for the first occurrence of @needle.
 *
 *  Returns a string_result_t whose value.idx is the symbol (not byte) index
 *  at which the match begins, or -1 when @needle does not occur.
 *  A NULL argument or an empty @needle yields STRING_ERR_INVALID
 */
string_result_t string_contains(const string_t *haystack, const string_t *needle) {
    string_result_t result = {
        .status = STRING_OK,
        .value.idx = -1
    };

    if (haystack == NULL || needle == NULL || needle->byte_size == 0) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid substrings");

        return result;
    }

    const char *match = strstr(haystack->data, needle->data);
    if (match == NULL) {
        SET_MSG(result, "Substring not found");

        return result;
    }

    // Convert the byte position of the match into a symbol index
    size_t symbols_before = 0;
    for (const char *cursor = haystack->data; cursor < match; symbols_before++) {
        cursor += utf8_char_len((unsigned char)*cursor);
    }

    result.value.idx = (int64_t)symbols_before;
    SET_MSG(result, "Substring found");

    return result;
}
/**
 * string_slice
 *  @str: a non-null string
 *  @start: the lower bound (inclusive), as a symbol index
 *  @end: the upper bound (inclusive), as a symbol index
 *
 *  Extracts a slice from @str between @start and @end (inclusive).
 *  Bounds are expressed in symbols; they are translated to byte offsets
 *  by walking the UTF-8 data.
 *
 *  Returns a string_result_t data type containing the slice as a new String.
 *  The status is STRING_ERR_INVALID for a NULL string, STRING_ERR_OVERFLOW
 *  when @start > @end or @end is past the last symbol, and
 *  STRING_ERR_ALLOCATE when memory cannot be obtained
 */
string_result_t string_slice(const string_t *str, size_t start, size_t end) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    if (start > end || end >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

        return result;
    }

    // Byte offset of the first symbol of the slice
    size_t start_byte_offset = 0;
    for (size_t idx = 0; idx < start; idx++) {
        start_byte_offset += utf8_char_len((unsigned char)str->data[start_byte_offset]);
    }

    // Byte offset one past the last symbol of the slice
    size_t end_byte_offset = start_byte_offset;
    for (size_t idx = start; idx <= end; idx++) {
        end_byte_offset += utf8_char_len((unsigned char)str->data[end_byte_offset]);
    }

    const size_t slice_byte_size = end_byte_offset - start_byte_offset;

    string_t *slice = malloc(sizeof *slice);
    if (slice == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    slice->data = malloc(slice_byte_size + 1);
    if (slice->data == NULL) {
        // Release the descriptor so that it does not leak
        free(slice);
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(slice->data, str->data + start_byte_offset, slice_byte_size);
    slice->data[slice_byte_size] = '\0';

    slice->byte_size = slice_byte_size;
    slice->byte_capacity = slice_byte_size + 1;
    slice->char_count = end - start + 1;

    result.status = STRING_OK;
    result.value.string = slice;
    SET_MSG(result, "String sliced successfully");

    return result;
}
/**
 * string_eq
 *  @x: a non-null string
 *  @y: a non-null string
 *  @case_sensitive: boolean value for case sensitive comparison
 *
 *  Compares two Strings. The case-insensitive mode folds only the ASCII
 *  letters 'A'-'Z'; every other codepoint is compared as is.
 *
 *  Returns a string_result_t containing the comparison result in
 *  value.is_equ. Note that strings with a different symbol count are
 *  reported with status STRING_ERR_INVALID (and is_equ set to false)
 */
string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive) {
    string_result_t result = {
        .status = STRING_OK,
        .value.is_equ = false
    };

    if (x == NULL || y == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid strings");

        return result;
    }

    if (x->char_count != y->char_count) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Strings differ in length");

        return result;
    }

    if (case_sensitive) {
        result.value.is_equ = (strcmp(x->data, y->data) == 0);
    } else {
        const char *p1 = x->data;
        const char *p2 = y->data;
        bool same = true;

        while (*p1 && *p2) {
            int l1, l2;

            const uint32_t codepoint1 = utf8_decode(p1, &l1);
            const uint32_t codepoint2 = utf8_decode(p2, &l2);

            const uint32_t c1 = (codepoint1 >= 'A' && codepoint1 <= 'Z') ? codepoint1 + 32 : codepoint1;
            const uint32_t c2 = (codepoint2 >= 'A' && codepoint2 <= 'Z') ? codepoint2 + 32 : codepoint2;

            if (c1 != c2) {
                // Leave the loop instead of returning so that the mismatch
                // path reports the same completion message as the others
                same = false;
                break;
            }

            p1 += l1;
            p2 += l2;
        }

        // Equal only if no symbol differed and both strings ended together
        result.value.is_equ = same && (*p1 == *p2);
    }

    SET_MSG(result, "Comparison completed successfully");

    return result;
}
/**
 * string_get_at
 *  @str: a non-null string
 *  @position: the symbol index to read
 *
 *  Copies the symbol indexed by @position from @str into a newly allocated,
 *  NUL-terminated C string (allocated with malloc).
 *
 *  Returns a string_result_t containing the symbol as a C string in
 *  value.symbol, or a STRING_ERR_INVALID / STRING_ERR_OVERFLOW /
 *  STRING_ERR_ALLOCATE status on failure
 */
string_result_t string_get_at(const string_t *str, size_t position) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    if (position >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

        return result;
    }

    // Skip `position` symbols to reach the requested one
    const char *cursor = str->data;
    size_t remaining = position;
    while (remaining > 0) {
        cursor += utf8_char_len((unsigned char)*cursor);
        remaining--;
    }

    const int symbol_bytes = utf8_char_len((unsigned char)*cursor);

    char *symbol = malloc(symbol_bytes + 1);
    if (symbol == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(symbol, cursor, symbol_bytes);
    symbol[symbol_bytes] = '\0';

    result.value.symbol = symbol;
    result.status = STRING_OK;
    SET_MSG(result, "Symbol successfully retrieved");

    return result;
}
/**
 * string_set_at
 *  @str: a non-null string
 *  @position: the symbol index to overwrite
 *  @utf8_char: a C string holding exactly one UTF-8 symbol
 *
 *  Builds a new String equal to @str with the symbol at @position replaced
 *  by @utf8_char. @str itself is not modified; the replacement may have a
 *  different byte length than the symbol it replaces.
 *
 *  Returns a string_result_t data type containing the new String, or a
 *  STRING_ERR_INVALID / STRING_ERR_INVALID_UTF8 / STRING_ERR_OVERFLOW /
 *  STRING_ERR_ALLOCATE status on failure
 */
string_result_t string_set_at(const string_t *str, size_t position, const char *utf8_char) {
    string_result_t result = {0};
    int incoming_bytes;

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    if (!utf8_is_char_valid(utf8_char, &incoming_bytes)) {
        result.status = STRING_ERR_INVALID_UTF8;
        SET_MSG(result, "Invalid UTF-8 character");

        return result;
    }

    if (position >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

        return result;
    }

    // Walk to the symbol being replaced
    const char *target = str->data;
    for (size_t skipped = 0; skipped < position; skipped++) {
        target += utf8_char_len((unsigned char)*target);
    }

    // Split the original into head | replaced symbol | tail
    const size_t head_bytes = target - str->data;
    const int outgoing_bytes = utf8_char_len((unsigned char)*target);
    const size_t tail_bytes = str->byte_size - head_bytes - outgoing_bytes;
    const size_t total_bytes = head_bytes + incoming_bytes + tail_bytes;

    string_t *updated = malloc(sizeof *updated);
    if (updated == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    updated->data = malloc(total_bytes + 1);
    if (updated->data == NULL) {
        free(updated);
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    // Assemble head, new symbol and tail back to back
    char *out = updated->data;
    memcpy(out, str->data, head_bytes);
    out += head_bytes;
    memcpy(out, utf8_char, incoming_bytes);
    out += incoming_bytes;
    memcpy(out, target + outgoing_bytes, tail_bytes);
    out += tail_bytes;
    *out = '\0';

    updated->byte_size = total_bytes;
    updated->byte_capacity = total_bytes + 1;
    updated->char_count = str->char_count;

    result.status = STRING_OK;
    result.value.string = updated;
    SET_MSG(result, "Symbol successfully set");

    return result;
}
/**
 * string_to_lower
 *  @str: a non-null string
 *
 *  Converts a String to lowercase. Only the ASCII letters 'A'-'Z' are
 *  folded; every other codepoint is re-encoded unchanged.
 *
 *  Returns a string_result_t containing a new string. If the new String
 *  cannot be created, the failing status and message are returned as is
 */
string_result_t string_to_lower(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    char *buf = malloc(str->byte_capacity);
    if (buf == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    const char *src = str->data;
    char *dst = buf;

    while (*src) {
        int len;
        uint32_t codepoint = utf8_decode(src, &len);
        uint32_t lower = (codepoint >= 'A' && codepoint <= 'Z') ? codepoint + 32 : codepoint;
        dst += utf8_encode(lower, dst);
        src += len;
    }
    *dst = '\0';

    result = string_new(buf);
    free(buf);

    // Do not mask a string_new failure: on error value.string is not valid
    if (result.status != STRING_OK) {
        return result;
    }

    SET_MSG(result, "String successfully converted to lowercase");

    return result;
}
/**
 * string_to_upper
 *  @str: a non-null string
 *
 *  Converts a String to uppercase. Only the ASCII letters 'a'-'z' are
 *  folded; every other codepoint is re-encoded unchanged.
 *
 *  Returns a string_result_t containing a new string. If the new String
 *  cannot be created, the failing status and message are returned as is
 */
string_result_t string_to_upper(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    char *buf = malloc(str->byte_capacity);
    if (buf == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    const char *src = str->data;
    char *dst = buf;

    while (*src) {
        int len;
        uint32_t codepoint = utf8_decode(src, &len);
        uint32_t upper = (codepoint >= 'a' && codepoint <= 'z') ? codepoint - 32 : codepoint;
        dst += utf8_encode(upper, dst);
        src += len;
    }
    *dst = '\0';

    result = string_new(buf);
    free(buf);

    // Do not mask a string_new failure: on error value.string is not valid
    if (result.status != STRING_OK) {
        return result;
    }

    SET_MSG(result, "String successfully converted to uppercase");

    return result;
}
/**
* string_reverse
* @str: a non-null string
*
* Reverses @str
*
* Returns a new string_result_t containing the reversed string
*/
string_result_t string_reverse(const string_t *str) {
string_result_t result = {0};
if (str == NULL) {
result.status = STRING_ERR_INVALID;
SET_MSG(result, "Invalid string");
return result;
}
char *buf = malloc(str->byte_capacity);
if (buf == NULL) {
result.status = STRING_ERR_ALLOCATE;
SET_MSG(result, "Cannot allocate memory");
return result;
}
const char **pos = malloc(str->char_count * sizeof(char *));
if (pos == NULL) {
result.status = STRING_ERR_ALLOCATE;
SET_MSG(result, "Cannot allocate memory");
return result;
}
const char *ptr = str->data;
for (size_t idx = 0; idx < str->char_count; idx++) {
pos[idx] = ptr;
ptr += utf8_char_len((unsigned char)*ptr);
}
char *dst = buf;
for (int64_t idx = (int64_t)str->char_count - 1; idx >= 0; idx--) {
int len = utf8_char_len((unsigned char)*pos[idx]);
memcpy(dst, pos[idx], len);
dst += len;
}
*dst = '\0';
free(pos);
result = string_new(buf);
free(buf);
SET_MSG(result, "String successfully reversed");
return result;
}
/**
 * string_trim
 *  @str: a non-null string
 *
 *  Trims leading and trailing whitespace from @str. Whitespace means the
 *  ASCII bytes recognized by is_space; a string made only of whitespace
 *  yields an empty String.
 *
 *  Returns a string_result_t containing the trimmed string as a new String.
 *  If the new String cannot be created, the failing status and message are
 *  returned as is
 */
string_result_t string_trim(const string_t *str) {
    string_result_t result = {0};

    if (str == NULL) {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");

        return result;
    }

    const char *start = str->data;
    while (*start && is_space((unsigned char)*start)) {
        start++;
    }

    // Nothing but whitespace (or an empty string)
    if (*start == '\0') {
        return string_new("");
    }

    // *start is a non-space byte, so the backward scan stops at or after it
    const char *end = str->data + str->byte_size - 1;
    while (end > start && is_space((unsigned char)*end)) {
        end--;
    }

    const size_t len = (end - start) + 1;

    char *trimmed = malloc(len + 1);
    if (trimmed == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    memcpy(trimmed, start, len);
    trimmed[len] = '\0';

    result = string_new(trimmed);
    free(trimmed);

    // Do not mask a string_new failure: on error value.string is not valid
    if (result.status != STRING_OK) {
        return result;
    }

    SET_MSG(result, "String successfully trimmed");

    return result;
}
/**
 * string_split
 *  @str: a non-null string
 *  @delim: a non-null, non-empty delimiter string
 *
 *  Splits @str by @delim. Every occurrence of @delim separates two parts,
 *  so adjacent delimiters (or a delimiter at either end) produce empty
 *  Strings, and a string without delimiters produces a single part.
 *
 *  Returns a string_result_t containing an array of String pointers in
 *  value.split.strings and its length in value.split.count. The array and
 *  its elements can be released with string_split_destroy
 */
string_result_t string_split(const string_t *str, const char *delim) {
    string_result_t result = {0};
    string_result_t tmp_res = {0};

    if (str == NULL || delim == NULL || delim[0] == '\0') {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid strings");

        return result;
    }

    // First pass: count the parts (one more than the number of delimiters)
    const char *ptr = str->data;
    const size_t delim_len = strlen(delim);
    size_t count = 1;

    while ((ptr = strstr(ptr, delim))) {
        count++;
        ptr += delim_len;
    }

    string_t **string_array = malloc(count * sizeof *string_array);
    if (string_array == NULL) {
        result.status = STRING_ERR_ALLOCATE;
        SET_MSG(result, "Cannot allocate memory");

        return result;
    }

    // Second pass: copy out each part that precedes a delimiter
    const char *start = str->data;
    size_t idx = 0;

    while ((ptr = strstr(start, delim))) {
        const size_t part_len = ptr - start;
        char *tmp = malloc(part_len + 1);
        if (tmp == NULL) {
            result.status = STRING_ERR_ALLOCATE;
            SET_MSG(result, "Cannot allocate memory");

            goto cleanup;
        }

        memcpy(tmp, start, part_len);
        tmp[part_len] = '\0';

        tmp_res = string_new(tmp);
        free(tmp);
        if (tmp_res.status != STRING_OK) { result = tmp_res; goto cleanup; }

        string_array[idx++] = tmp_res.value.string;
        start = ptr + delim_len;
    }

    // Whatever follows the last delimiter is the final part
    tmp_res = string_new(start);
    if (tmp_res.status != STRING_OK) { result = tmp_res; goto cleanup; }

    string_array[idx] = tmp_res.value.string;

    result.status = STRING_OK;
    result.value.split.strings = string_array;
    result.value.split.count = count;
    SET_MSG(result, "String successfully split");

    return result;

cleanup:
    // Only the first `idx` slots hold Strings at this point
    for (size_t j = 0; j < idx; j++) {
        string_destroy(string_array[j]);
    }
    free(string_array);

    return result;
}
/**
 * string_destroy
 *  @str: a non-null string
 *
 *  Releases the byte buffer owned by @str and then @str itself.
 *
 *  Returns a string_result_t data type whose status is STRING_ERR_INVALID
 *  when @str is NULL and STRING_OK otherwise
 */
string_result_t string_destroy(string_t *str) {
    string_result_t result = {0};

    if (str != NULL) {
        free(str->data);
        free(str);

        result.status = STRING_OK;
        SET_MSG(result, "String successfully deleted");
    } else {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");
    }

    return result;
}
/**
 * string_split_destroy
 *  @split: an array of pointers of String
 *  @count: the number of elements
 *
 *  Destroys each of the @count Strings referenced by @split with
 *  string_destroy, then releases the @split array itself.
 *
 *  Returns a string_result_t data type whose status is STRING_ERR_INVALID
 *  when @split is NULL and STRING_OK otherwise
 */
string_result_t string_split_destroy(string_t **split, size_t count) {
    string_result_t result = {0};

    if (split != NULL) {
        string_t **const past_end = split + count;
        for (string_t **entry = split; entry != past_end; entry++) {
            string_destroy(*entry);
        }
        free(split);

        result.status = STRING_OK;
        SET_MSG(result, "Array of strings successfully deleted");
    } else {
        result.status = STRING_ERR_INVALID;
        SET_MSG(result, "Invalid string");
    }

    return result;
}

70
src/string.h Normal file
View File

@@ -0,0 +1,70 @@
#ifndef STRING_H
#define STRING_H
#define RESULT_MSG_SIZE 64
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
// Status codes stored in string_result_t.status by the String functions
typedef enum {
STRING_OK = 0x0, // Operation completed successfully
STRING_ERR_ALLOCATE, // A memory allocation failed
STRING_ERR_INVALID, // Invalid argument (e.g. a NULL pointer)
STRING_ERR_INVALID_UTF8, // Input is not well-formed UTF-8
STRING_ERR_OVERFLOW // Index out of bounds or size limit exceeded
} string_status_t;
// UTF-8 encoded string that tracks both its byte size and its symbol count
typedef struct {
char *data; // NUL-terminated UTF-8 bytes
size_t byte_size; // Size in bytes excluding the NUL terminator
size_t byte_capacity; // Total allocated memory (the constructors in string.c set it to byte_size + 1)
size_t char_count; // Number of symbols (UTF-8 sequences), not bytes
} string_t;
// Result returned by every String function: a status code, a human-readable
// message and a union holding the payload of the function that was called
typedef struct {
string_status_t status;
uint8_t message[RESULT_MSG_SIZE]; // NUL-terminated text describing the outcome
union {
string_t *string; // For new, clone, concat, slice, set_at, to_lower, to_upper, reverse, trim
char *symbol; // For get_at (malloc-allocated C string)
int64_t idx; // For contains (symbol index of the match, -1 if not found)
bool is_equ; // For comparison
struct { // For split
string_t **strings; // Array of String pointers
size_t count; // Number of elements in strings
} split;
} value;
} string_result_t;
#ifdef __cplusplus
extern "C" {
#endif
// Public APIs
string_result_t string_new(const char *c_str);
string_result_t string_clone(const string_t *str);
string_result_t string_concat(const string_t *x, const string_t *y);
string_result_t string_contains(const string_t *haystack, const string_t *needle);
string_result_t string_slice(const string_t *str, size_t start, size_t end);
string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive);
string_result_t string_get_at(const string_t *str, size_t position);
string_result_t string_set_at(const string_t *str, size_t position, const char *utf8_char);
string_result_t string_to_lower(const string_t *str);
string_result_t string_to_upper(const string_t *str);
string_result_t string_reverse(const string_t *str);
string_result_t string_trim(const string_t *str);
string_result_t string_split(const string_t *str, const char *delim);
string_result_t string_destroy(string_t *str);
string_result_t string_split_destroy(string_t **split, size_t count);
// Inline methods
// Number of symbols (not bytes) held by `str`; a NULL string counts as 0
static inline size_t string_size(const string_t *str) {
    if (str == NULL) {
        return 0;
    }

    return str->char_count;
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -9,6 +9,7 @@
#include "vector.h"
// Internal methods
/**
* vector_resize
* @vector: a non-null vector

View File

@@ -213,8 +213,8 @@ void test_bigint_prod_neg(void) {
bigint_destroy(prod.value.number);
}
// Test division between big numbers where divisor is a single limb big number
void test_bigint_div_single_limb(void) {
// Test division between big numbers
void test_bigint_div(void) {
bigint_result_t x = bigint_from_int(100);
bigint_result_t y = bigint_from_int(2);
@@ -229,33 +229,11 @@ void test_bigint_div_single_limb(void) {
bigint_eq(quotient, "50");
bigint_eq(remainder, "0");
bigint_destroy(quotient); bigint_destroy(remainder);
bigint_destroy(x.value.number); bigint_destroy(y.value.number);
}
bigint_destroy(quotient);
bigint_destroy(remainder);
// Test division between big numbers using Knuth's algorithm
void test_bigint_div_knuth(void) {
// (1...9) x 8
const char *x_origin = "123456789123456789123456789123456789123456789123456789123456789123456789";
// (9...1) x 5
const char *y_origin = "987654321987654321987654321987654321987654321";
bigint_result_t x = bigint_from_string(x_origin);
bigint_result_t y = bigint_from_string(y_origin);
assert(x.status == BIGINT_OK && y.status == BIGINT_OK);
bigint_result_t div = bigint_divmod(x.value.number, y.value.number);
assert(div.status == BIGINT_OK);
bigint_t* const quotient = div.value.division.quotient;
bigint_t* const remainder = div.value.division.remainder;
bigint_eq(quotient, "124999998860937500014238281");
bigint_eq(remainder, "246737799246737799370194588370194588370194588");
bigint_destroy(quotient); bigint_destroy(remainder);
bigint_destroy(x.value.number); bigint_destroy(y.value.number);
bigint_destroy(x.value.number);
bigint_destroy(y.value.number);
}
// Test division between big numbers with negative dividend
@@ -284,7 +262,7 @@ void test_bigint_div_dividend(void) {
// Test division between big numbers with negative divisor
// This library follows C-style division such that sign(remainder) = sign(dividend)
void test_bigint_div_neg_divisor(void) {
void test_bigint_div_divisor(void) {
bigint_result_t x = bigint_from_int(13);
bigint_result_t y = bigint_from_int(-4);
@@ -427,10 +405,9 @@ int main(void) {
TEST(bigint_very_large_prod);
TEST(bigint_prod_mixed);
TEST(bigint_prod_neg);
TEST(bigint_div_single_limb);
TEST(bigint_div_knuth);
TEST(bigint_div);
TEST(bigint_div_dividend);
TEST(bigint_div_neg_divisor);
TEST(bigint_div_divisor);
TEST(bigint_div_neg);
TEST(bigint_div_by_zero);
TEST(bigint_clone);

329
tests/test_string.c Normal file
View File

@@ -0,0 +1,329 @@
/*
* Unit tests for String data type
*/
/*
 * TEST - run a single test case and report its outcome on stdout.
 * NAME is the test identifier without the "test_" prefix: TEST(foo) calls test_foo().
 *
 * The progress line is flushed before the test body runs so that the name of
 * the running test is still visible when a failing assert() aborts the process
 * while stdout is buffered (for instance when the output is piped to a CI log).
 */
#define TEST(NAME) do { \
    printf("Running test_%s...", #NAME); \
    fflush(stdout); \
    test_##NAME(); \
    printf(" PASSED\n"); \
} while(0)
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "../src/string.h"
// Test string creation
void test_string_new(void) {
string_result_t res = string_new("hello");
assert(res.status == STRING_OK);
assert(res.value.string != NULL);
assert(strcmp(res.value.string->data, "hello") == 0);
assert(string_size(res.value.string) == 5);
assert(res.value.string->byte_size == 5);
string_destroy(res.value.string);
}
// Test empty string
void test_string_new_empty(void) {
string_result_t res = string_new("");
assert(res.status == STRING_OK);
assert(string_size(res.value.string) == 0);
assert(res.value.string->byte_size == 0);
assert(res.value.string->data[0] == '\0');
string_destroy(res.value.string);
}
// Test that cloning yields a distinct object with identical content
void test_string_clone(void) {
    string_t *source = string_new("Original").value.string;

    const string_result_t cloned = string_clone(source);
    assert(cloned.status == STRING_OK);

    string_t *const copy = cloned.value.string;
    assert(copy != source); // the clone must not alias the source
    assert(strcmp(copy->data, source->data) == 0);
    assert(copy->byte_size == source->byte_size);

    string_destroy(source);
    string_destroy(copy);
}
// Test string concatenation
void test_string_concat(void) {
string_t *str1 = string_new("Foo").value.string;
string_t *str2 = string_new(" Bar").value.string;
string_result_t res = string_concat(str1, str2);
assert(res.status == STRING_OK);
assert(strcmp(res.value.string->data, "Foo Bar") == 0);
assert(string_size(res.value.string) == 7);
string_destroy(str1);
string_destroy(str2);
string_destroy(res.value.string);
}
// Test substring search with an ASCII needle, a multi-byte needle and a missing one
void test_string_contains(void) {
    string_t *text = string_new("Hello 🌍 World").value.string;
    string_t *ascii = string_new("World").value.string;
    string_t *emoji = string_new("🌍").value.string;
    string_t *missing = string_new("not found").value.string;

    // "World" begins at symbol index 8 (indices count symbols, not bytes)
    const string_result_t ascii_hit = string_contains(text, ascii);
    assert(ascii_hit.status == STRING_OK);
    assert(ascii_hit.value.idx == 8);

    // The emoji sits at symbol index 6
    const string_result_t emoji_hit = string_contains(text, emoji);
    assert(emoji_hit.status == STRING_OK);
    assert(emoji_hit.value.idx == 6);

    // A needle that does not occur is reported as -1, not as an error
    const string_result_t no_hit = string_contains(text, missing);
    assert(no_hit.status == STRING_OK);
    assert(no_hit.value.idx == -1);

    string_destroy(text);
    string_destroy(ascii);
    string_destroy(emoji);
    string_destroy(missing);
}
// Test slicing (both bounds inclusive) on ASCII and UTF-8 content
void test_string_slice(void) {
    // Slice made of single-byte symbols only
    string_t *ascii = string_new("foobar").value.string;
    const string_result_t ascii_slice = string_slice(ascii, 2, 4);
    assert(ascii_slice.status == STRING_OK);
    assert(strcmp(ascii_slice.value.string->data, "oba") == 0);
    assert(ascii_slice.value.string->char_count == 3);

    // Slice made of a single multi-byte symbol
    string_t *mixed = string_new("AB😆🌍").value.string;
    const string_result_t emoji_slice = string_slice(mixed, 2, 2);
    assert(emoji_slice.status == STRING_OK);
    assert(strcmp(emoji_slice.value.string->data, "😆") == 0);
    assert(emoji_slice.value.string->byte_size == 4); // one emoji takes 4 bytes

    // Slice spanning both single-byte and multi-byte symbols
    const string_result_t prefix_slice = string_slice(mixed, 0, 2);
    assert(prefix_slice.status == STRING_OK);
    assert(strcmp(prefix_slice.value.string->data, "AB😆") == 0);

    // Start after end
    const string_result_t swapped_bounds = string_slice(ascii, 5, 2);
    assert(swapped_bounds.status == STRING_ERR_OVERFLOW);

    // End beyond the last symbol
    const string_result_t past_the_end = string_slice(ascii, 1, 50);
    assert(past_the_end.status == STRING_ERR_OVERFLOW);

    string_destroy(ascii);
    string_destroy(mixed);
    string_destroy(ascii_slice.value.string);
    string_destroy(emoji_slice.value.string);
    string_destroy(prefix_slice.value.string);
}
// Test case-insensitive and sensitive comparison
void test_string_eq(void) {
string_t *str1 = string_new("Foo").value.string;
string_t *str2 = string_new("foo").value.string;
// Case sensitive comparison should be false
assert(string_eq(str1, str2, true).value.is_equ == false);
// Case insensitive comparison should be true
assert(string_eq(str1, str2, false).value.is_equ == true);
string_destroy(str1);
string_destroy(str2);
}
// Test string reverse using UTF-8 symbols.
// Reversal must work on whole symbols: the 4-byte emoji has to come out
// intact, so both the symbol count and the byte size are preserved.
void test_string_reverse_utf8(void) {
    string_result_t src = string_new("A🌍Z");
    assert(src.status == STRING_OK);
    string_t *str = src.value.string;

    string_result_t res = string_reverse(str);
    assert(res.status == STRING_OK);
    assert(strcmp(res.value.string->data, "Z🌍A") == 0);
    assert(string_size(res.value.string) == 3);
    assert(res.value.string->byte_size == 6); // that is: Z (1B) + emoji (4B) + A (1B)

    string_destroy(str);
    string_destroy(res.value.string);
}
// Test extraction of single multi-byte symbols by character index
void test_string_get_at(void) {
    string_t *text = string_new("AB😆🌍").value.string;

    // Index 2 holds the first emoji
    const string_result_t third = string_get_at(text, 2);
    assert(third.status == STRING_OK);
    assert(strcmp((char*)third.value.symbol, "😆") == 0);
    free(third.value.symbol); // the extracted symbol is owned by the caller

    // Index 3 holds the second emoji
    const string_result_t fourth = string_get_at(text, 3);
    assert(fourth.status == STRING_OK);
    assert(strcmp((char*)fourth.value.symbol, "🌍") == 0);
    free(fourth.value.symbol);

    string_destroy(text);
}
// Test that reading past the end of a string is rejected
void test_string_get_at_overflow(void) {
    string_t *text = string_new("ABC").value.string;

    const string_result_t out_of_range = string_get_at(text, 50);
    assert(out_of_range.status == STRING_ERR_OVERFLOW);

    string_destroy(text);
}
// Test replacement of a single-byte symbol with a multi-byte one
void test_string_set_at(void) {
    string_t *source = string_new("ABC").value.string;

    // Swap the 'B' for an emoji
    const string_result_t replaced = string_set_at(source, 1, "😆");
    string_t *const altered = replaced.value.string;
    assert(replaced.status == STRING_OK);
    assert(strcmp(altered->data, "A😆C") == 0);
    // Same number of symbols, but more bytes: A (1B) + emoji (4B) + C (1B)
    assert(string_size(altered) == 3);
    assert(altered->byte_size == 6);

    string_destroy(source);
    string_destroy(altered);
}
// Test that malformed UTF-8 replacement symbols are rejected
void test_string_set_at_invalid_utf8(void) {
    // Two byte sequences the library is expected to reject
    const char * const malformed[] = { "\xFF", "\x80" };
    const size_t total = sizeof(malformed) / sizeof(malformed[0]);

    string_t *text = string_new("ABC").value.string;

    for (size_t pos = 0; pos < total; pos++) {
        const string_result_t rejected = string_set_at(text, 1, malformed[pos]);
        assert(rejected.status == STRING_ERR_INVALID_UTF8);
    }

    string_destroy(text);
}
// Test that replacing a symbol past the end of a string is rejected
void test_string_set_at_overflow(void) {
    string_t *text = string_new("ABC").value.string;

    const string_result_t out_of_range = string_set_at(text, 10, "a");
    assert(out_of_range.status == STRING_ERR_OVERFLOW);

    string_destroy(text);
}
// Test string to lowercase
void test_string_to_lower(void) {
string_t *str = string_new("AbC").value.string;
string_result_t res = string_to_lower(str);
assert(res.status == STRING_OK);
assert(strcmp(res.value.string->data, "abc") == 0);
string_destroy(str);
string_destroy(res.value.string);
}
// Test string to uppercase
void test_string_to_upper(void) {
string_t *str = string_new("aBc").value.string;
string_result_t res = string_to_upper(str);
assert(res.status == STRING_OK);
assert(strcmp(res.value.string->data, "ABC") == 0);
string_destroy(str);
string_destroy(res.value.string);
}
// Test whitespace trimming
void test_string_trim(void) {
string_t *str = string_new(" \t Foo Bar \n ").value.string;
string_result_t res = string_trim(str);
assert(res.status == STRING_OK);
assert(strcmp(res.value.string->data, "Foo Bar") == 0);
string_destroy(str);
string_destroy(res.value.string);
}
// Test splitting a delimited string into an array of strings
void test_string_split(void) {
    const char *expected[] = { "Red", "Green", "Blue" };

    string_t *csv = string_new("Red,Green,Blue").value.string;
    const string_result_t outcome = string_split(csv, ",");
    assert(outcome.status == STRING_OK);
    assert(outcome.value.split.count == 3);

    string_t **parts = outcome.value.split.strings;
    const size_t total = outcome.value.split.count;

    // Every token must match, in order
    size_t pos = 0;
    while (pos < total) {
        assert(strcmp(parts[pos]->data, expected[pos]) == 0);
        pos++;
    }

    string_split_destroy(parts, total);
    string_destroy(csv);
}
// Test destruction of a valid string and rejection of a NULL one
void test_string_destroy(void) {
    string_t *victim = string_new("delete me").value.string;

    const string_result_t released = string_destroy(victim);
    assert(released.status == STRING_OK);

    // Destroying nothing is reported as an invalid argument
    const string_result_t rejected = string_destroy(NULL);
    assert(rejected.status == STRING_ERR_INVALID);
}
// Test runner: executes every String test case in sequence.
// A failing assert() aborts the process, so the closing banner is only
// printed when all the cases went through.
int main(void) {
    fputs("=== Running String unit tests ===\n\n", stdout);

    // Construction and copy
    TEST(string_new);
    TEST(string_new_empty);
    TEST(string_clone);

    // Composition, search and comparison
    TEST(string_concat);
    TEST(string_contains);
    TEST(string_slice);
    TEST(string_eq);
    TEST(string_reverse_utf8);

    // Symbol access
    TEST(string_get_at);
    TEST(string_get_at_overflow);
    TEST(string_set_at);
    TEST(string_set_at_overflow);
    TEST(string_set_at_invalid_utf8);

    // Transformations
    TEST(string_to_lower);
    TEST(string_to_upper);
    TEST(string_trim);
    TEST(string_split);

    // Teardown
    TEST(string_destroy);

    fputs("\n=== All tests passed! ===\n", stdout);

    return 0;
}

187
usage.c
View File

@@ -25,10 +25,12 @@
#include "src/vector.h"
#include "src/map.h"
#include "src/bigint.h"
#include "src/string.h"
static int vector_usage(void);
static int map_usage(void);
static int bigint_usage(void);
static int string_usage(void);
static vector_order_t cmp_int_asc(const void *x, const void *y);
static vector_order_t cmp_int_desc(const void *x, const void *y);
@@ -52,6 +54,11 @@ int main(void) {
st = bigint_usage();
if (st) { return st; }
SEP(50);
st = string_usage();
if (st) { return st; }
return 0;
}
@@ -495,7 +502,7 @@ int bigint_usage(void) {
// Print result
bigint_printf("multiplication result = %B\n", prod);
bigint_t *a = bigint_from_string(large_x).value.number;
bigint_t *a = bigint_from_string(x_origin).value.number;
bigint_t *b = bigint_from_string(y_origin).value.number;
// Divide two big integers
@@ -524,3 +531,181 @@ int bigint_usage(void) {
return 0;
}
int string_usage(void) {
// Create a new string
string_result_t res = string_new("Hello, ");
if (res.status != STRING_OK) {
printf("Error: %s\n", res.message);
return 1;
}
string_t *str1 = res.value.string;
printf("Created string: \"%s\"\n", str1->data);
printf("Character count: %zu (%zu actual bytes)\n", string_size(str1), str1->byte_size);
string_result_t res_clone = string_clone(str1);
if (res_clone.status != STRING_OK) {
printf("Error: %s\n", res.message);
return 1;
}
string_t *cloned = res_clone.value.string;
printf("Cloned string: \"%s\"\n\n", cloned->data);
string_destroy(cloned);
// Concatenation of strings
string_result_t res_suffix = string_new("World! 🦜");
if (res_suffix.status != STRING_OK) {
printf("Error: %s\n", res.message);
return 1;
}
string_t *suffix = res_suffix.value.string;
printf("Created another string: \"%s\"\n", suffix->data);
printf("Character count: %zu (%zu actual bytes)\n\n", string_size(suffix), suffix->byte_size);
string_result_t res_cat = string_concat(str1, suffix);
if (res_cat.status != STRING_OK) {
printf("Error: %s\n", res_cat.message);
return 1;
}
string_destroy(suffix);
string_t *concat_str = res_cat.value.string;
printf("Concatenation result: \"%s\"\n\n", concat_str->data);
// String contains
string_t *haystack = string_new("The quick brown fox jumps over the lazy dog.").value.string;
string_t *needle = string_new("brown fox").value.string;
string_result_t res_contains = string_contains(haystack, needle);
if (res_contains.status != STRING_OK) {
printf("Error: %s\n", res_contains.message);
return 1;
}
if (res_contains.value.idx != -1) {
printf("Substring found. Starting at index %zu\n\n", res_contains.value.idx);
}
string_destroy(haystack);
string_destroy(needle);
// String slicing
string_result_t res_slice = string_slice(concat_str, 7, 14);
if (res_slice.status != STRING_OK) {
printf("Error: %s\n", res_slice.message);
return 1;
}
printf("Slice of string: \"%s\"\n\n", res_slice.value.string->data);
string_destroy(res_slice.value.string);
// String equality
string_t *compare = string_new("hello, World! 🦜").value.string;
string_result_t res_eq = string_eq(concat_str, compare, true);
if (res_eq.value.is_equ) {
printf("The two strings are equal\n\n");
} else {
printf("The two strings are not equal\n\n");
}
string_destroy(compare);
// Uppercase string
string_result_t res_upper = string_to_upper(concat_str);
if (res_upper.status != STRING_OK) {
printf("Error: %s\n", res_upper.message);
return 1;
}
printf("Uppercase: \"%s\"\n", res_upper.value.string->data);
string_destroy(res_upper.value.string);
// Lowercase string
string_result_t res_lower = string_to_lower(concat_str);
if (res_lower.status != STRING_OK) {
printf("Error: %s\n", res_lower.message);
return 1;
}
printf("Lowercase: \"%s\"\n\n", res_lower.value.string->data);
string_destroy(res_lower.value.string);
// Reverse string
string_result_t res_rev = string_reverse(concat_str);
if (res_rev.status != STRING_OK) {
printf("Error: %s\n", res_rev.message);
return 1;
}
printf("Reversed: \"%s\"\n\n", res_rev.value.string->data);
string_destroy(res_rev.value.string);
// Change first character of the string
string_result_t res_set = string_set_at(concat_str, 0, "J");
if (res_set.status != STRING_OK) {
printf("Error: %s\n", res_set.message);
return 1;
}
printf("Updated string: \"%s\"\n\n", res_set.value.string->data);
string_destroy(res_set.value.string);
// Get character from string (the emoji)
string_result_t res_get = string_get_at(concat_str, 14);
if (res_get.status != STRING_OK) {
printf("Error: %s\n", res_get.message);
return 1;
}
printf("Extracted symbol: \"%s\"\n", res_get.value.symbol);
free(res_get.value.symbol);
// Trim string
string_t *to_trim = string_new(" foo ").value.string;
string_result_t res_trim = string_trim(to_trim);
if (res_trim.status != STRING_OK) {
printf("Error: %s\n", res_trim.message);
return 1;
}
printf("Trimmed string: \"%s\"\n\n", res_trim.value.string->data);
string_destroy(to_trim);
string_destroy(res_trim.value.string);
// Split string
string_t *to_split = string_new("foo/bar/biz").value.string;
string_result_t res_split = string_split(to_split, "/");
if (res_split.status != STRING_OK) {
printf("Error: %s\n", res_split.message);
return 1;
}
const size_t count = res_split.value.split.count;
string_t **strings = res_split.value.split.strings;
printf("Original string: \"%s\"\nSplitted string: ", to_split->data);
for (size_t idx = 0; idx < count; idx++) {
printf("\"%s\" ", strings[idx]->data);
}
printf("\n");
string_split_destroy(strings, count);
string_destroy(to_split);
string_destroy(concat_str);
string_destroy(str1);
return 0;
}