Compare commits
6 Commits
7d95b32e52
...
fast_div_p
| Author | SHA1 | Date | |
|---|---|---|---|
|
40d343c02b
|
|||
|
eb670e26a5
|
|||
|
a02f2dff40
|
|||
|
ea9ef9de4b
|
|||
|
dd6e7a9c9e
|
|||
|
6cd90467c6
|
@@ -16,7 +16,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
run: |
|
run: |
|
||||||
./test_vector && ./test_map && ./test_bigint && ./test_string
|
./test_vector && ./test_map && ./test_bigint
|
||||||
|
|
||||||
- name: Run benchmarks
|
- name: Run benchmarks
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
run: |
|
run: |
|
||||||
./test_vector && ./test_map && ./test_bigint && ./test_string
|
./test_vector && ./test_map && ./test_bigint
|
||||||
|
|
||||||
- name: Run benchmarks
|
- name: Run benchmarks
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
14
Makefile
14
Makefile
@@ -1,7 +1,7 @@
|
|||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \
|
CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \
|
||||||
-fsanitize=address -fsanitize=undefined -fstack-clash-protection \
|
-fsanitize=address -fsanitize=undefined -fstack-clash-protection \
|
||||||
-Wwrite-strings -g -std=c99
|
-Wwrite-strings -g -std=c99
|
||||||
|
|
||||||
BENCH_FLAGS = -Wall -Wextra -Werror -O3
|
BENCH_FLAGS = -Wall -Wextra -Werror -O3
|
||||||
|
|
||||||
@@ -17,15 +17,14 @@ TARGET = usage
|
|||||||
TEST_V_TARGET = test_vector
|
TEST_V_TARGET = test_vector
|
||||||
TEST_M_TARGET = test_map
|
TEST_M_TARGET = test_map
|
||||||
TEST_B_TARGET = test_bigint
|
TEST_B_TARGET = test_bigint
|
||||||
TEST_S_TARGET = test_string
|
|
||||||
BENCH_TARGET = benchmark_datum
|
BENCH_TARGET = benchmark_datum
|
||||||
|
|
||||||
LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/string.o
|
LIB_OBJS = $(OBJ_DIR)/vector.o $(OBJ_DIR)/map.o $(OBJ_DIR)/bigint.o
|
||||||
PROG_OBJS = $(OBJ_DIR)/usage.o
|
PROG_OBJS = $(OBJ_DIR)/usage.o
|
||||||
|
|
||||||
.PHONY: all clean
|
.PHONY: all clean
|
||||||
|
|
||||||
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(TEST_S_TARGET) $(BENCH_TARGET)
|
all: $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET)
|
||||||
bench: $(BENCH_TARGET)
|
bench: $(BENCH_TARGET)
|
||||||
|
|
||||||
$(TARGET): $(PROG_OBJS) $(LIB_OBJS)
|
$(TARGET): $(PROG_OBJS) $(LIB_OBJS)
|
||||||
@@ -40,9 +39,6 @@ $(TEST_M_TARGET): $(OBJ_DIR)/test_map.o $(OBJ_DIR)/map.o
|
|||||||
$(TEST_B_TARGET): $(OBJ_DIR)/test_bigint.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/vector.o
|
$(TEST_B_TARGET): $(OBJ_DIR)/test_bigint.o $(OBJ_DIR)/bigint.o $(OBJ_DIR)/vector.o
|
||||||
$(CC) $(CFLAGS) -o $@ $^
|
$(CC) $(CFLAGS) -o $@ $^
|
||||||
|
|
||||||
$(TEST_S_TARGET): $(OBJ_DIR)/test_string.o $(OBJ_DIR)/string.o
|
|
||||||
$(CC) $(CFLAGS) -o $@ $^
|
|
||||||
|
|
||||||
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(OBJ_DIR)
|
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(OBJ_DIR)
|
||||||
$(CC) $(CFLAGS) -c -o $@ $<
|
$(CC) $(CFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
@@ -56,7 +52,7 @@ $(OBJ_DIR):
|
|||||||
mkdir -p $(OBJ_DIR)
|
mkdir -p $(OBJ_DIR)
|
||||||
|
|
||||||
# Benchmark rules
|
# Benchmark rules
|
||||||
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/string.o
|
$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/bigint.o
|
||||||
$(CC) $(BENCH_FLAGS) -o $@ $^
|
$(CC) $(BENCH_FLAGS) -o $@ $^
|
||||||
|
|
||||||
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)
|
$(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)
|
||||||
@@ -69,4 +65,4 @@ $(BENCH_OBJ_DIR):
|
|||||||
mkdir -p $(BENCH_OBJ_DIR)
|
mkdir -p $(BENCH_OBJ_DIR)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(TEST_S_TARGET) $(BENCH_TARGET)
|
rm -rf $(OBJ_DIR) $(BENCH_OBJ_DIR) $(TARGET) $(TEST_V_TARGET) $(TEST_M_TARGET) $(TEST_B_TARGET) $(BENCH_TARGET)
|
||||||
|
|||||||
57
README.md
57
README.md
@@ -4,16 +4,14 @@
|
|||||||
|
|
||||||

|

|
||||||

|

|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond
|
Datum is a collection of dynamic and generic data structures implemented from scratch in C with no external dependencies beyond
|
||||||
the standard library. It currently features:
|
the standard library. It currently features:
|
||||||
|
|
||||||
- [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types;
|
- [**Vector**](/docs/vector.md): a growable, contiguous array of homogenous generic data types;
|
||||||
- [**Map**](/docs/map.md): an associative array that handles generic heterogenous data types;
|
- [**Map**](/docs/map.md): an associative array of generic heterogenous data types;
|
||||||
- [**BigInt**](/docs/bigint.md): a data type for arbitrary large integers;
|
- [**BigInt**](/docs/bigint.md): a data type for arbitrary large integers.
|
||||||
- [**String**](/docs/string.md): an immutable string type with partial UTF-8 support.
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
At its simplest, you can use this library as follows:
|
At its simplest, you can use this library as follows:
|
||||||
@@ -112,9 +110,9 @@ int main(void) {
|
|||||||
#include "src/bigint.h"
|
#include "src/bigint.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compile with: gcc -O3 main.c src/bigint.c src/vector.c
|
* Compile with: clang -O3 fact.c src/bigint.c src/vector.c -o fact
|
||||||
* Output: 20000! = 1819206320230345134827641...
|
* Output: 20000! = 1819206320230345134827641...
|
||||||
* Time: 4.01s user 0.00s system 99% cpu 4.021 total
|
* Time: 1.49s user 0.00s system 99% cpu 1.501 total
|
||||||
*/
|
*/
|
||||||
int main(void) {
|
int main(void) {
|
||||||
const int n = 20000;
|
const int n = 20000;
|
||||||
@@ -136,39 +134,6 @@ int main(void) {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### `String` usage:
|
|
||||||
```c
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "src/string.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compile with: gcc main.c src/string.c
|
|
||||||
* Output: Final string: "Hello,World,😀" Splitted: ["Hello" "World" "😀" ]
|
|
||||||
*/
|
|
||||||
int main(void) {
|
|
||||||
string_t *x = string_new(" Hello, ").value.string;
|
|
||||||
string_t *x_trm = string_trim(x).value.string;
|
|
||||||
|
|
||||||
string_t *y = string_new("😀,dlroW").value.string;
|
|
||||||
string_t *y_rev = string_reverse(y).value.string;
|
|
||||||
|
|
||||||
string_t *str = string_concat(x_trm, y_rev).value.string;
|
|
||||||
string_t **strings = string_split(str, ",").value.split.strings;
|
|
||||||
|
|
||||||
printf("Final string: \"%s\" Splitted: [", str->data);
|
|
||||||
for (int idx = 0; idx < 3; idx++) { printf("\"%s\" ", strings[idx]->data); }
|
|
||||||
printf("]\n");
|
|
||||||
|
|
||||||
string_split_destroy(strings, 3); string_destroy(str);
|
|
||||||
string_destroy(x); string_destroy(y);
|
|
||||||
string_destroy(x_trm); string_destroy(y_rev);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
For a more exhaustive example, refer to the `usage.c` file. There, you will find a program with proper error management
|
For a more exhaustive example, refer to the `usage.c` file. There, you will find a program with proper error management
|
||||||
and a sample usage for every available method. To run it, first issue the following command:
|
and a sample usage for every available method. To run it, first issue the following command:
|
||||||
|
|
||||||
@@ -180,9 +145,7 @@ This will compile the library as well as the `usage.c` file, the unit tests and
|
|||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> This project is primarily developed for learning purposes and was not created with industrial
|
> This project is primarily developed for learning purposes and was not created with industrial
|
||||||
> or production use in mind. As such, it is not intended to compete with any existing C library.
|
> or production use in mind. As such, it is not intended to compete with any existing C library such as the
|
||||||
> In particular, the big number implementation does not aim to match the design, the maturity and
|
|
||||||
> the performance of established solutions such as the
|
|
||||||
> GNU Multiple Precision Arithmetic Library (GMP).
|
> GNU Multiple Precision Arithmetic Library (GMP).
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
@@ -199,14 +162,14 @@ $ ./test_bigint
|
|||||||
```
|
```
|
||||||
|
|
||||||
## Benchmark
|
## Benchmark
|
||||||
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stress the `Vector`, `Map` and the `String` data structures.
|
Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stresses the `Vector`, `Map` and `BigInt` data structures. You can run it by issuing the following command:
|
||||||
You can run it by issuing the following command:
|
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
$ make clean all CC=clang
|
||||||
$ ./benchmark_datum
|
$ ./benchmark_datum
|
||||||
Computing Vector average time...average time: 19 ms
|
Computing Vector average time...average time: 8 ms
|
||||||
Computing Map average time...average time: 55 ms
|
Computing Map average time...average time: 53 ms
|
||||||
Computing String average time...average time: 24 ms
|
Computing BigInt average time...average time: 76 ms
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
#define _POSIX_C_SOURCE 200809L
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
@@ -8,7 +6,7 @@
|
|||||||
|
|
||||||
#include "../src/vector.h"
|
#include "../src/vector.h"
|
||||||
#include "../src/map.h"
|
#include "../src/map.h"
|
||||||
#include "../src/string.h"
|
#include "../src/bigint.h"
|
||||||
|
|
||||||
typedef void (*test_fn_t)(size_t iterations);
|
typedef void (*test_fn_t)(size_t iterations);
|
||||||
|
|
||||||
@@ -16,10 +14,10 @@ void test_vector(size_t iterations) {
|
|||||||
vector_t *vec = vector_new(16, sizeof(int)).value.vector;
|
vector_t *vec = vector_new(16, sizeof(int)).value.vector;
|
||||||
|
|
||||||
for (size_t idx = 0; idx < iterations; idx++) {
|
for (size_t idx = 0; idx < iterations; idx++) {
|
||||||
vector_push(vec, &(int){idx});
|
vector_push(vec, &idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
volatile uint64_t sum = 0;
|
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum
|
||||||
for (size_t idx = 0; idx < iterations; idx++) {
|
for (size_t idx = 0; idx < iterations; idx++) {
|
||||||
const int *val = (int*)vector_get(vec, idx).value.element;
|
const int *val = (int*)vector_get(vec, idx).value.element;
|
||||||
sum += *val;
|
sum += *val;
|
||||||
@@ -41,7 +39,7 @@ void test_map(size_t iterations) {
|
|||||||
map_add(map, key, (void*)value);
|
map_add(map, key, (void*)value);
|
||||||
}
|
}
|
||||||
|
|
||||||
volatile uint64_t sum = 0;
|
volatile uint64_t sum = 0; // prevent the compiler from optimizing away the sum
|
||||||
for (size_t idx = 0; idx < iterations; idx++) {
|
for (size_t idx = 0; idx < iterations; idx++) {
|
||||||
snprintf(key, sizeof(key), "key_%zu", idx);
|
snprintf(key, sizeof(key), "key_%zu", idx);
|
||||||
|
|
||||||
@@ -53,7 +51,7 @@ void test_map(size_t iterations) {
|
|||||||
for (size_t idx = 0; idx < map->capacity; idx++) {
|
for (size_t idx = 0; idx < map->capacity; idx++) {
|
||||||
snprintf(key, sizeof(key), "key_%zu", idx);
|
snprintf(key, sizeof(key), "key_%zu", idx);
|
||||||
|
|
||||||
int *val = (int*)map_get(map, key).value.element;
|
int *val = (int *)map_get(map, key).value.element;
|
||||||
free(val);
|
free(val);
|
||||||
|
|
||||||
map_remove(map, key);
|
map_remove(map, key);
|
||||||
@@ -62,28 +60,59 @@ void test_map(size_t iterations) {
|
|||||||
map_destroy(map);
|
map_destroy(map);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_string(size_t iterations) {
|
void test_bigint(size_t iterations) {
|
||||||
volatile size_t total_len = 0;
|
volatile uint64_t accumulator = 0;
|
||||||
|
|
||||||
for (size_t idx = 0; idx < iterations; idx++) {
|
for (size_t idx = 1; idx <= iterations; idx++) {
|
||||||
string_t *str1 = string_new("hello").value.string;
|
long long a_val = (long long)idx * 123456789LL;
|
||||||
string_t *str2 = string_new(" World").value.string;
|
long long b_val = (long long)idx * 17777LL;
|
||||||
|
|
||||||
string_result_t concat = string_concat(str1, str2);
|
bigint_result_t a_res = bigint_from_int(a_val);
|
||||||
string_result_t upper = string_to_upper(concat.value.string);
|
bigint_result_t b_res = bigint_from_int(b_val);
|
||||||
total_len += string_size(upper.value.string);
|
|
||||||
string_result_t needle = string_new("WORLD");
|
|
||||||
string_result_t contains = string_contains(upper.value.string, needle.value.string);
|
|
||||||
|
|
||||||
if (contains.value.idx >= 0) {
|
if (a_res.status != BIGINT_OK || b_res.status != BIGINT_OK) {
|
||||||
total_len += contains.value.idx;
|
bigint_destroy(a_res.value.number);
|
||||||
|
bigint_destroy(b_res.value.number);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
string_destroy(str1);
|
bigint_t *a = a_res.value.number;
|
||||||
string_destroy(str2);
|
bigint_t *b = b_res.value.number;
|
||||||
string_destroy(concat.value.string);
|
|
||||||
string_destroy(upper.value.string);
|
// Addition
|
||||||
string_destroy(needle.value.string);
|
bigint_result_t add_res = bigint_add(a, b);
|
||||||
|
if (add_res.status == BIGINT_OK) {
|
||||||
|
vector_result_t v = vector_get(add_res.value.number->digits, 0);
|
||||||
|
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
|
||||||
|
bigint_destroy(add_res.value.number);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subtraction
|
||||||
|
bigint_result_t sub_res = bigint_sub(a, b);
|
||||||
|
if (sub_res.status == BIGINT_OK) {
|
||||||
|
vector_result_t v = vector_get(sub_res.value.number->digits, 0);
|
||||||
|
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
|
||||||
|
bigint_destroy(sub_res.value.number);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiplication
|
||||||
|
bigint_result_t mul_res = bigint_prod(a, b);
|
||||||
|
if (mul_res.status == BIGINT_OK) {
|
||||||
|
vector_result_t v = vector_get(mul_res.value.number->digits, 0);
|
||||||
|
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
|
||||||
|
bigint_destroy(mul_res.value.number);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Division
|
||||||
|
bigint_result_t div_res = bigint_divmod(a, b);
|
||||||
|
if (div_res.status == BIGINT_OK) {
|
||||||
|
vector_result_t v = vector_get(div_res.value.division.quotient->digits, 0);
|
||||||
|
if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
|
||||||
|
bigint_destroy(div_res.value.division.quotient);
|
||||||
|
bigint_destroy(div_res.value.division.remainder);
|
||||||
|
}
|
||||||
|
|
||||||
|
bigint_destroy(a); bigint_destroy(b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -112,7 +141,7 @@ int main(void) {
|
|||||||
// Do a warmup run
|
// Do a warmup run
|
||||||
test_vector(1000);
|
test_vector(1000);
|
||||||
test_map(1000);
|
test_map(1000);
|
||||||
test_string(1000);
|
test_bigint(1000);
|
||||||
|
|
||||||
printf("Computing Vector average time...");
|
printf("Computing Vector average time...");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
@@ -122,9 +151,9 @@ int main(void) {
|
|||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
|
printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
|
||||||
|
|
||||||
printf("Computing String average time...");
|
printf("Computing BigInt average time...");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
printf("average time: %lld ms\n", benchmark(test_string, 1e5, 30));
|
printf("average time: %lld ms\n", benchmark(test_bigint, 1e5, 30));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,5 +7,4 @@ At the time being, this documentation includes the following pages:
|
|||||||
|
|
||||||
- [vector.md](vector.md): vector documentation;
|
- [vector.md](vector.md): vector documentation;
|
||||||
- [map.md](map.md): map documentation;
|
- [map.md](map.md): map documentation;
|
||||||
- [bigint.md](bigint.md): bigint documentation;
|
- [bigint.md](bigint.md): bigint documentation.
|
||||||
- [string.md](string.md): string documentation.
|
|
||||||
|
|||||||
@@ -33,17 +33,18 @@ and the boolean `is_negative` variable denotes its sign.
|
|||||||
|
|
||||||
The `BigInt` data structure supports the following methods:
|
The `BigInt` data structure supports the following methods:
|
||||||
|
|
||||||
- `bigint_result_t bigint_from_int(value)`: create a big integer from a primitive `int` type;
|
- `bigint_result_t bigint_from_int(value)`: creates a big integer from a primitive `int` type;
|
||||||
- `bigint_result_t bigint_from_string(string_num)`: create a big integer from a C string;
|
- `bigint_result_t bigint_from_string(string_num)`: creates a big integer from a C string;
|
||||||
- `bigint_result_t bigint_to_string(number)`: convert a big integer to a C string;
|
- `bigint_result_t bigint_to_string(number)`: converts a big integer to a C string;
|
||||||
- `bigint_result_t bigint_clone(number)`: clone a big integer;
|
- `bigint_result_t bigint_clone(number)`: clones a big integer;
|
||||||
- `bigint_result_t bigint_compare(x, y)`: compare two big integers, returning either `-1`, `0` or `1` if the first is less than, equal than or greater than the second, respectively;
|
- `bigint_result_t bigint_compare(x, y)`: compares two big integers, returning either `-1`, `0` or `1` if the first is less than, equal to or greater than the second, respectively;
|
||||||
- `bigint_result_t bigint_add(x, y)`: add two big integers together in $\mathcal{O}(n)$;
|
- `bigint_result_t bigint_add(x, y)`: adds two big integers together in $\mathcal{O}(n)$;
|
||||||
- `bigint_result_t bigint_sub(x, y)`: subtract two big integers in $\mathcal{O}(n)$;
|
- `bigint_result_t bigint_sub(x, y)`: subtracts two big integers in $\mathcal{O}(n)$;
|
||||||
- `bigint_result_t bigint_prod(x, y)`: multiply two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;
|
- `bigint_result_t bigint_prod(x, y)`: multiplies two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;
|
||||||
- `bigint_result_t bigint_divmod(x, y)`: divide two big integers using *long division* algorithm in $\mathcal{O}(n^2)$, returning both the quotient and the remainder;
|
- `bigint_result_t bigint_divmod(x, y)`: divides two big integers using _Knuth's Algorithm D_ in $\mathcal{O}(n \times m)$ where $n$ and $m$ are the number of base-10^9
|
||||||
- `bigint_result_t bigint_mod(x, y)`: computes modulo of two big integers using *long division* algorithm in $\mathcal{O}(n^2)$;
|
parts/limbs in the divisor and the quotient, respectively. This method returns both the quotient and the remainder;
|
||||||
- `bigint_result_t bigint_destroy(number)`: delete the big number;
|
- `bigint_result_t bigint_mod(x, y)`: calls `bigint_divmod`, discards the quotient and yields the remainder;
|
||||||
|
- `bigint_result_t bigint_destroy(number)`: deletes the big number;
|
||||||
- `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
|
- `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
|
||||||
|
|
||||||
As you can see from the previous function signatures, methods that operate on the
|
As you can see from the previous function signatures, methods that operate on the
|
||||||
@@ -90,12 +91,3 @@ of them has an unique scope as described below:
|
|||||||
- `compare_status`: result of `bigint_compare`;
|
- `compare_status`: result of `bigint_compare`;
|
||||||
- `string_num`: result of `bigint_to_string`.
|
- `string_num`: result of `bigint_to_string`.
|
||||||
|
|
||||||
|
|
||||||
> [!IMPORTANT]
|
|
||||||
> Currently, the division implementation employs a quadratic-time algorithm derived from the conventional _"grade school"_ long-division method.
|
|
||||||
> This approach performs adequately for integers of modest size (up to approximately 200 digits) but becomes highly inefficient when handling
|
|
||||||
> substantially larger integers (~1500 digits).
|
|
||||||
>
|
|
||||||
> Improving the efficiency of this algorithm would require further research into advanced
|
|
||||||
> numerical algorithms, which is something that I currently not inclined to pursue.
|
|
||||||
|
|
||||||
|
|||||||
12
docs/map.md
12
docs/map.md
@@ -37,12 +37,12 @@ free them before removing the keys or destroying the map.
|
|||||||
|
|
||||||
The `Map` data structure supports the following methods:
|
The `Map` data structure supports the following methods:
|
||||||
|
|
||||||
- `map_result_t map_new()`: initialize a new map;
|
- `map_result_t map_new()`: initializes a new map;
|
||||||
- `map_result_t map_add(map, key, value)`: add a `(key, value)` pair to the map;
|
- `map_result_t map_add(map, key, value)`: adds a `(key, value)` pair to the map;
|
||||||
- `map_result_t map_get(map, key)`: retrieve a values indexed by `key` if it exists;
|
- `map_result_t map_get(map, key)`: retrieves the value indexed by `key` if it exists;
|
||||||
- `map_result_t map_remove(map, key)`: remove a key from the map if it exists;
|
- `map_result_t map_remove(map, key)`: removes a key from the map if it exists;
|
||||||
- `map_result_t map_clear(map)`: reset the map state;
|
- `map_result_t map_clear(map)`: resets the map state;
|
||||||
- `map_result_t map_destroy(map)`: delete the map;
|
- `map_result_t map_destroy(map)`: deletes the map;
|
||||||
- `size_t map_size(map)`: returns map size (i.e., the number of elements);
|
- `size_t map_size(map)`: returns map size (i.e., the number of elements);
|
||||||
- `size_t map_capacity(map)`: returns map capacity (i.e., map total size).
|
- `size_t map_capacity(map)`: returns map capacity (i.e., map total size).
|
||||||
|
|
||||||
|
|||||||
@@ -1,96 +0,0 @@
|
|||||||
# String Technical Details
|
|
||||||
In this document you can find a quick overview of the technical
|
|
||||||
aspects (internal design, memory layout, etc.) of the `String` data structure.
|
|
||||||
|
|
||||||
`String` is an immutable string data type with partial UTF-8 support.
|
|
||||||
This means that methods return a new string instance rather than modifying the string in-place.
|
|
||||||
Internally, this data structure is represented by the following layout:
|
|
||||||
|
|
||||||
```c
|
|
||||||
typedef struct {
|
|
||||||
char *data;
|
|
||||||
size_t byte_size;
|
|
||||||
size_t byte_capacity;
|
|
||||||
size_t char_count;
|
|
||||||
} string_t;
|
|
||||||
```
|
|
||||||
|
|
||||||
where the `data` variable represents the actual string (represented as a pointer to `char`),
|
|
||||||
the `byte_size` variable indicates the actual size (in bytes) of the string, the
|
|
||||||
`byte_capacity` variable represents the total number of allocated memory (in bytes) and the
|
|
||||||
`char_count` variable represent the number of logical characters, that is the number of
|
|
||||||
symbols.
|
|
||||||
|
|
||||||
As mentioned earlier, this library provides partial UTF-8 support. It is able to recognize
|
|
||||||
UTF-8 byte sequences as individual Unicode code points, which allows it to correctly distinguish
|
|
||||||
between byte length and character count. It fully supports Unicode symbols and emojis, while
|
|
||||||
remaining backward compatible with ASCII strings.
|
|
||||||
|
|
||||||
However, this data structure does not support localization. In particular, it does not perform
|
|
||||||
locale-aware conversion; for instance, uppercase/lowercase transformations are limited to ASCII
|
|
||||||
characters only. As a result, the German scharfes S (`ß`) is not convert to `SS`, the Spanish
|
|
||||||
`Ñ` is not converted to `ñ` and the Italian `é` (and its variants) is not treated as a single
|
|
||||||
symbol, but rather as a base letter combined with an accent.
|
|
||||||
|
|
||||||
At the time being, `String` supports the following methods:
|
|
||||||
|
|
||||||
- `string_result_t string_new(c_str)`: create a new string;
|
|
||||||
- `string_result_t string_clone(str)`: clone an existing string;
|
|
||||||
- `string_result_t string_concat(x, y)`: concatenate two strings together;
|
|
||||||
- `string_result_t string_contains(haystack, needle)`: search whether the `haystack` string contains `needle`;
|
|
||||||
- `string_result_t string_slice(str, start, end)`: return a slice (a new string) from `str` between `start` and `end` indices (inclusive);
|
|
||||||
- `string_result_t string_eq(x, y, case_sensitive)`: check whether `x` and `y` are equal;
|
|
||||||
- `string_result_t string_get_at(str, position)`: get the UTF-8 symbol indexed by `position` from `str`;
|
|
||||||
- `string_result_t string_set_at(str, position, utf8_char)`: write a UTF-8 symbol into `str` at index `position`;
|
|
||||||
- `string_result_t string_to_lower(str)`: convert a string to lowercase;
|
|
||||||
- `string_result_t string_to_upper(str)`: convert a string to uppercase;
|
|
||||||
- `string_result_t string_reverse(str)`: reverse a string;
|
|
||||||
- `string_result_t string_trim(str)`: remove leading and trailing white space from a string;
|
|
||||||
- `string_result_t string_split(str, delim)`: split a string into an array of `string_t` by specifying a separator;
|
|
||||||
- `string_result_t string_destroy(str)`: remove a string from memory;
|
|
||||||
- `string_result_t string_split_destroy(split, count)`: remove an array of strings from memory;
|
|
||||||
- `size_t string_size(str)`: return string character count.
|
|
||||||
|
|
||||||
As you can see from the previous function signatures, most methods that operate on the `String`
|
|
||||||
data type return a custom type called `string_result_t` which is defined as follows:
|
|
||||||
|
|
||||||
```c
|
|
||||||
typedef enum {
|
|
||||||
STRING_OK = 0x0,
|
|
||||||
STRING_ERR_ALLOCATE,
|
|
||||||
STRING_ERR_INVALID,
|
|
||||||
STRING_ERR_INVALID_UTF8,
|
|
||||||
STRING_ERR_OVERFLOW
|
|
||||||
} string_status_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
string_status_t status;
|
|
||||||
uint8_t message[RESULT_MSG_SIZE];
|
|
||||||
union {
|
|
||||||
string_t *string; // For new, clone, slice, reverse, trim
|
|
||||||
char *symbol; // For get_at
|
|
||||||
int64_t idx; // For contains
|
|
||||||
bool is_equ; // For comparison
|
|
||||||
struct { // For split
|
|
||||||
string_t **strings;
|
|
||||||
size_t count;
|
|
||||||
} split;
|
|
||||||
} value;
|
|
||||||
} string_result_t;
|
|
||||||
```
|
|
||||||
|
|
||||||
Each method that returns such type indicates whether the operation was successful or not
|
|
||||||
by setting the `status` field and by providing a descriptive message on the `message`
|
|
||||||
field. If the operation was successful (that is, `status == STRING_OK`) you can either
|
|
||||||
move on with the rest of your program or read the returned value from the sum data type.
|
|
||||||
Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated
|
|
||||||
on the first part of the README.
|
|
||||||
|
|
||||||
The sum data type (i.e., the `value` union) defines five different variables.
|
|
||||||
Each of them has an unique scope as described below:
|
|
||||||
|
|
||||||
- `string`: result of `new`, `clone`, `slice`, `reverse` and `trim` functions;
|
|
||||||
- `symbol`: result of `get_at` function;
|
|
||||||
- `idx`: result of `contains` function;
|
|
||||||
- `is_eq`: result of `equ` function. It's true when two strings are equal, false otherwise;
|
|
||||||
- `split`: result of `split` function. It contains an array of `string_t` and its number of elements.
|
|
||||||
@@ -25,19 +25,19 @@ deletion.
|
|||||||
|
|
||||||
At the time being, `Vector` supports the following methods:
|
At the time being, `Vector` supports the following methods:
|
||||||
|
|
||||||
- `vector_result_t vector_new(size, data_size)`: create a new vector;
|
- `vector_result_t vector_new(size, data_size)`: creates a new vector;
|
||||||
- `vector_result_t vector_push(vector, value)`: add a new value to the vector;
|
- `vector_result_t vector_push(vector, value)`: adds a new value to the vector;
|
||||||
- `vector_result_t vector_set(vector, index, value)`: update the value of a given index if it exists;
|
- `vector_result_t vector_set(vector, index, value)`: updates the value of a given index if it exists;
|
||||||
- `vector_result_t vector_get(vector, index)`: return the value indexed by `index` if it exists;
|
- `vector_result_t vector_get(vector, index)`: returns the value indexed by `index` if it exists;
|
||||||
- `vector_result_t vector_sort(vector, cmp)`: sort vector using `cmp` function;
|
- `vector_result_t vector_sort(vector, cmp)`: sorts vector using `cmp` function;
|
||||||
- `vector_result_t vector_pop(vector)`: pop last element from the vector following the LIFO policy;
|
- `vector_result_t vector_pop(vector)`: pops last element from the vector following the LIFO policy;
|
||||||
- `vector_result_t vector_map(vector, callback, env)`: apply `callback` function to vector (in-place);
|
- `vector_result_t vector_map(vector, callback, env)`: applies `callback` function to vector (in-place);
|
||||||
- `vector_result_t vector_filter(vector, callback, env)`: filter vector using `callback` (in-place);
|
- `vector_result_t vector_filter(vector, callback, env)`: filters vector using `callback` (in-place);
|
||||||
- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: fold/reduce vector using `callback`;
|
- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: folds/reduces vector using `callback`;
|
||||||
- `vector_result_t vector_clear(vector)`: logically reset the vector. That is, new pushes will overwrite the memory;
|
- `vector_result_t vector_clear(vector)`: resets the vector logically. That is, new pushes will overwrite the memory;
|
||||||
- `vector_result_t vector_destroy(vector)`: delete the vector;
|
- `vector_result_t vector_destroy(vector)`: deletes the vector;
|
||||||
- `size_t vector_size(vector)`: return vector size (i.e., the number of elements);
|
- `size_t vector_size(vector)`: returns vector size (i.e., the number of elements);
|
||||||
- `size_t vector_capacity(vector)`: return vector capacity (i.e., vector total size).
|
- `size_t vector_capacity(vector)`: returns vector capacity (i.e., vector total size).
|
||||||
|
|
||||||
As you can see from the previous function signatures, most methods that operate
|
As you can see from the previous function signatures, most methods that operate
|
||||||
on the `Vector` data type return a custom type called `vector_result_t` which is
|
on the `Vector` data type return a custom type called `vector_result_t` which is
|
||||||
|
|||||||
415
src/bigint.c
415
src/bigint.c
@@ -9,6 +9,10 @@
|
|||||||
(result).message[RESULT_MSG_SIZE - 1] = '\0'; \
|
(result).message[RESULT_MSG_SIZE - 1] = '\0'; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define REMOVE(ptr) \
|
||||||
|
free(ptr); \
|
||||||
|
ptr = NULL
|
||||||
|
|
||||||
#define IS_DIGIT(c) ((c) >= '0') && ((c) <= '9')
|
#define IS_DIGIT(c) ((c) >= '0') && ((c) <= '9')
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@@ -19,7 +23,6 @@
|
|||||||
#include "bigint.h"
|
#include "bigint.h"
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
|
|
||||||
// Internal methods
|
|
||||||
/**
|
/**
|
||||||
* bigint_trim_zeros
|
* bigint_trim_zeros
|
||||||
* @number: a non-null big integer
|
* @number: a non-null big integer
|
||||||
@@ -842,30 +845,32 @@ cleanup: // Destroy intermediate allocations on error
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* bigint_dev
|
* bigint_div
|
||||||
* @x: a valid non-null big integer (dividend)
|
* @x: a non-null big integer acting as a dividend
|
||||||
* @y: a valid non-null big integer (divisor)
|
* @y: a non-null big integer acting as a divisor
|
||||||
*
|
*
|
||||||
* Computes division using long division algorithm in O(n^2)
|
* Computes the floor of the quotient of the magnitudes (i.e., floor(|X| / |Y|)) using Knuth's Algorithm D
|
||||||
|
* Adapted from p. 273 of Don Knuth's TAoCP Vol. 2
|
||||||
|
* The complexity is O(n * m) where 'n' and 'm' are the number of base-10^9
|
||||||
|
* "parts" (the limbs in the code below) in the divisor and the quotient, respectively.
|
||||||
*
|
*
|
||||||
* Returns a bigint_result_t data type
|
* Returns a bigint_result_t containing the quotient.
|
||||||
|
* The caller of this function is responsible for applying the sign.
|
||||||
*/
|
*/
|
||||||
static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
|
static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
|
||||||
bigint_result_t result = {0};
|
bigint_result_t result = {0};
|
||||||
bigint_result_t tmp_res = {0};
|
bigint_result_t tmp_res = {0};
|
||||||
|
|
||||||
bigint_t *quotient = NULL;
|
bigint_t *quotient = NULL;
|
||||||
bigint_t *remainder = NULL;
|
long long *u = NULL, *v = NULL, *q = NULL;
|
||||||
bigint_t *abs_y = NULL;
|
|
||||||
|
|
||||||
if (x == NULL || y == NULL) {
|
if (x == NULL || y == NULL) {
|
||||||
result.status = BIGINT_ERR_INVALID;
|
result.status = BIGINT_ERR_INVALID;
|
||||||
SET_MSG(result, "Invalid big numbers");
|
SET_MSG(result, "Invalid big integers");
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for division by zero
|
|
||||||
const size_t y_size = vector_size(y->digits);
|
const size_t y_size = vector_size(y->digits);
|
||||||
if (y_size == 0) {
|
if (y_size == 0) {
|
||||||
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
||||||
@@ -875,16 +880,16 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (y_size == 1) {
|
if (y_size == 1) {
|
||||||
vector_result_t y_val_res = vector_get(y->digits, 0);
|
vector_result_t y0_res = vector_get(y->digits, 0);
|
||||||
if (y_val_res.status != VECTOR_OK) {
|
if (y0_res.status != VECTOR_OK) {
|
||||||
result.status = BIGINT_ERR_INVALID;
|
result.status = BIGINT_ERR_INVALID;
|
||||||
COPY_MSG(result, y_val_res.message);
|
COPY_MSG(result, y0_res.message);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int *y_val = (int*)y_val_res.value.element;
|
int *y0 = (int *)y0_res.value.element;
|
||||||
if (*y_val == 0) {
|
if (*y0 == 0) {
|
||||||
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
||||||
SET_MSG(result, "Cannot divide by zero");
|
SET_MSG(result, "Cannot divide by zero");
|
||||||
|
|
||||||
@@ -892,94 +897,230 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If |x| < |y| then result is zero
|
|
||||||
tmp_res = bigint_compare_abs(x, y);
|
tmp_res = bigint_compare_abs(x, y);
|
||||||
if (tmp_res.status != BIGINT_OK) { return tmp_res; }
|
if (tmp_res.status != BIGINT_OK) {
|
||||||
|
return tmp_res;
|
||||||
|
}
|
||||||
|
|
||||||
if (tmp_res.value.compare_status < 0) {
|
if (tmp_res.value.compare_status < 0) {
|
||||||
tmp_res = bigint_from_int(0);
|
return bigint_from_int(0);
|
||||||
if (tmp_res.status != BIGINT_OK) { return tmp_res; }
|
}
|
||||||
|
|
||||||
result.value.number = tmp_res.value.number;
|
const size_t x_size = vector_size(x->digits);
|
||||||
|
const size_t n = y_size;
|
||||||
|
const long long BASE = (long long)BIGINT_BASE;
|
||||||
|
|
||||||
|
quotient = malloc(sizeof(bigint_t));
|
||||||
|
if (quotient == NULL) {
|
||||||
|
result.status = BIGINT_ERR_ALLOCATE;
|
||||||
|
SET_MSG(result, "Cannot allocate memory for big integer");
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
quotient->digits = NULL;
|
||||||
|
quotient->is_negative = false;
|
||||||
|
|
||||||
|
// Single-limb divisor case. Here, we scan using 64-bit arithmetic in O(n)
|
||||||
|
if (y_size == 1) {
|
||||||
|
vector_result_t y0_res = vector_get(y->digits, 0);
|
||||||
|
if (y0_res.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_INVALID;
|
||||||
|
COPY_MSG(result, y0_res.message);
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
long long divisor = *(int *)y0_res.value.element;
|
||||||
|
|
||||||
|
vector_result_t vec_res = vector_new(x_size, sizeof(int));
|
||||||
|
if (vec_res.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_ALLOCATE;
|
||||||
|
COPY_MSG(result, vec_res.message);
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
quotient->digits = vec_res.value.vector;
|
||||||
|
|
||||||
|
long long remainder = 0;
|
||||||
|
for (int idx = (int)x_size - 1; idx >= 0; idx--) {
|
||||||
|
vector_result_t xidx_res = vector_get(x->digits, idx);
|
||||||
|
if (xidx_res.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_INVALID;
|
||||||
|
COPY_MSG(result, xidx_res.message);
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
long long current = remainder * BASE + *(int *)xidx_res.value.element;
|
||||||
|
int q_idx = (int)(current / divisor);
|
||||||
|
remainder = current % divisor;
|
||||||
|
|
||||||
|
vector_result_t push_res = vector_push(quotient->digits, &q_idx);
|
||||||
|
if (push_res.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_INVALID;
|
||||||
|
COPY_MSG(result, push_res.message);
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore the LSB-first order
|
||||||
|
const size_t q_size = vector_size(quotient->digits);
|
||||||
|
for (size_t lo = 0, hi = q_size - 1; lo < hi; hi--) {
|
||||||
|
vector_result_t lr = vector_get(quotient->digits, lo);
|
||||||
|
vector_result_t hr = vector_get(quotient->digits, hi);
|
||||||
|
|
||||||
|
if (lr.status != VECTOR_OK || hr.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_INVALID;
|
||||||
|
SET_MSG(result, "Failed to reverse quotient digits");
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
int lower_val = *(int *)lr.value.element;
|
||||||
|
int higher_val = *(int *)hr.value.element;
|
||||||
|
vector_set(quotient->digits, lo, &higher_val);
|
||||||
|
vector_set(quotient->digits, hi, &lower_val);
|
||||||
|
}
|
||||||
|
|
||||||
|
bigint_result_t trim_res = bigint_trim_zeros(quotient);
|
||||||
|
if (trim_res.status != BIGINT_OK) { result = trim_res; goto cleanup; }
|
||||||
|
|
||||||
|
result.value.number = quotient;
|
||||||
result.status = BIGINT_OK;
|
result.status = BIGINT_OK;
|
||||||
SET_MSG(result, "Division between big integers was successful");
|
SET_MSG(result, "Division between big integers was successful");
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize quotient and remainder
|
/* General case using Knuth's Algorithm
|
||||||
tmp_res = bigint_from_int(0);
|
* First, some definitions:
|
||||||
if (tmp_res.status != BIGINT_OK) { return tmp_res; }
|
* index 0 -> least significant limb;
|
||||||
quotient = tmp_res.value.number;
|
* n -> limb count of divisor y
|
||||||
|
* m -> limb count of quotient (x_size - n)
|
||||||
|
* u[0 ... m + n] -> working copy of the (scaled) dividend +1 sentinel limb
|
||||||
|
* v[0 ... n - 1] -> working copy of the (scaled) divisor
|
||||||
|
* q[0 ... m] -> output quotient limbs
|
||||||
|
*/
|
||||||
|
const size_t m = x_size - n;
|
||||||
|
|
||||||
tmp_res = bigint_from_int(0);
|
u = calloc(m + n + 1, sizeof(long long));
|
||||||
if (tmp_res.status != BIGINT_OK) { bigint_destroy(quotient); return tmp_res; }
|
v = calloc(n, sizeof(long long));
|
||||||
remainder = tmp_res.value.number;
|
q = calloc(m + 1, sizeof(long long));
|
||||||
|
|
||||||
// Create absolute value of y for later comparisons
|
if (u == NULL || v == NULL || q == NULL) {
|
||||||
tmp_res = bigint_clone(y);
|
result.status = BIGINT_ERR_ALLOCATE;
|
||||||
if (tmp_res.status != BIGINT_OK) {
|
SET_MSG(result, "Cannot allocate scratch arrays for division");
|
||||||
bigint_destroy(quotient);
|
|
||||||
bigint_destroy(remainder);
|
|
||||||
|
|
||||||
return tmp_res;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
abs_y = tmp_res.value.number;
|
for (size_t idx = 0; idx < x_size; idx++) {
|
||||||
abs_y->is_negative = false;
|
vector_result_t get_res = vector_get(x->digits, idx);
|
||||||
|
if (get_res.status != VECTOR_OK) {
|
||||||
// Long division algorithm applied from MSB to LSB
|
|
||||||
const size_t x_size = vector_size(x->digits);
|
|
||||||
for (int idx = (int)x_size - 1; idx >= 0; idx--) {
|
|
||||||
// Shift remainder left by one base digit (multiplication by BASE)
|
|
||||||
tmp_res = bigint_shift_left(remainder, 1);
|
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
|
||||||
|
|
||||||
bigint_t *shifted_remainder = tmp_res.value.number;
|
|
||||||
bigint_destroy(remainder);
|
|
||||||
remainder = shifted_remainder;
|
|
||||||
|
|
||||||
// Add current digit of 'x' to the least significant position of remainder
|
|
||||||
vector_result_t digit_res = vector_get(x->digits, idx);
|
|
||||||
if (digit_res.status != VECTOR_OK) {
|
|
||||||
result.status = BIGINT_ERR_INVALID;
|
result.status = BIGINT_ERR_INVALID;
|
||||||
COPY_MSG(result, digit_res.message);
|
COPY_MSG(result, get_res.message);
|
||||||
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
int *x_digit = (int*)digit_res.value.element;
|
u[idx] = *(int *)get_res.value.element;
|
||||||
|
}
|
||||||
|
|
||||||
vector_result_t set_res = vector_set(remainder->digits, 0, x_digit);
|
for (size_t idx = 0; idx < n; idx++) {
|
||||||
if (set_res.status != VECTOR_OK) {
|
vector_result_t get_res = vector_get(y->digits, idx);
|
||||||
|
if (get_res.status != VECTOR_OK) {
|
||||||
result.status = BIGINT_ERR_INVALID;
|
result.status = BIGINT_ERR_INVALID;
|
||||||
COPY_MSG(result, set_res.message);
|
COPY_MSG(result, get_res.message);
|
||||||
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp_res = bigint_trim_zeros(remainder);
|
v[idx] = *(int *)get_res.value.element;
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
}
|
||||||
|
|
||||||
// COunt how many times 'y' fits into current remainder
|
// D1 (normalize): choose 'd' so that v[n - 1] >= BASE / 2 (after scaling)
|
||||||
size_t count = 0;
|
const long long d = BASE / (v[n - 1] + 1);
|
||||||
while (1) {
|
|
||||||
tmp_res = bigint_compare_abs(remainder, abs_y);
|
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
|
||||||
if (tmp_res.value.compare_status < 0) { break; } // remainder < abs_y
|
|
||||||
|
|
||||||
// remainder = remainder - abs_y
|
long long carry = 0;
|
||||||
tmp_res = bigint_sub_abs(remainder, abs_y);
|
for (size_t idx = 0; idx < x_size; idx++) {
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
long long current = u[idx] * d + carry;
|
||||||
|
u[idx] = current % BASE;
|
||||||
|
carry = current / BASE;
|
||||||
|
}
|
||||||
|
u[x_size] = carry;
|
||||||
|
|
||||||
bigint_t *new_remainder = tmp_res.value.number;
|
carry = 0;
|
||||||
bigint_destroy(remainder);
|
for (size_t idx = 0; idx < n; idx++) {
|
||||||
remainder = new_remainder;
|
long long current = v[idx] * d + carry;
|
||||||
count++;
|
v[idx] = current % BASE;
|
||||||
|
carry = current / BASE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// D2-D6: the main loop. One iteration produces one quotient limb
|
||||||
|
for (long long j = (long long)m; j >= 0; j--) {
|
||||||
|
size_t jj = (size_t)j;
|
||||||
|
|
||||||
|
// D3: 2-by-1 trial quotient
|
||||||
|
long long two_limb = u[jj + n] * BASE + u[jj + n - 1];
|
||||||
|
long long q_hat = two_limb / v[n - 1];
|
||||||
|
long long r_hat = two_limb % v[n - 1];
|
||||||
|
|
||||||
|
while (q_hat >= BASE || ((n >= 2) && (q_hat * v[n - 2]) > (BASE * r_hat + u[jj + n - 2]))) {
|
||||||
|
q_hat--;
|
||||||
|
r_hat += v[n - 1];
|
||||||
|
if (r_hat >= BASE) { break; }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add count to quotient digits
|
// D4: multiply-subtract u[j ... j + n] -= q_hat * v[0 ... n - 1]
|
||||||
vector_result_t push_res = vector_push(quotient->digits, &count);
|
long long borrow = 0;
|
||||||
|
for (size_t idx = 0; idx < n; idx++) {
|
||||||
|
long long product = q_hat * v[idx] + borrow;
|
||||||
|
borrow = product / BASE;
|
||||||
|
long long diff = u[jj + idx] - (product % BASE);
|
||||||
|
if (diff < 0) {
|
||||||
|
diff += BASE;
|
||||||
|
borrow++;
|
||||||
|
}
|
||||||
|
u[jj + idx] = diff;
|
||||||
|
}
|
||||||
|
u[jj + n] -= borrow;
|
||||||
|
|
||||||
|
// D5: store quotient digit
|
||||||
|
q[jj] = q_hat;
|
||||||
|
|
||||||
|
// D6: if 'u' went negative, add 'v' back once and decrement q[j]
|
||||||
|
if (u[jj + n] < 0) {
|
||||||
|
q[jj]--;
|
||||||
|
carry = 0;
|
||||||
|
for (size_t idx = 0; idx < n; idx++) {
|
||||||
|
long long sum = u[jj + idx] + v[idx] + carry;
|
||||||
|
u[jj + idx] = sum % BASE;
|
||||||
|
carry = sum / BASE;
|
||||||
|
}
|
||||||
|
u[jj + n] += carry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete working copy from memory
|
||||||
|
REMOVE(u); REMOVE(v);
|
||||||
|
|
||||||
|
// Build the bigint quotient from q[0 ... m] (index 0 = LSB)
|
||||||
|
vector_result_t vec_res = vector_new(m + 1, sizeof(int));
|
||||||
|
if (vec_res.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_ALLOCATE;
|
||||||
|
COPY_MSG(result, vec_res.message);
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
quotient->digits = vec_res.value.vector;
|
||||||
|
for (size_t idx = 0; idx <= m; idx++) {
|
||||||
|
int q_idx = (int)q[idx];
|
||||||
|
|
||||||
|
vector_result_t push_res = vector_push(quotient->digits, &q_idx);
|
||||||
if (push_res.status != VECTOR_OK) {
|
if (push_res.status != VECTOR_OK) {
|
||||||
result.status = BIGINT_ERR_INVALID;
|
result.status = BIGINT_ERR_INVALID;
|
||||||
COPY_MSG(result, push_res.message);
|
COPY_MSG(result, push_res.message);
|
||||||
@@ -988,34 +1129,10 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reverse quotient digits
|
REMOVE(q);
|
||||||
const size_t q_size = vector_size(quotient->digits);
|
|
||||||
for (size_t idx = 0; idx < q_size / 2; idx++) {
|
|
||||||
vector_result_t left_res = vector_get(quotient->digits, idx);
|
|
||||||
vector_result_t right_res = vector_get(quotient->digits, q_size - 1 - idx);
|
|
||||||
|
|
||||||
if (left_res.status != VECTOR_OK || right_res.status != VECTOR_OK) {
|
bigint_result_t trim_res = bigint_trim_zeros(quotient);
|
||||||
result.status = BIGINT_ERR_INVALID;
|
if (trim_res.status != BIGINT_OK) { result = trim_res; goto cleanup; }
|
||||||
SET_MSG(result, "Failed to access vector elements");
|
|
||||||
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
int *left = (int*)left_res.value.element;
|
|
||||||
int *right = (int*)right_res.value.element;
|
|
||||||
int temp = *left;
|
|
||||||
|
|
||||||
vector_set(quotient->digits, idx, right);
|
|
||||||
vector_set(quotient->digits, q_size - 1 - idx, &temp);
|
|
||||||
}
|
|
||||||
|
|
||||||
quotient->is_negative = (x->is_negative != y->is_negative);
|
|
||||||
|
|
||||||
tmp_res = bigint_trim_zeros(quotient);
|
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
|
||||||
|
|
||||||
bigint_destroy(remainder);
|
|
||||||
bigint_destroy(abs_y);
|
|
||||||
|
|
||||||
result.value.number = quotient;
|
result.value.number = quotient;
|
||||||
result.status = BIGINT_OK;
|
result.status = BIGINT_OK;
|
||||||
@@ -1024,20 +1141,20 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
|
|||||||
return result;
|
return result;
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
|
free(u); free(v); free(q);
|
||||||
if (quotient) { bigint_destroy(quotient); }
|
if (quotient) { bigint_destroy(quotient); }
|
||||||
if (remainder) { bigint_destroy(remainder); }
|
|
||||||
if (abs_y) { bigint_destroy(abs_y); }
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* bigint_from_int
|
* bigint_from_int
|
||||||
* @value: an integer value
|
* @value: an integer value
|
||||||
*
|
*
|
||||||
* Takes an integer and convert it to a big integer
|
* Takes an integer and convert it to a big integer
|
||||||
*
|
*
|
||||||
* Returns a big_int_result_t data type containing a new big integer
|
* Returns a bigint_result_t data type containing a new big integer
|
||||||
*/
|
*/
|
||||||
bigint_result_t bigint_from_int(long long value) {
|
bigint_result_t bigint_from_int(long long value) {
|
||||||
bigint_result_t result = {0};
|
bigint_result_t result = {0};
|
||||||
@@ -1555,14 +1672,14 @@ bigint_result_t bigint_prod(const bigint_t *x, const bigint_t *y) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* bigint_divmod
|
* bigint_divmod
|
||||||
* @x: a valid non-null big integer
|
* @x: a valid non-null big integer
|
||||||
* @y: a valid non-null big integer
|
* @y: a valid non-null big integer
|
||||||
*
|
*
|
||||||
* Computes division with remainder
|
* Computes truncated division with remainder. That is:
|
||||||
|
* quotient = trunc(x / y) sign = sign(x) XOR sign(y)
|
||||||
|
* remainder = x - y * quotient sign = sign(x)
|
||||||
*
|
*
|
||||||
* Returns a bigint_result_t data type
|
* Returns a bigint_result_t data type
|
||||||
*/
|
*/
|
||||||
@@ -1570,7 +1687,6 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
|
|||||||
bigint_result_t result = {0};
|
bigint_result_t result = {0};
|
||||||
bigint_result_t tmp_res = {0};
|
bigint_result_t tmp_res = {0};
|
||||||
|
|
||||||
// Intermediate results
|
|
||||||
bigint_t *quotient = NULL;
|
bigint_t *quotient = NULL;
|
||||||
bigint_t *y_times_q = NULL;
|
bigint_t *y_times_q = NULL;
|
||||||
bigint_t *remainder = NULL;
|
bigint_t *remainder = NULL;
|
||||||
@@ -1582,11 +1698,10 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for division by zero
|
|
||||||
const size_t y_size = vector_size(y->digits);
|
const size_t y_size = vector_size(y->digits);
|
||||||
if (y_size == 0) {
|
if (y_size == 0) {
|
||||||
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
||||||
SET_MSG(result, "Division by zero");
|
SET_MSG(result, "Cannot divide by zero");
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -1600,16 +1715,16 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int *y_val = (int*)y_val_res.value.element;
|
int *y_val = (int *)y_val_res.value.element;
|
||||||
if (*y_val == 0) {
|
if (*y_val == 0) {
|
||||||
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
result.status = BIGINT_ERR_DIV_BY_ZERO;
|
||||||
SET_MSG(result, "Division by zero");
|
SET_MSG(result, "Cannot divide by zero");
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// |x| < |y| then quotient is 0 and remainder is x
|
// |x| < |y|: quotient is 0, remainder is x
|
||||||
tmp_res = bigint_compare_abs(x, y);
|
tmp_res = bigint_compare_abs(x, y);
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
||||||
|
|
||||||
@@ -1624,6 +1739,7 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
|
|||||||
|
|
||||||
result.value.division.quotient = quotient;
|
result.value.division.quotient = quotient;
|
||||||
result.value.division.remainder = remainder;
|
result.value.division.remainder = remainder;
|
||||||
|
|
||||||
result.status = BIGINT_OK;
|
result.status = BIGINT_OK;
|
||||||
SET_MSG(result, "Division between big integers was successful");
|
SET_MSG(result, "Division between big integers was successful");
|
||||||
|
|
||||||
@@ -1634,7 +1750,10 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
|
|||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
||||||
quotient = tmp_res.value.number;
|
quotient = tmp_res.value.number;
|
||||||
|
|
||||||
// Compute r = x - y * q
|
// Set quotient sign accordingly
|
||||||
|
quotient->is_negative = (x->is_negative != y->is_negative);
|
||||||
|
|
||||||
|
// Compute remainder using r = x - y * q
|
||||||
tmp_res = bigint_prod(y, quotient);
|
tmp_res = bigint_prod(y, quotient);
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
||||||
y_times_q = tmp_res.value.number;
|
y_times_q = tmp_res.value.number;
|
||||||
@@ -1643,13 +1762,24 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
|
|||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
||||||
remainder = tmp_res.value.number;
|
remainder = tmp_res.value.number;
|
||||||
|
|
||||||
// Ensure that remainder has correct sign (i.e., same as dividend x)
|
|
||||||
// In C-style division, sign(remainder) = sign(dividend)
|
|
||||||
remainder->is_negative = x->is_negative;
|
|
||||||
|
|
||||||
tmp_res = bigint_trim_zeros(remainder);
|
tmp_res = bigint_trim_zeros(remainder);
|
||||||
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
|
||||||
|
|
||||||
|
// Set remainder sign accordingly
|
||||||
|
vector_result_t r0 = vector_get(remainder->digits, 0);
|
||||||
|
if (r0.status != VECTOR_OK) {
|
||||||
|
result.status = BIGINT_ERR_INVALID;
|
||||||
|
COPY_MSG(result, r0.message);
|
||||||
|
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool rem_is_zero = (vector_size(remainder->digits) == 1 && *(int *)r0.value.element == 0);
|
||||||
|
|
||||||
|
if (!rem_is_zero) {
|
||||||
|
remainder->is_negative = x->is_negative;
|
||||||
|
}
|
||||||
|
|
||||||
result.value.division.quotient = quotient;
|
result.value.division.quotient = quotient;
|
||||||
result.value.division.remainder = remainder;
|
result.value.division.remainder = remainder;
|
||||||
result.status = BIGINT_OK;
|
result.status = BIGINT_OK;
|
||||||
@@ -1753,40 +1883,37 @@ bigint_result_t bigint_printf(const char *format, ...) {
|
|||||||
|
|
||||||
// Process string char by char
|
// Process string char by char
|
||||||
for (const char *p = format; *p != '\0'; p++) {
|
for (const char *p = format; *p != '\0'; p++) {
|
||||||
if (*p == '%' && *(p + 1) == 'B') {
|
if (*p == '%' && *(p + 1) != '%') {
|
||||||
// Process a big number
|
|
||||||
bigint_t *num = va_arg(args, bigint_t*);
|
|
||||||
if (num == NULL) {
|
|
||||||
printf("<invalid string>");
|
|
||||||
} else {
|
|
||||||
bigint_result_t num_str_res = bigint_to_string(num);
|
|
||||||
if (num_str_res.status != BIGINT_OK) {
|
|
||||||
va_end(args);
|
|
||||||
return num_str_res;
|
|
||||||
}
|
|
||||||
|
|
||||||
char* const number_str = num_str_res.value.string_num;
|
|
||||||
printf("%s", number_str);
|
|
||||||
free(number_str);
|
|
||||||
}
|
|
||||||
p++;
|
p++;
|
||||||
} else if (*p == '%' && *(p + 1) != '%') {
|
const char placeholder = *p;
|
||||||
// Handle common printf placeholders
|
|
||||||
p++;
|
|
||||||
char placeholder = *p;
|
|
||||||
|
|
||||||
switch (placeholder) {
|
switch (placeholder) {
|
||||||
|
case 'B': {
|
||||||
|
bigint_t *num = va_arg(args, bigint_t*);
|
||||||
|
if (num == NULL) {
|
||||||
|
for (const char *s = "<invalid big integer>"; *s != '\0'; s++) { putchar(*s); }
|
||||||
|
} else {
|
||||||
|
bigint_result_t num_str_res = bigint_to_string(num);
|
||||||
|
if (num_str_res.status != BIGINT_OK) {
|
||||||
|
va_end(args);
|
||||||
|
return num_str_res;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *number_str = num_str_res.value.string_num;
|
||||||
|
for (const char *s = number_str; *s != '\0'; s++) { putchar(*s); }
|
||||||
|
free(number_str);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 'd':
|
case 'd':
|
||||||
case 'i': {
|
case 'i': {
|
||||||
int val = va_arg(args, int);
|
int val = va_arg(args, int);
|
||||||
printf("%d", val);
|
printf("%d", val);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'u': {
|
case 'u': {
|
||||||
unsigned int val = va_arg(args, unsigned int);
|
unsigned int val = va_arg(args, unsigned int);
|
||||||
printf("%u", val);
|
printf("%u", val);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'l': {
|
case 'l': {
|
||||||
@@ -1806,13 +1933,17 @@ bigint_result_t bigint_printf(const char *format, ...) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 's': {
|
case 's': {
|
||||||
char *val = va_arg(args, char*);
|
char* val = va_arg(args, char*);
|
||||||
printf("%s", val ? val : "<invalid string>");
|
if (val) {
|
||||||
|
for (const char *s = val; *s != '\0'; s++) { putchar(*s); }
|
||||||
|
} else {
|
||||||
|
for (const char *s = "<invalid string>"; *s != '\0'; s++) { putchar(*s); }
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'c': {
|
case 'c': {
|
||||||
int val = va_arg(args, int);
|
int val = va_arg(args, int);
|
||||||
printf("%c", val);
|
putchar(val);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'f': {
|
case 'f': {
|
||||||
@@ -1821,7 +1952,7 @@ bigint_result_t bigint_printf(const char *format, ...) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'p': {
|
case 'p': {
|
||||||
void *val = va_arg(args, void*);
|
void* const val = va_arg(args, void*);
|
||||||
printf("%p", val);
|
printf("%p", val);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,8 +10,6 @@
|
|||||||
|
|
||||||
#include "map.h"
|
#include "map.h"
|
||||||
|
|
||||||
// Internal methods
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* hash_key
|
* hash_key
|
||||||
* @key: The input string for the hash function
|
* @key: The input string for the hash function
|
||||||
|
|||||||
934
src/string.c
934
src/string.c
@@ -1,934 +0,0 @@
|
|||||||
#define SET_MSG(result, msg) \
|
|
||||||
do { \
|
|
||||||
snprintf((char *)(result).message, RESULT_MSG_SIZE, "%s", (const char *)msg); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "string.h"
|
|
||||||
|
|
||||||
// Check if a character is a space
|
|
||||||
static inline bool is_space(unsigned char c) {
|
|
||||||
return (c == ' ' || c == '\t' ||
|
|
||||||
c == '\n' || c == '\r' ||
|
|
||||||
c == '\f' || c == '\v');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get byte length of a UTF-8 character/symbol
|
|
||||||
static inline int utf8_char_len(unsigned char byte) {
|
|
||||||
if ((byte & 0x80) == 0x00) return 1;
|
|
||||||
if ((byte & 0xE0) == 0xC0) return 2;
|
|
||||||
if ((byte & 0xF0) == 0xE0) return 3;
|
|
||||||
if ((byte & 0xF8) == 0xF0) return 4;
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate an UTF-8 symbol
|
|
||||||
static bool utf8_is_char_valid(const char *utf8_char, int *out_len) {
|
|
||||||
if (utf8_char == NULL) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t len = utf8_char_len((unsigned char)utf8_char[0]);
|
|
||||||
if (len <= 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t idx = 1; idx < len; idx++) {
|
|
||||||
if ((utf8_char[idx] & 0xC0) != 0x80) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (utf8_char[len] != '\0') {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (out_len) {
|
|
||||||
*out_len = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate an UTF-8 symbol and measure byte length and character count in one pass
|
|
||||||
static bool utf8_scan(const char *str, size_t *out_byte_size, size_t *out_char_count) {
|
|
||||||
size_t b_size = 0;
|
|
||||||
size_t c_count = 0;
|
|
||||||
const unsigned char *p = (const unsigned char *)str;
|
|
||||||
|
|
||||||
while (p[b_size] != '\0') {
|
|
||||||
size_t len = utf8_char_len(p[b_size]);
|
|
||||||
if (len <= 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t idx = 1; idx < len; idx++) {
|
|
||||||
if (p[b_size + idx] == '\0' || (p[b_size + idx] & 0xC0) != 0x80) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b_size += len;
|
|
||||||
c_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
*out_byte_size = b_size;
|
|
||||||
*out_char_count = c_count;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decode an UTF-8 symbol to a codepoint
|
|
||||||
static uint32_t utf8_decode(const char *str, int *char_len) {
|
|
||||||
unsigned char byte = (unsigned char)*str;
|
|
||||||
*char_len = utf8_char_len(byte);
|
|
||||||
|
|
||||||
uint32_t result = 0;
|
|
||||||
|
|
||||||
switch (*char_len) {
|
|
||||||
case 1:
|
|
||||||
result = byte;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
result = ((byte & 0x1F) << 6) |
|
|
||||||
(str[1] & 0x3F);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
result = ((byte & 0x0F) << 12) |
|
|
||||||
((str[1] & 0x3F) << 6) |
|
|
||||||
(str[2] & 0x3F);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
result = ((byte & 0x07) << 18) |
|
|
||||||
((str[1] & 0x3F) << 12) |
|
|
||||||
((str[2] & 0x3F) << 6) |
|
|
||||||
(str[3] & 0x3F);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
result = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode @codepoint as UTF-8 into @out (no terminator is written).
// Returns the number of bytes written (1..4), or 0 without touching @out when
// @codepoint is above 0x10FFFF. @out must have room for the bytes written.
static int utf8_encode(uint32_t codepoint, char *out) {
    int len;
    unsigned char lead;

    if (codepoint <= 0x7F) {
        out[0] = (char)codepoint;
        return 1;
    } else if (codepoint <= 0x7FF) {
        len = 2;
        lead = 0xC0;
    } else if (codepoint <= 0xFFFF) {
        len = 3;
        lead = 0xE0;
    } else if (codepoint <= 0x10FFFF) {
        len = 4;
        lead = 0xF0;
    } else {
        return 0;
    }

    // Fill continuation bytes from the last one backwards, consuming 6 bits each
    for (int k = len - 1; k > 0; k--) {
        out[k] = (char)(0x80 | (codepoint & 0x3F));
        codepoint >>= 6;
    }
    // Whatever bits remain belong to the lead byte
    out[0] = (char)(lead | codepoint);

    return len;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_new
|
|
||||||
* @c_str: a C-string
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing a new String data type
|
|
||||||
*/
|
|
||||||
string_result_t string_new(const char *c_str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (c_str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid null input string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t b_size, c_count;
|
|
||||||
if (utf8_scan(c_str, &b_size, &c_count) == 0) {
|
|
||||||
result.status = STRING_ERR_INVALID_UTF8;
|
|
||||||
SET_MSG(result, "Malformed UTF-8 sequence");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_t *str = malloc(sizeof(string_t));
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
str->data = malloc(b_size + 1);
|
|
||||||
if (str->data == NULL) {
|
|
||||||
free(str);
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(str->data, c_str, b_size + 1);
|
|
||||||
str->byte_size = b_size;
|
|
||||||
str->byte_capacity = b_size + 1;
|
|
||||||
str->char_count = c_count;
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "String successfully created");
|
|
||||||
result.value.string = str;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_clone
|
|
||||||
* @str: a non-null string
|
|
||||||
*
|
|
||||||
* Deep copies @str
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing the copied string
|
|
||||||
*/
|
|
||||||
string_result_t string_clone(const string_t *str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_t *str_copy = malloc(sizeof(string_t));
|
|
||||||
if (str_copy == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
str_copy->data = malloc(str->byte_size + 1);
|
|
||||||
if (str_copy->data == NULL) {
|
|
||||||
free(str_copy);
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(str_copy->data, str->data, str->byte_size + 1);
|
|
||||||
str_copy->byte_size = str->byte_size;
|
|
||||||
str_copy->byte_capacity = str->byte_size + 1;
|
|
||||||
str_copy->char_count = str->char_count;
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
result.value.string = str_copy;
|
|
||||||
SET_MSG(result, "String successfully copied");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_concat
|
|
||||||
* @x: a non-null string
|
|
||||||
* @y: a non-null string
|
|
||||||
*
|
|
||||||
* Concats @x and @y in a new String
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing the new string
|
|
||||||
*/
|
|
||||||
string_result_t string_concat(const string_t *x, const string_t *y) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (x == NULL || y == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid strings");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x->byte_size > SIZE_MAX - y->byte_size - 1) {
|
|
||||||
result.status = STRING_ERR_OVERFLOW;
|
|
||||||
SET_MSG(result, "Concatenation exceeds size limits");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t new_size = x->byte_size + y->byte_size;
|
|
||||||
char *buf = malloc(new_size + 1);
|
|
||||||
if (buf == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(buf, x->data, x->byte_size);
|
|
||||||
memcpy(buf + x->byte_size, y->data, y->byte_size);
|
|
||||||
buf[new_size] = '\0';
|
|
||||||
result = string_new(buf);
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_contains
|
|
||||||
* @haystack: a non-null string
|
|
||||||
* @needle: a non-null string
|
|
||||||
*
|
|
||||||
* Finds @needle on @haystack
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing the index to the beginning of the located string
|
|
||||||
* (if the substring has been found)
|
|
||||||
*/
|
|
||||||
string_result_t string_contains(const string_t *haystack, const string_t *needle) {
|
|
||||||
string_result_t result = {
|
|
||||||
.status = STRING_OK,
|
|
||||||
.value.idx = -1
|
|
||||||
};
|
|
||||||
|
|
||||||
if (haystack == NULL || needle == NULL || needle->byte_size == 0) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid substrings");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *found = strstr(haystack->data, needle->data);
|
|
||||||
if (found) {
|
|
||||||
size_t char_idx = 0;
|
|
||||||
const char *ptr = haystack->data;
|
|
||||||
while (ptr < found) {
|
|
||||||
ptr += utf8_char_len((unsigned char)*ptr);
|
|
||||||
char_idx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
result.value.idx = (int64_t)char_idx;
|
|
||||||
SET_MSG(result, "Substring found");
|
|
||||||
} else {
|
|
||||||
SET_MSG(result, "Substring not found");
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_slice
|
|
||||||
* @str: a non-null string
|
|
||||||
* @start: the lower bound (inclusive)
|
|
||||||
* @end: the upper bound (inclusive)
|
|
||||||
*
|
|
||||||
* Extracts a slice from @str between @start and @end (inclusive)
|
|
||||||
*
|
|
||||||
* Returns a string_result_t data type containing the slice
|
|
||||||
*/
|
|
||||||
string_result_t string_slice(const string_t *str, size_t start, size_t end) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (start > end || end >= str->char_count) {
|
|
||||||
result.status = STRING_ERR_OVERFLOW;
|
|
||||||
SET_MSG(result, "Index out of bounds");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t start_byte_offset = 0;
|
|
||||||
for (size_t idx = 0; idx < start; idx++) {
|
|
||||||
start_byte_offset += utf8_char_len((unsigned char)str->data[start_byte_offset]);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t end_byte_offset = start_byte_offset;
|
|
||||||
for (size_t idx = start; idx <= end; idx++) {
|
|
||||||
end_byte_offset += utf8_char_len((unsigned char)str->data[end_byte_offset]);
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t slice_byte_size = end_byte_offset - start_byte_offset;
|
|
||||||
|
|
||||||
string_t *slice = malloc(sizeof(string_t));
|
|
||||||
if (slice == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
slice->data = malloc(slice_byte_size + 1);
|
|
||||||
if (slice->data == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(slice->data, str->data + start_byte_offset, slice_byte_size);
|
|
||||||
slice->data[slice_byte_size] = '\0';
|
|
||||||
|
|
||||||
slice->byte_size = slice_byte_size;
|
|
||||||
slice->byte_capacity = slice_byte_size + 1;
|
|
||||||
slice->char_count = end - start + 1;
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
result.value.string = slice;
|
|
||||||
SET_MSG(result, "String sliced successfully");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_eq
|
|
||||||
* @x: a non-null string
|
|
||||||
* @y: a non-null string
|
|
||||||
* @case_sensitive: boolean value for case sensitive comparison
|
|
||||||
*
|
|
||||||
* Compares two Strings
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing the comparison result
|
|
||||||
*/
|
|
||||||
string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive) {
|
|
||||||
string_result_t result = {
|
|
||||||
.status = STRING_OK,
|
|
||||||
.value.is_equ = false
|
|
||||||
};
|
|
||||||
|
|
||||||
if (x == NULL || y == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid strings");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x->char_count != y->char_count) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Strings differ in length");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (case_sensitive) {
|
|
||||||
result.value.is_equ = (strcmp(x->data, y->data) == 0);
|
|
||||||
} else {
|
|
||||||
const char *p1 = x->data, *p2 = y->data;
|
|
||||||
while (*p1 && *p2) {
|
|
||||||
int l1, l2;
|
|
||||||
|
|
||||||
const uint32_t codepoint1 = utf8_decode(p1, &l1);
|
|
||||||
const uint32_t codepoint2 = utf8_decode(p2, &l2);
|
|
||||||
const uint32_t c1 = (codepoint1 >= 'A' && codepoint1 <= 'Z') ? codepoint1 + 32 : codepoint1;
|
|
||||||
const uint32_t c2 = (codepoint2 >= 'A' && codepoint2 <= 'Z') ? codepoint2 + 32 : codepoint2;
|
|
||||||
|
|
||||||
if (c1 != c2) {
|
|
||||||
result.value.is_equ = false;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
p1 += l1;
|
|
||||||
p2 += l2;
|
|
||||||
}
|
|
||||||
result.value.is_equ = (*p1 == *p2);
|
|
||||||
}
|
|
||||||
|
|
||||||
SET_MSG(result, "Comparison completed successfully");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_get_at
|
|
||||||
* @str: a non-null string
|
|
||||||
* @position: the position of the symbol to read
|
|
||||||
*
|
|
||||||
* Gets symbol indexed by @position from @str
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing the symbol as a C string
|
|
||||||
*/
|
|
||||||
string_result_t string_get_at(const string_t *str, size_t position) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (position >= str->char_count) {
|
|
||||||
result.status = STRING_ERR_OVERFLOW;
|
|
||||||
SET_MSG(result, "Index out of bounds");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *ptr = str->data;
|
|
||||||
for (size_t idx = 0; idx < position; idx++) {
|
|
||||||
ptr += utf8_char_len((unsigned char)*ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
int char_len = utf8_char_len((unsigned char)*ptr);
|
|
||||||
char *utf8_char = malloc(char_len + 1);
|
|
||||||
if (utf8_char == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(utf8_char, ptr, char_len);
|
|
||||||
utf8_char[char_len] = '\0';
|
|
||||||
|
|
||||||
result.value.symbol = utf8_char;
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "Symbol successfully retrieved");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_set_at
|
|
||||||
* @str: a non-null string
|
|
||||||
* @position: the position to write into
|
|
||||||
* @utf8_char: an UTF8 symbol
|
|
||||||
*
|
|
||||||
* Writes @utf8_char into @str at index @position
|
|
||||||
*
|
|
||||||
* Returns a string_result_t data type
|
|
||||||
*/
|
|
||||||
string_result_t string_set_at(const string_t *str, size_t position, const char *utf8_char) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
int new_char_bytes;
|
|
||||||
if (utf8_is_char_valid(utf8_char, &new_char_bytes) == 0) {
|
|
||||||
result.status = STRING_ERR_INVALID_UTF8;
|
|
||||||
SET_MSG(result, "Invalid UTF-8 character");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (position >= str->char_count) {
|
|
||||||
result.status = STRING_ERR_OVERFLOW;
|
|
||||||
SET_MSG(result, "Index out of bounds");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Locate the byte offset of the character to replace
|
|
||||||
const char *pos = str->data;
|
|
||||||
for (size_t idx = 0; idx < position; idx++) {
|
|
||||||
pos += utf8_char_len((unsigned char)*pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t prefix_len = pos - str->data;
|
|
||||||
const int old_char_bytes = utf8_char_len((unsigned char)*pos);
|
|
||||||
const size_t suffix_len = str->byte_size - prefix_len - old_char_bytes;
|
|
||||||
const size_t new_total_bytes = prefix_len + new_char_bytes + suffix_len;
|
|
||||||
|
|
||||||
string_t *new_str = malloc(sizeof(string_t));
|
|
||||||
if (new_str == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
new_str->data = malloc(new_total_bytes + 1);
|
|
||||||
if (new_str->data == NULL) {
|
|
||||||
free(new_str);
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy prefix data from original string
|
|
||||||
memcpy(new_str->data, str->data, prefix_len);
|
|
||||||
// Copy the new character at requested index
|
|
||||||
memcpy(new_str->data + prefix_len, utf8_char, new_char_bytes);
|
|
||||||
// Copy suffix data from the original string by skipping the overwritten character
|
|
||||||
memcpy(new_str->data + prefix_len + new_char_bytes, pos + old_char_bytes, suffix_len);
|
|
||||||
new_str->data[new_total_bytes] = '\0';
|
|
||||||
|
|
||||||
new_str->byte_size = new_total_bytes;
|
|
||||||
new_str->byte_capacity = new_total_bytes + 1;
|
|
||||||
new_str->char_count = str->char_count;
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
result.value.string = new_str;
|
|
||||||
SET_MSG(result, "Symbol successfully set");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_to_lower
|
|
||||||
* @str: a non-null string
|
|
||||||
*
|
|
||||||
* Converts a String to lowercase
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing a new string
|
|
||||||
*/
|
|
||||||
string_result_t string_to_lower(const string_t *str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *buf = malloc(str->byte_capacity);
|
|
||||||
if (buf == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *src = str->data;
|
|
||||||
char *dst = buf;
|
|
||||||
|
|
||||||
while (*src) {
|
|
||||||
int len;
|
|
||||||
uint32_t codepoint = utf8_decode(src, &len);
|
|
||||||
uint32_t lower = (codepoint >= 'A' && codepoint <= 'Z') ? codepoint + 32 : codepoint;
|
|
||||||
dst += utf8_encode(lower, dst);
|
|
||||||
src += len;
|
|
||||||
}
|
|
||||||
*dst = '\0';
|
|
||||||
result = string_new(buf);
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "String successfully converted to lowercase");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_to_upper
|
|
||||||
* @str: a non-null string
|
|
||||||
*
|
|
||||||
* Converts a String to uppercase
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing a new string
|
|
||||||
*/
|
|
||||||
string_result_t string_to_upper(const string_t *str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *buf = malloc(str->byte_capacity);
|
|
||||||
if (buf == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *src = str->data;
|
|
||||||
char *dst = buf;
|
|
||||||
while (*src) {
|
|
||||||
int len;
|
|
||||||
uint32_t codepoint = utf8_decode(src, &len);
|
|
||||||
uint32_t upper = (codepoint >= 'a' && codepoint <= 'z') ? codepoint - 32 : codepoint;
|
|
||||||
dst += utf8_encode(upper, dst);
|
|
||||||
src += len;
|
|
||||||
}
|
|
||||||
*dst = '\0';
|
|
||||||
result = string_new(buf);
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "String successfully converted to uppercase");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_reverse
|
|
||||||
* @str: a non-null string
|
|
||||||
*
|
|
||||||
* Reverses @str
|
|
||||||
*
|
|
||||||
* Returns a new string_result_t containing the reversed string
|
|
||||||
*/
|
|
||||||
string_result_t string_reverse(const string_t *str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *buf = malloc(str->byte_capacity);
|
|
||||||
if (buf == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char **pos = malloc(str->char_count * sizeof(char *));
|
|
||||||
if (pos == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *ptr = str->data;
|
|
||||||
for (size_t idx = 0; idx < str->char_count; idx++) {
|
|
||||||
pos[idx] = ptr;
|
|
||||||
ptr += utf8_char_len((unsigned char)*ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
char *dst = buf;
|
|
||||||
for (int64_t idx = (int64_t)str->char_count - 1; idx >= 0; idx--) {
|
|
||||||
int len = utf8_char_len((unsigned char)*pos[idx]);
|
|
||||||
memcpy(dst, pos[idx], len);
|
|
||||||
dst += len;
|
|
||||||
}
|
|
||||||
|
|
||||||
*dst = '\0';
|
|
||||||
free(pos);
|
|
||||||
result = string_new(buf);
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
SET_MSG(result, "String successfully reversed");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_trim
|
|
||||||
* @str: a non-null string
|
|
||||||
*
|
|
||||||
* Trims whitespace from @str
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing the trimmed string
|
|
||||||
*/
|
|
||||||
string_result_t string_trim(const string_t *str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *start = str->data;
|
|
||||||
while (*start && is_space((unsigned char)*start)) {
|
|
||||||
start++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*start == '\0') {
|
|
||||||
return string_new("");
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *end = str->data + str->byte_size - 1;
|
|
||||||
while (end > start && is_space((unsigned char)*end)) {
|
|
||||||
end--;
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t len = (end - start) + 1;
|
|
||||||
char *trimmed = malloc(len + 1);
|
|
||||||
if (trimmed == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(trimmed, start, len);
|
|
||||||
trimmed[len] = '\0';
|
|
||||||
result = string_new(trimmed);
|
|
||||||
free(trimmed);
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "String successfully trimmed");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_split
|
|
||||||
* @str: a non-null string
|
|
||||||
* @delim: delimiter string
|
|
||||||
*
|
|
||||||
* Splits @str by @delim
|
|
||||||
*
|
|
||||||
* Returns a string_result_t containing an array of String pointers
|
|
||||||
*/
|
|
||||||
string_result_t string_split(const string_t *str, const char *delim) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
string_result_t tmp_res = {0};
|
|
||||||
|
|
||||||
if (str == NULL || delim == NULL || delim[0] == '\0') {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid strings");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *ptr = str->data;
|
|
||||||
const size_t delim_len = strlen(delim);
|
|
||||||
size_t count = 1;
|
|
||||||
|
|
||||||
while ((ptr = strstr(ptr, delim))) {
|
|
||||||
count++;
|
|
||||||
ptr += delim_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_t **string_array = malloc(count * sizeof(string_t *));
|
|
||||||
if (string_array == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocate memory");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *start = str->data;
|
|
||||||
size_t idx = 0;
|
|
||||||
|
|
||||||
while ((ptr = strstr(start, delim))) {
|
|
||||||
const size_t part_len = ptr - start;
|
|
||||||
char *tmp = malloc(part_len + 1);
|
|
||||||
if (tmp == NULL) {
|
|
||||||
result.status = STRING_ERR_ALLOCATE;
|
|
||||||
SET_MSG(result, "Cannot allocated memory");
|
|
||||||
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(tmp, start, part_len);
|
|
||||||
tmp[part_len] = '\0';
|
|
||||||
|
|
||||||
tmp_res = string_new(tmp);
|
|
||||||
free(tmp);
|
|
||||||
if (tmp_res.status != STRING_OK) { result = tmp_res; goto cleanup; }
|
|
||||||
|
|
||||||
string_array[idx++] = tmp_res.value.string;
|
|
||||||
start = ptr + delim_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
tmp_res = string_new(start);
|
|
||||||
if (tmp_res.status != STRING_OK) { result = tmp_res; goto cleanup; }
|
|
||||||
|
|
||||||
string_array[idx] = tmp_res.value.string;
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
result.value.split.strings = string_array;
|
|
||||||
result.value.split.count = count;
|
|
||||||
SET_MSG(result, "String successfully split");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
cleanup:
|
|
||||||
for (size_t j = 0; j < idx; j++) {
|
|
||||||
string_destroy(string_array[j]);
|
|
||||||
}
|
|
||||||
free(string_array);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_destroy
|
|
||||||
* @str: a non-null string
|
|
||||||
*
|
|
||||||
* Destroys @str
|
|
||||||
*
|
|
||||||
* Returns a string_result_t data type
|
|
||||||
*/
|
|
||||||
string_result_t string_destroy(string_t *str) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (str == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(str->data);
|
|
||||||
free(str);
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "String successfully deleted");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* string_split_destory
|
|
||||||
* @split: an array of pointers of String
|
|
||||||
* @count: the number of elements
|
|
||||||
*
|
|
||||||
* Destroys the @split array of Strings
|
|
||||||
*
|
|
||||||
* Returns a string_result_t data type
|
|
||||||
*/
|
|
||||||
string_result_t string_split_destroy(string_t **split, size_t count) {
|
|
||||||
string_result_t result = {0};
|
|
||||||
|
|
||||||
if (split == NULL) {
|
|
||||||
result.status = STRING_ERR_INVALID;
|
|
||||||
SET_MSG(result, "Invalid string");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t idx = 0; idx < count; idx++) {
|
|
||||||
string_destroy(split[idx]);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(split);
|
|
||||||
|
|
||||||
result.status = STRING_OK;
|
|
||||||
SET_MSG(result, "Array of strings successfully deleted");
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
70
src/string.h
70
src/string.h
@@ -1,70 +0,0 @@
|
|||||||
#ifndef STRING_H
|
|
||||||
#define STRING_H
|
|
||||||
|
|
||||||
#define RESULT_MSG_SIZE 64
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
|
|
||||||
// Status codes carried by every string_result_t
typedef enum {
    STRING_OK = 0,               // Operation completed
    STRING_ERR_ALLOCATE = 1,     // A memory allocation failed
    STRING_ERR_INVALID = 2,      // Invalid (e.g. NULL) argument
    STRING_ERR_INVALID_UTF8 = 3, // Input is not well-formed UTF-8
    STRING_ERR_OVERFLOW = 4      // Index out of bounds or size overflow
} string_status_t;
|
|
||||||
|
|
||||||
// UTF-8 string: a NUL-terminated byte buffer plus cached measurements
typedef struct {
    // NUL-terminated UTF-8 bytes
    char *data;
    // Size in bytes excluding NULL terminator
    size_t byte_size;
    // Total allocated memory
    size_t byte_capacity;
    // Number of symbols
    size_t char_count;
} string_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
string_status_t status;
|
|
||||||
uint8_t message[RESULT_MSG_SIZE];
|
|
||||||
union {
|
|
||||||
string_t *string; // For new, clone, slice, reverse, trim
|
|
||||||
char *symbol; // For get_at
|
|
||||||
int64_t idx; // For contains
|
|
||||||
bool is_equ; // For comparison
|
|
||||||
struct { // For split
|
|
||||||
string_t **strings;
|
|
||||||
size_t count;
|
|
||||||
} split;
|
|
||||||
} value;
|
|
||||||
} string_result_t;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Public APIs
|
|
||||||
string_result_t string_new(const char *c_str);
|
|
||||||
string_result_t string_clone(const string_t *str);
|
|
||||||
string_result_t string_concat(const string_t *x, const string_t *y);
|
|
||||||
string_result_t string_contains(const string_t *haystack, const string_t *needle);
|
|
||||||
string_result_t string_slice(const string_t *str, size_t start, size_t end);
|
|
||||||
string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive);
|
|
||||||
string_result_t string_get_at(const string_t *str, size_t position);
|
|
||||||
string_result_t string_set_at(const string_t *str, size_t position, const char *utf8_char);
|
|
||||||
string_result_t string_to_lower(const string_t *str);
|
|
||||||
string_result_t string_to_upper(const string_t *str);
|
|
||||||
string_result_t string_reverse(const string_t *str);
|
|
||||||
string_result_t string_trim(const string_t *str);
|
|
||||||
string_result_t string_split(const string_t *str, const char *delim);
|
|
||||||
string_result_t string_destroy(string_t *str);
|
|
||||||
string_result_t string_split_destroy(string_t **split, size_t count);
|
|
||||||
|
|
||||||
// Inline methods
|
|
||||||
static inline size_t string_size(const string_t *str) {
|
|
||||||
return str ? str->char_count : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -9,7 +9,6 @@
|
|||||||
|
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
|
|
||||||
// Internal methods
|
|
||||||
/**
|
/**
|
||||||
* vector_resize
|
* vector_resize
|
||||||
* @vector: a non-null vector
|
* @vector: a non-null vector
|
||||||
|
|||||||
@@ -213,8 +213,8 @@ void test_bigint_prod_neg(void) {
|
|||||||
bigint_destroy(prod.value.number);
|
bigint_destroy(prod.value.number);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test division between big numbers
|
// Test division between big numbers where divisor is a single limb big number
|
||||||
void test_bigint_div(void) {
|
void test_bigint_div_single_limb(void) {
|
||||||
bigint_result_t x = bigint_from_int(100);
|
bigint_result_t x = bigint_from_int(100);
|
||||||
bigint_result_t y = bigint_from_int(2);
|
bigint_result_t y = bigint_from_int(2);
|
||||||
|
|
||||||
@@ -229,11 +229,33 @@ void test_bigint_div(void) {
|
|||||||
bigint_eq(quotient, "50");
|
bigint_eq(quotient, "50");
|
||||||
bigint_eq(remainder, "0");
|
bigint_eq(remainder, "0");
|
||||||
|
|
||||||
bigint_destroy(quotient);
|
bigint_destroy(quotient); bigint_destroy(remainder);
|
||||||
bigint_destroy(remainder);
|
bigint_destroy(x.value.number); bigint_destroy(y.value.number);
|
||||||
|
}
|
||||||
|
|
||||||
bigint_destroy(x.value.number);
|
// Test division between big numbers using Knuth's algorithm
|
||||||
bigint_destroy(y.value.number);
|
void test_bigint_div_knuth(void) {
|
||||||
|
// (1...9) x 8
|
||||||
|
const char *x_origin = "123456789123456789123456789123456789123456789123456789123456789123456789";
|
||||||
|
// (9...1) x 5
|
||||||
|
const char *y_origin = "987654321987654321987654321987654321987654321";
|
||||||
|
|
||||||
|
bigint_result_t x = bigint_from_string(x_origin);
|
||||||
|
bigint_result_t y = bigint_from_string(y_origin);
|
||||||
|
|
||||||
|
assert(x.status == BIGINT_OK && y.status == BIGINT_OK);
|
||||||
|
|
||||||
|
bigint_result_t div = bigint_divmod(x.value.number, y.value.number);
|
||||||
|
assert(div.status == BIGINT_OK);
|
||||||
|
|
||||||
|
bigint_t* const quotient = div.value.division.quotient;
|
||||||
|
bigint_t* const remainder = div.value.division.remainder;
|
||||||
|
|
||||||
|
bigint_eq(quotient, "124999998860937500014238281");
|
||||||
|
bigint_eq(remainder, "246737799246737799370194588370194588370194588");
|
||||||
|
|
||||||
|
bigint_destroy(quotient); bigint_destroy(remainder);
|
||||||
|
bigint_destroy(x.value.number); bigint_destroy(y.value.number);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test division between big numbers with negative dividend
|
// Test division between big numbers with negative dividend
|
||||||
@@ -262,7 +284,7 @@ void test_bigint_div_dividend(void) {
|
|||||||
|
|
||||||
// Test division between big numbers with negative divisor
|
// Test division between big numbers with negative divisor
|
||||||
// This library follows C-style divison such that sign(remainder) = sign(dividend)
|
// This library follows C-style divison such that sign(remainder) = sign(dividend)
|
||||||
void test_bigint_div_divisor(void) {
|
void test_bigint_div_neg_divisor(void) {
|
||||||
bigint_result_t x = bigint_from_int(13);
|
bigint_result_t x = bigint_from_int(13);
|
||||||
bigint_result_t y = bigint_from_int(-4);
|
bigint_result_t y = bigint_from_int(-4);
|
||||||
|
|
||||||
@@ -405,9 +427,10 @@ int main(void) {
|
|||||||
TEST(bigint_very_large_prod);
|
TEST(bigint_very_large_prod);
|
||||||
TEST(bigint_prod_mixed);
|
TEST(bigint_prod_mixed);
|
||||||
TEST(bigint_prod_neg);
|
TEST(bigint_prod_neg);
|
||||||
TEST(bigint_div);
|
TEST(bigint_div_single_limb);
|
||||||
|
TEST(bigint_div_knuth);
|
||||||
TEST(bigint_div_dividend);
|
TEST(bigint_div_dividend);
|
||||||
TEST(bigint_div_divisor);
|
TEST(bigint_div_neg_divisor);
|
||||||
TEST(bigint_div_neg);
|
TEST(bigint_div_neg);
|
||||||
TEST(bigint_div_by_zero);
|
TEST(bigint_div_by_zero);
|
||||||
TEST(bigint_clone);
|
TEST(bigint_clone);
|
||||||
|
|||||||
@@ -1,329 +0,0 @@
|
|||||||
/*
|
|
||||||
* Unit tests for String data type
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define TEST(NAME) do { \
|
|
||||||
printf("Running test_%s...", #NAME); \
|
|
||||||
test_##NAME(); \
|
|
||||||
printf(" PASSED\n"); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "../src/string.h"
|
|
||||||
|
|
||||||
// Test string creation
|
|
||||||
void test_string_new(void) {
|
|
||||||
string_result_t res = string_new("hello");
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(res.value.string != NULL);
|
|
||||||
assert(strcmp(res.value.string->data, "hello") == 0);
|
|
||||||
assert(string_size(res.value.string) == 5);
|
|
||||||
assert(res.value.string->byte_size == 5);
|
|
||||||
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test empty string
|
|
||||||
void test_string_new_empty(void) {
|
|
||||||
string_result_t res = string_new("");
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(string_size(res.value.string) == 0);
|
|
||||||
assert(res.value.string->byte_size == 0);
|
|
||||||
assert(res.value.string->data[0] == '\0');
|
|
||||||
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test cloning an existing string
|
|
||||||
void test_string_clone(void) {
|
|
||||||
string_t *original = string_new("Original").value.string;
|
|
||||||
string_result_t res = string_clone(original);
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(res.value.string != original); // Different memory address
|
|
||||||
assert(strcmp(res.value.string->data, original->data) == 0);
|
|
||||||
assert(res.value.string->byte_size == original->byte_size);
|
|
||||||
|
|
||||||
string_destroy(original);
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string concatenation
|
|
||||||
void test_string_concat(void) {
|
|
||||||
string_t *str1 = string_new("Foo").value.string;
|
|
||||||
string_t *str2 = string_new(" Bar").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_concat(str1, str2);
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(strcmp(res.value.string->data, "Foo Bar") == 0);
|
|
||||||
assert(string_size(res.value.string) == 7);
|
|
||||||
|
|
||||||
string_destroy(str1);
|
|
||||||
string_destroy(str2);
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test if string contains a substring
|
|
||||||
void test_string_contains(void) {
|
|
||||||
string_t *haystack = string_new("Hello 🌍 World").value.string;
|
|
||||||
string_t *needle_ascii = string_new("World").value.string;
|
|
||||||
string_t *needle_utf8 = string_new("🌍").value.string;
|
|
||||||
string_t *needle_none = string_new("not found").value.string;
|
|
||||||
|
|
||||||
// World starts at symbol 8
|
|
||||||
string_result_t res1 = string_contains(haystack, needle_ascii);
|
|
||||||
assert(res1.status == STRING_OK);
|
|
||||||
assert(res1.value.idx == 8);
|
|
||||||
|
|
||||||
// 🌍 is at position 6
|
|
||||||
string_result_t res2 = string_contains(haystack, needle_utf8);
|
|
||||||
assert(res2.status == STRING_OK);
|
|
||||||
assert(res2.value.idx == 6);
|
|
||||||
|
|
||||||
// Not found should return -1
|
|
||||||
string_result_t res3 = string_contains(haystack, needle_none);
|
|
||||||
assert(res3.status == STRING_OK);
|
|
||||||
assert(res3.value.idx == -1);
|
|
||||||
|
|
||||||
string_destroy(haystack);
|
|
||||||
string_destroy(needle_ascii);
|
|
||||||
string_destroy(needle_utf8);
|
|
||||||
string_destroy(needle_none);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string slicing
|
|
||||||
void test_string_slice(void) {
|
|
||||||
// ASCII slice
|
|
||||||
string_t *str1 = string_new("foobar").value.string;
|
|
||||||
string_result_t res1 = string_slice(str1, 2, 4);
|
|
||||||
|
|
||||||
assert(res1.status == STRING_OK);
|
|
||||||
assert(strcmp(res1.value.string->data, "oba") == 0);
|
|
||||||
assert(res1.value.string->char_count == 3);
|
|
||||||
|
|
||||||
// UTF-8 slice
|
|
||||||
string_t *str2 = string_new("AB😆🌍").value.string;
|
|
||||||
string_result_t res2 = string_slice(str2, 2, 2);
|
|
||||||
|
|
||||||
assert(res2.status == STRING_OK);
|
|
||||||
assert(strcmp(res2.value.string->data, "😆") == 0);
|
|
||||||
assert(res2.value.string->byte_size == 4); // emoji = 4 bytes
|
|
||||||
|
|
||||||
// UTF-8 + ASCII slice
|
|
||||||
string_result_t res3 = string_slice(str2, 0, 2);
|
|
||||||
assert(res3.status == STRING_OK);
|
|
||||||
assert(strcmp(res3.value.string->data, "AB😆") == 0);
|
|
||||||
|
|
||||||
// Invalid bounds
|
|
||||||
string_result_t res4 = string_slice(str1, 5, 2);
|
|
||||||
assert(res4.status == STRING_ERR_OVERFLOW);
|
|
||||||
|
|
||||||
res4 = string_slice(str1, 1, 50);
|
|
||||||
assert(res4.status == STRING_ERR_OVERFLOW);
|
|
||||||
|
|
||||||
string_destroy(str1);
|
|
||||||
string_destroy(str2);
|
|
||||||
string_destroy(res1.value.string);
|
|
||||||
string_destroy(res2.value.string);
|
|
||||||
string_destroy(res3.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test case-insensitive and sensitive comparison
|
|
||||||
void test_string_eq(void) {
|
|
||||||
string_t *str1 = string_new("Foo").value.string;
|
|
||||||
string_t *str2 = string_new("foo").value.string;
|
|
||||||
|
|
||||||
// Case sensitive comparison should be false
|
|
||||||
assert(string_eq(str1, str2, true).value.is_equ == false);
|
|
||||||
// Case insensitive comparison should be true
|
|
||||||
assert(string_eq(str1, str2, false).value.is_equ == true);
|
|
||||||
|
|
||||||
string_destroy(str1);
|
|
||||||
string_destroy(str2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string reverse using UTF-8 symbols
|
|
||||||
void test_string_reverse_utf8(void) {
|
|
||||||
string_t *str = string_new("A🌍Z").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_reverse(str);
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(string_size(res.value.string) == 3);
|
|
||||||
assert(strcmp(res.value.string->data, "Z🌍A") == 0);
|
|
||||||
assert(string_size(res.value.string) == 3);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string get_at
|
|
||||||
void test_string_get_at(void) {
|
|
||||||
string_t *str = string_new("AB😆🌍").value.string;
|
|
||||||
|
|
||||||
// 😆 is at index 2
|
|
||||||
string_result_t res1 = string_get_at(str, 2);
|
|
||||||
assert(res1.status == STRING_OK);
|
|
||||||
assert(strcmp((char*)res1.value.symbol, "😆") == 0);
|
|
||||||
free(res1.value.symbol);
|
|
||||||
|
|
||||||
// 🌍 is at index 3
|
|
||||||
string_result_t res2 = string_get_at(str, 3);
|
|
||||||
assert(res2.status == STRING_OK);
|
|
||||||
assert(strcmp((char*)res2.value.symbol, "🌍") == 0);
|
|
||||||
free(res2.value.symbol);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string get_at with invalid index
|
|
||||||
void test_string_get_at_overflow(void) {
|
|
||||||
string_t *str = string_new("ABC").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_get_at(str, 50);
|
|
||||||
assert(res.status == STRING_ERR_OVERFLOW);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test mutation of UTF-8 symbol
|
|
||||||
void test_string_set_at(void) {
|
|
||||||
string_t *str = string_new("ABC").value.string;
|
|
||||||
|
|
||||||
// Replace 'B' with emoji
|
|
||||||
string_result_t res = string_set_at(str, 1, "😆");
|
|
||||||
string_t *altered = res.value.string;
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(strcmp(altered->data, "A😆C") == 0);
|
|
||||||
assert(string_size(altered) == 3);
|
|
||||||
assert(altered->byte_size == 6); // that is: A (1B) + emoji (4B) + C (1B)
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
string_destroy(altered);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test mutation of invalid UTF-8 symbol
|
|
||||||
void test_string_set_at_invalid_utf8(void) {
|
|
||||||
string_t *str = string_new("ABC").value.string;
|
|
||||||
|
|
||||||
const char * const invalid_sym1 = "\xFF";
|
|
||||||
const char * const invalid_sym2 = "\x80";
|
|
||||||
|
|
||||||
string_result_t res1 = string_set_at(str, 1, invalid_sym1);
|
|
||||||
assert(res1.status == STRING_ERR_INVALID_UTF8);
|
|
||||||
|
|
||||||
string_result_t res2 = string_set_at(str, 1, invalid_sym2);
|
|
||||||
assert(res2.status == STRING_ERR_INVALID_UTF8);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test mutation with overflow
|
|
||||||
void test_string_set_at_overflow(void) {
|
|
||||||
string_t *str = string_new("ABC").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_set_at(str, 10, "a");
|
|
||||||
assert(res.status == STRING_ERR_OVERFLOW);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string to lowercase
|
|
||||||
void test_string_to_lower(void) {
|
|
||||||
string_t *str = string_new("AbC").value.string;
|
|
||||||
string_result_t res = string_to_lower(str);
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(strcmp(res.value.string->data, "abc") == 0);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string to uppercase
|
|
||||||
void test_string_to_upper(void) {
|
|
||||||
string_t *str = string_new("aBc").value.string;
|
|
||||||
string_result_t res = string_to_upper(str);
|
|
||||||
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(strcmp(res.value.string->data, "ABC") == 0);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test whitespace trimming
|
|
||||||
void test_string_trim(void) {
|
|
||||||
string_t *str = string_new(" \t Foo Bar \n ").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_trim(str);
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(strcmp(res.value.string->data, "Foo Bar") == 0);
|
|
||||||
|
|
||||||
string_destroy(str);
|
|
||||||
string_destroy(res.value.string);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string splitting into an array
|
|
||||||
void test_string_split(void) {
|
|
||||||
string_t *str = string_new("Red,Green,Blue").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_split(str, ",");
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
assert(res.value.split.count == 3);
|
|
||||||
|
|
||||||
const size_t count = res.value.split.count;
|
|
||||||
string_t **strings = res.value.split.strings;
|
|
||||||
|
|
||||||
const char *expected[] = { "Red", "Green", "Blue" };
|
|
||||||
for (size_t idx = 0; idx < count; idx++) {
|
|
||||||
assert(strcmp(strings[idx]->data, expected[idx]) == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
string_split_destroy(strings, count);
|
|
||||||
string_destroy(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test string destroy
|
|
||||||
void test_string_destroy(void) {
|
|
||||||
string_t *str = string_new("delete me").value.string;
|
|
||||||
|
|
||||||
string_result_t res = string_destroy(str);
|
|
||||||
assert(res.status == STRING_OK);
|
|
||||||
|
|
||||||
string_result_t res_null = string_destroy(NULL);
|
|
||||||
assert(res_null.status == STRING_ERR_INVALID);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(void) {
|
|
||||||
printf("=== Running String unit tests ===\n\n");
|
|
||||||
|
|
||||||
TEST(string_new);
|
|
||||||
TEST(string_new_empty);
|
|
||||||
TEST(string_clone);
|
|
||||||
TEST(string_concat);
|
|
||||||
TEST(string_contains);
|
|
||||||
TEST(string_slice);
|
|
||||||
TEST(string_eq);
|
|
||||||
TEST(string_reverse_utf8);
|
|
||||||
TEST(string_get_at);
|
|
||||||
TEST(string_get_at_overflow);
|
|
||||||
TEST(string_set_at);
|
|
||||||
TEST(string_set_at_overflow);
|
|
||||||
TEST(string_set_at_invalid_utf8);
|
|
||||||
TEST(string_to_lower);
|
|
||||||
TEST(string_to_upper);
|
|
||||||
TEST(string_trim);
|
|
||||||
TEST(string_split);
|
|
||||||
TEST(string_destroy);
|
|
||||||
|
|
||||||
printf("\n=== All tests passed! ===\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
187
usage.c
187
usage.c
@@ -25,12 +25,10 @@
|
|||||||
#include "src/vector.h"
|
#include "src/vector.h"
|
||||||
#include "src/map.h"
|
#include "src/map.h"
|
||||||
#include "src/bigint.h"
|
#include "src/bigint.h"
|
||||||
#include "src/string.h"
|
|
||||||
|
|
||||||
static int vector_usage(void);
|
static int vector_usage(void);
|
||||||
static int map_usage(void);
|
static int map_usage(void);
|
||||||
static int bigint_usage(void);
|
static int bigint_usage(void);
|
||||||
static int string_usage(void);
|
|
||||||
|
|
||||||
static vector_order_t cmp_int_asc(const void *x, const void *y);
|
static vector_order_t cmp_int_asc(const void *x, const void *y);
|
||||||
static vector_order_t cmp_int_desc(const void *x, const void *y);
|
static vector_order_t cmp_int_desc(const void *x, const void *y);
|
||||||
@@ -54,11 +52,6 @@ int main(void) {
|
|||||||
st = bigint_usage();
|
st = bigint_usage();
|
||||||
if (st) { return st; }
|
if (st) { return st; }
|
||||||
|
|
||||||
SEP(50);
|
|
||||||
|
|
||||||
st = string_usage();
|
|
||||||
if (st) { return st; }
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -502,7 +495,7 @@ int bigint_usage(void) {
|
|||||||
// Print result
|
// Print result
|
||||||
bigint_printf("multiplication result = %B\n", prod);
|
bigint_printf("multiplication result = %B\n", prod);
|
||||||
|
|
||||||
bigint_t *a = bigint_from_string(x_origin).value.number;
|
bigint_t *a = bigint_from_string(large_x).value.number;
|
||||||
bigint_t *b = bigint_from_string(y_origin).value.number;
|
bigint_t *b = bigint_from_string(y_origin).value.number;
|
||||||
|
|
||||||
// Divide two big integers
|
// Divide two big integers
|
||||||
@@ -531,181 +524,3 @@ int bigint_usage(void) {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int string_usage(void) {
|
|
||||||
// Create a new string
|
|
||||||
string_result_t res = string_new("Hello, ");
|
|
||||||
if (res.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_t *str1 = res.value.string;
|
|
||||||
printf("Created string: \"%s\"\n", str1->data);
|
|
||||||
printf("Character count: %zu (%zu actual bytes)\n", string_size(str1), str1->byte_size);
|
|
||||||
|
|
||||||
string_result_t res_clone = string_clone(str1);
|
|
||||||
if (res_clone.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_t *cloned = res_clone.value.string;
|
|
||||||
printf("Cloned string: \"%s\"\n\n", cloned->data);
|
|
||||||
string_destroy(cloned);
|
|
||||||
|
|
||||||
// Concatenation of strings
|
|
||||||
string_result_t res_suffix = string_new("World! 🦜");
|
|
||||||
if (res_suffix.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
string_t *suffix = res_suffix.value.string;
|
|
||||||
printf("Created another string: \"%s\"\n", suffix->data);
|
|
||||||
printf("Character count: %zu (%zu actual bytes)\n\n", string_size(suffix), suffix->byte_size);
|
|
||||||
|
|
||||||
string_result_t res_cat = string_concat(str1, suffix);
|
|
||||||
if (res_cat.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_cat.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
string_destroy(suffix);
|
|
||||||
|
|
||||||
string_t *concat_str = res_cat.value.string;
|
|
||||||
printf("Concatenation result: \"%s\"\n\n", concat_str->data);
|
|
||||||
|
|
||||||
// String contains
|
|
||||||
string_t *haystack = string_new("The quick brown fox jumps over the lazy dog.").value.string;
|
|
||||||
string_t *needle = string_new("brown fox").value.string;
|
|
||||||
|
|
||||||
string_result_t res_contains = string_contains(haystack, needle);
|
|
||||||
if (res_contains.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_contains.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (res_contains.value.idx != -1) {
|
|
||||||
printf("Substring found. Starting at index %zu\n\n", res_contains.value.idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
string_destroy(haystack);
|
|
||||||
string_destroy(needle);
|
|
||||||
|
|
||||||
// String slicing
|
|
||||||
string_result_t res_slice = string_slice(concat_str, 7, 14);
|
|
||||||
if (res_slice.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_slice.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Slice of string: \"%s\"\n\n", res_slice.value.string->data);
|
|
||||||
string_destroy(res_slice.value.string);
|
|
||||||
|
|
||||||
// String equality
|
|
||||||
string_t *compare = string_new("hello, World! 🦜").value.string;
|
|
||||||
string_result_t res_eq = string_eq(concat_str, compare, true);
|
|
||||||
if (res_eq.value.is_equ) {
|
|
||||||
printf("The two strings are equal\n\n");
|
|
||||||
} else {
|
|
||||||
printf("The two strings are not equal\n\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
string_destroy(compare);
|
|
||||||
|
|
||||||
// Uppercase string
|
|
||||||
string_result_t res_upper = string_to_upper(concat_str);
|
|
||||||
if (res_upper.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_upper.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
printf("Uppercase: \"%s\"\n", res_upper.value.string->data);
|
|
||||||
string_destroy(res_upper.value.string);
|
|
||||||
|
|
||||||
// Lowercase string
|
|
||||||
string_result_t res_lower = string_to_lower(concat_str);
|
|
||||||
if (res_lower.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_lower.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
printf("Lowercase: \"%s\"\n\n", res_lower.value.string->data);
|
|
||||||
string_destroy(res_lower.value.string);
|
|
||||||
|
|
||||||
// Reverse string
|
|
||||||
string_result_t res_rev = string_reverse(concat_str);
|
|
||||||
if (res_rev.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_rev.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
printf("Reversed: \"%s\"\n\n", res_rev.value.string->data);
|
|
||||||
string_destroy(res_rev.value.string);
|
|
||||||
|
|
||||||
// Change first character of the string
|
|
||||||
string_result_t res_set = string_set_at(concat_str, 0, "J");
|
|
||||||
if (res_set.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_set.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
printf("Updated string: \"%s\"\n\n", res_set.value.string->data);
|
|
||||||
string_destroy(res_set.value.string);
|
|
||||||
|
|
||||||
// Get character from string (the emoji)
|
|
||||||
string_result_t res_get = string_get_at(concat_str, 14);
|
|
||||||
if (res_get.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_get.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
printf("Extracted symbol: \"%s\"\n", res_get.value.symbol);
|
|
||||||
free(res_get.value.symbol);
|
|
||||||
|
|
||||||
// Trim string
|
|
||||||
string_t *to_trim = string_new(" foo ").value.string;
|
|
||||||
string_result_t res_trim = string_trim(to_trim);
|
|
||||||
if (res_trim.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_trim.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Trimmed string: \"%s\"\n\n", res_trim.value.string->data);
|
|
||||||
string_destroy(to_trim);
|
|
||||||
string_destroy(res_trim.value.string);
|
|
||||||
|
|
||||||
// Split string
|
|
||||||
string_t *to_split = string_new("foo/bar/biz").value.string;
|
|
||||||
string_result_t res_split = string_split(to_split, "/");
|
|
||||||
if (res_split.status != STRING_OK) {
|
|
||||||
printf("Error: %s\n", res_split.message);
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t count = res_split.value.split.count;
|
|
||||||
string_t **strings = res_split.value.split.strings;
|
|
||||||
|
|
||||||
printf("Original string: \"%s\"\nSplitted string: ", to_split->data);
|
|
||||||
for (size_t idx = 0; idx < count; idx++) {
|
|
||||||
printf("\"%s\" ", strings[idx]->data);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
string_split_destroy(strings, count);
|
|
||||||
string_destroy(to_split);
|
|
||||||
|
|
||||||
string_destroy(concat_str);
|
|
||||||
string_destroy(str1);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user