Fixed minor bugs and added documentation for big integers

2025-11-18 11:27:34 +01:00
parent 9e419d09ac
commit c658c93182
9 changed files with 141 additions and 28 deletions
--- a/.github/workflows/clang-build.yml
+++ b/.github/workflows/clang-build.yml
@@ -12,7 +12,7 @@ jobs:

      - name: Build Datum
        run: |
-          make clean all
+          make clean all CC=clang

      - name: Run unit tests
        run: |
--- a/docs/bigint.md
+++ b/docs/bigint.md
@@ -0,0 +1,92 @@
+# BigInt Technical Details
+In this document you can find a quick overview of the technical
+aspects (internal design, memory layout, etc.) of the `BigInt` data structure. 
+
+`BigInt` is a data type for arbitrary precision arithmetic that supports addition,
+subtraction, multiplication, division and modulo operations on signed integers of unlimited size. Internally, it uses
+the `Vector` data structure to represent big numbers using the following layout:
+
+```
+Number:     2485795518678991171206065
+Internally: [ 171206065, 518678991, 2485795 ]
+                /            |          \
+               /             |           \
+           digit[0]      digit[1]      digit[2]
+            (LSB)                        (MSB)
+```
+
+That is, each element of the vector stores one digit in base $10^9$ (i.e., a group of up to 9 decimal digits) using
+**little-endian order**. Each such digit can therefore store values from `0` up to
+`999,999,999`.
+
+This scheme maps to the following structure:
+
+```c
+typedef struct {
+    vector_t *digits;
+    bool is_negative;
+} bigint_t;
+```
+
+where the `digits` array stores the representation in base $10^9$ of the big integer
+and the boolean `is_negative` variable denotes its sign.
+
+The `BigInt` data structure supports the following methods:
+
+- `bigint_result_t bigint_from_int(value)`: create a big integer from a primitive `int` type;  
+- `bigint_result_t bigint_from_string(string_num)`: create a big integer from a C string;  
+- `bigint_result_t bigint_to_string(number)`: convert a big integer to a C string;  
+- `bigint_result_t bigint_clone(number)`:  clone a big integer;  
+- `bigint_result_t bigint_compare(x, y)`: compare two big integers, returning `-1`, `0` or `1` depending on whether the first is less than, equal to or greater than the second, respectively;  
+- `bigint_result_t bigint_add(x, y)`: add two big integers together in $\mathcal{O}(n)$;  
+- `bigint_result_t bigint_sub(x, y)`: subtract two big integers in $\mathcal{O}(n)$;  
+- `bigint_result_t bigint_prod(x, y)`: multiply two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;  
+- `bigint_result_t bigint_divmod(x, y)`: divide two big integers using the *long division* algorithm in $\mathcal{O}(n^2)$, returning both the quotient and the remainder;  
+- `bigint_result_t bigint_mod(x, y)`: compute the modulo of two big integers using the *long division* algorithm in $\mathcal{O}(n^2)$;  
+- `bigint_result_t bigint_destroy(number)`: delete the big number;  
+- `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
+
+As you can see from the previous function signatures, methods that operate on the
+`BigInt` data type return a custom type called `bigint_result_t` which is defined as
+follows:
+
+```c
+typedef enum {
+    BIGINT_OK = 0x0,
+    BIGINT_ERR_ALLOCATE,
+    BIGINT_ERR_DIV_BY_ZERO,
+    BIGINT_ERR_INVALID
+} bigint_status_t;
+
+typedef struct {
+    bigint_t *quotient;
+    bigint_t *remainder;
+} div_result_t;
+
+typedef struct {
+    bigint_status_t status;
+    uint8_t message[RESULT_MSG_SIZE];
+    union {
+        bigint_t *number;
+        div_result_t division;
+        int8_t compare_status;
+        char *string_num;
+    } value;
+} bigint_result_t;
+```
+
+Each method that returns such type indicates whether the operation was successful or not
+by setting the `status` field and by providing a descriptive message in the `message`
+field. If the operation was successful (that is, `status == BIGINT_OK`), you can either
+move on with the rest of the program or read the returned value from the sum data type.
+Of course, you can choose to ignore the return value (if you're brave enough :D) as 
+illustrated in the first part of the README.
+
+The sum data type (i.e., the `value` union) defines four different members. Each
+of them has a unique purpose as described below:
+
+- `number`: result of arithmetical, cloning and creating functions;  
+- `division`: result of `bigint_divmod`;  
+- `compare_status`: result of `bigint_compare`;  
+- `string_num`: result of `bigint_to_string`.
+
--- a/docs/map.md
+++ b/docs/map.md
@@ -71,5 +71,5 @@ typedef struct {
 Each method that returns such type indicates whether the operation was successful or not by setting
 the `status` field and by providing a descriptive message on the `message` field. If the operation was
 successful (that is, `status == MAP_OK`), you can either move on with the rest of the program or read
-the returned value from the sum data type. Of course, you can choose to ignore the return value (if you're brave enough :D), as illustrated
+the returned value from the sum data type. Of course, you can choose to ignore the return value (if you're brave enough :D) as illustrated
 in the first part of the README.
--- a/docs/sort.md
+++ b/docs/sort.md
@@ -42,7 +42,7 @@ vector_order_t cmp_int_desc(const void *x, const void *y) {
 }

 /*
- * Compile with: gcc main.c src/vector.h
+ * Compile with: gcc main.c src/vector.c
 * Output: Before sorting: -8 20 -10 125 34 9 
 *         After sorting (ascending order): -10 -8 9 20 34 125 
 *         After sorting (descending order): 125 34 20 9 -8 -10 
@@ -55,9 +55,11 @@ int main(void) {
        vector_push(v, &values[idx]);
    }

+    const size_t sz = vector_size(v);
+
    // Print unsorted array
    printf("Before sorting: ");
-    for (size_t idx = 0; idx < vector_size(v); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        printf("%d ", *(int*)vector_get(v, idx).value.element);
    }

@@ -66,7 +68,7 @@ int main(void) {

    // Print sorted array
    printf("\nAfter sorting (ascending order): ");
-    for (size_t idx = 0; idx < vector_size(v); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        printf("%d ", *(int*)vector_get(v, idx).value.element);
    }

@@ -75,7 +77,7 @@ int main(void) {

    // Print sorted array
    printf("\nAfter sorting (descending order): ");
-    for (size_t idx = 0; idx < vector_size(v); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        printf("%d ", *(int*)vector_get(v, idx).value.element);
    }

@@ -124,7 +126,7 @@ vector_order_t cmp_person_by_name(const void *x, const void *y) {
 }

 /*
- * Compile with: gcc main.c src/vector.h
+ * Compile with: gcc main.c src/vector.c
 * Output: Sort by age:
 *         Name: Marco, Age: 25
 *         Name: Alice, Age: 28
@@ -149,9 +151,11 @@ int main(void) {
    // Sort array by age
    vector_sort(employees, cmp_person_by_age);

+    const size_t sz = vector_size(employees);
+
    // Print sorted array
    printf("Sort by age:\n");
-    for (size_t idx = 0; idx < vector_size(employees); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        Employee *p = (Employee*)vector_get(employees, idx).value.element;
        printf("Name: %s, Age: %d\n", p->name, p->age);
    }
@@ -161,7 +165,7 @@ int main(void) {
    
    // Print sorted array
    printf("\nSort by name:\n");
-    for (size_t idx = 0; idx < vector_size(employees); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        Employee *p = (Employee*)vector_get(employees, idx).value.element;
        printf("Name: %s, Age: %d\n", p->name, p->age);
    }
--- a/docs/vector.md
+++ b/docs/vector.md
@@ -64,7 +64,7 @@ Each method that returns such type indicates whether the operation was successfu
 by setting the `status` field and by providing a descriptive message on the `message`
 field. If the operation was successful (that is, `status == VECTOR_OK`), you can either
 move on with the rest of the program or read the returned value from the sum data type. Of course, you can choose to 
-ignore the return value (if you're brave enough :D), as illustrated in the first part of the README.
+ignore the return value (if you're brave enough :D) as illustrated in the first part of the README.

 The documentation for the `vector_sort(map, cmp)` method can be found
 in [the following document](/docs/sort.md). 
--- a/src/bigint.c
+++ b/src/bigint.c
@@ -315,7 +315,8 @@ bigint_result_t bigint_clone(const bigint_t *number) {
    cloned->is_negative = number->is_negative;

    // Copy digits
-    for (size_t idx = 0; idx < vector_size(number->digits); idx++) {
+    const size_t sz = vector_size(number->digits);
+    for (size_t idx = 0; idx < sz; idx++) {
        vector_result_t get_res = vector_get(number->digits, idx);
        if (get_res.status != VECTOR_OK) {
            vector_destroy(cloned->digits);
@@ -546,10 +547,12 @@ bigint_result_t bigint_add_abs(const bigint_t *x, const bigint_t *y) {
    long long carry = 0;
    size_t idx = 0;

-    while (idx < vector_size(x->digits) || idx < vector_size(y->digits) || carry) {
+    const size_t x_size = vector_size(x->digits);
+    const size_t y_size = vector_size(y->digits);
+    while (idx < x_size || idx < y_size || carry) {
        long long partial_sum = carry;

-        if (idx < vector_size(x->digits)) {
+        if (idx < x_size) {
            vector_result_t get_res = vector_get(x->digits, idx);
            if (get_res.status != VECTOR_OK) {
                vector_destroy(sum->digits);
@@ -564,7 +567,7 @@ bigint_result_t bigint_add_abs(const bigint_t *x, const bigint_t *y) {
            partial_sum += *x_digit;
        }

-        if (idx < vector_size(y->digits)) {
+        if (idx < y_size) {
            vector_result_t get_res = vector_get(y->digits, idx);
            if (get_res.status != VECTOR_OK) {
                vector_destroy(sum->digits);
@@ -643,7 +646,9 @@ bigint_result_t bigint_sub_abs(const bigint_t *x, const bigint_t *y) {

    long long borrow = 0;

-    for (size_t idx = 0; idx < vector_size(x->digits); idx++) {
+    const size_t x_size = vector_size(x->digits);
+    const size_t y_size = vector_size(y->digits);
+    for (size_t idx = 0; idx < x_size; idx++) {
        vector_result_t x_get_res = vector_get(x->digits, idx);
        if (x_get_res.status != VECTOR_OK) {
            vector_destroy(difference->digits);
@@ -657,7 +662,7 @@ bigint_result_t bigint_sub_abs(const bigint_t *x, const bigint_t *y) {
        int *x_digit = (int*)x_get_res.value.element;
        long long partial_difference = *x_digit - borrow;

-        if (idx < vector_size(y->digits)) {
+        if (idx < y_size) {
            vector_result_t y_get_res = vector_get(y->digits, idx);
            if (y_get_res.status != VECTOR_OK) {
                vector_destroy(difference->digits);
@@ -1074,7 +1079,8 @@ bigint_result_t bigint_shift_left(const bigint_t *num, size_t n) {
    }

    // Copy back original digits
-    for (size_t idx = 0; idx < vector_size(num->digits); idx++) {
+    const size_t num_size = vector_size(num->digits);
+    for (size_t idx = 0; idx < num_size; idx++) {
        vector_result_t get_res = vector_get(num->digits, idx);
        if (get_res.status != VECTOR_OK) {
            vector_destroy(shifted->digits);
--- a/src/bigint.h
+++ b/src/bigint.h
@@ -3,7 +3,7 @@

 #define RESULT_MSG_SIZE 64

-// Big numbers numerical base (10^9)
+// Numerical base (10^9)
 #define BIGINT_BASE 1000000000
 // Each digit stores values from 0 to 999,999,999
 #define BIGINT_BASE_DIGITS 9
--- a/tests/test_vector.c
+++ b/tests/test_vector.c
@@ -134,7 +134,9 @@ void test_vector_sort_int_asc(void) {
    assert(sort_res.status == VECTOR_OK);

    const int expected[] = { -7, 1, 4, 6, 12, 25, 25, 71 };
-    for (size_t idx = 0; idx < vector_size(v); idx++) {
+
+    const size_t sz = vector_size(v);
+    for (size_t idx = 0; idx < sz; idx++) {
        int *val = (int*)vector_get(v, idx).value.element;
        assert(*val == expected[idx]);
    }
@@ -157,7 +159,9 @@ void test_vector_sort_int_desc(void) {
    assert(sort_res.status == VECTOR_OK);

    const int expected[] = { 71, 25, 25, 12, 6, 4, 1, -7 };
-    for (size_t idx = 0; idx < vector_size(v); idx++) {
+
+    const size_t sz = vector_size(v);
+    for (size_t idx = 0; idx < sz; idx++) {
        int *val = (int*)vector_get(v, idx).value.element;
        assert(*val == expected[idx]);
    }
@@ -198,7 +202,9 @@ void test_vector_sort_string(void) {
    assert(sort_res.status == VECTOR_OK);

    const char *expected[] = { "world!", "system-programming", "hello", "foo", "embedded", "bar"};
-    for (size_t idx = 0; idx < vector_size(v); idx++) {
+
+    const size_t sz = vector_size(v);
+    for (size_t idx = 0; idx < sz; idx++) {
        const char *val = *(const char**)vector_get(v, idx).value.element;
        assert(!strcmp(val, expected[idx]));
    }
@@ -208,7 +214,7 @@ void test_vector_sort_string(void) {

 // Sort vector with custom data type
 typedef struct {
-    const char *name;
+    char name[256];
    int age;
 } Person;

@@ -257,7 +263,8 @@ void test_vector_sort_struct_by_age(void) {
        { .name = "Bob", .age = 45 }
    };

-    for (size_t idx = 0; idx < vector_size(people); idx++) {
+    const size_t sz = sizeof(expected) / sizeof(expected[0]);
+    for (size_t idx = 0; idx < sz; idx++) {
        Person *p = (Person*)vector_get(people, idx).value.element;
        assert(!strcmp(p->name, expected[idx].name));
        assert(p->age == expected[idx].age);
@@ -295,7 +302,8 @@ void test_vector_sort_struct_by_name(void) {
        { .name = "Sophia", .age = 45 }
    };

-    for (size_t idx = 0; idx < vector_size(people); idx++) {
+    const size_t sz = vector_size(people);
+    for (size_t idx = 0; idx < sz; idx++) {
        Person *p = (Person*)vector_get(people, idx).value.element;
        assert(!strcmp(p->name, expected[idx].name));
        assert(p->age == expected[idx].age);
--- a/usage.c
+++ b/usage.c
@@ -88,7 +88,8 @@ int vector_usage(void) {
    printf("Vector capacity (should be > 5): %zu\n\n", vector_capacity(vector));

    // Print the whole vector
-    for (size_t idx = 0; idx < vector_size(vector); idx++) {
+    size_t sz = vector_size(vector);
+    for (size_t idx = 0; idx < sz; idx++) {
        vector_result_t get_res = vector_get(vector, idx);
        if (get_res.status != VECTOR_OK) {
            printf("Cannot retrieve vec[%zu]: %s\n", idx, get_res.message);
@@ -136,7 +137,9 @@ int vector_usage(void) {
    }

    printf("Added new elements. Before sort: ");
-    for (size_t idx = 0; idx < vector_size(vector); idx++) {
+
+    sz = vector_size(vector);
+    for (size_t idx = 0; idx < sz; idx++) {
        vector_result_t sort_get_res = vector_get(vector, idx);
        if (sort_get_res.status != VECTOR_OK) {
            printf("Cannot retrieve vec[%zu]: %s\n", idx, sort_get_res.message);
@@ -157,7 +160,7 @@ int vector_usage(void) {
    }

    printf("After sort in ascending order: ");
-    for (size_t idx = 0; idx < vector_size(vector); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        vector_result_t sort_get_res = vector_get(vector, idx);
        if (sort_get_res.status != VECTOR_OK) {
            printf("Cannot retrieve vec[%zu]: %s\n", idx, sort_get_res.message);
@@ -179,7 +182,7 @@ int vector_usage(void) {
    }

    printf("After sort in descending order: ");
-    for (size_t idx = 0; idx < vector_size(vector); idx++) {
+    for (size_t idx = 0; idx < sz; idx++) {
        vector_result_t sort_get_res = vector_get(vector, idx);
        if (sort_get_res.status != VECTOR_OK) {
            printf("Cannot retrieve vec[%zu]: %s\n", idx, sort_get_res.message);