From a02f2dff40153f239ae663da3d4a61fbb61cfbe9 Mon Sep 17 00:00:00 2001
From: Marco Cetica <email@marcocetica.com>
Date: Wed, 25 Feb 2026 17:13:49 +0100
Subject: [PATCH 1/3] Added Knuth's "Algorithm D" from TAOCP "Seminumerical
 algorithms"

---
 Makefile              |   2 +-
 README.md             |  10 +-
 benchmark/benchmark.c |  99 ++++++++++--
 docs/bigint.md        |  32 ++--
 docs/map.md           |  12 +-
 docs/vector.md        |  26 +--
 src/bigint.c          | 367 ++++++++++++++++++++++++++++--------------
 tests/test_bigint.c   |  41 +++--
 usage.c               |   2 +-
 9 files changed, 403 insertions(+), 188 deletions(-)

diff --git a/Makefile b/Makefile
index 01e5c2d..f145c58 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ $(OBJ_DIR):
 	mkdir -p $(OBJ_DIR)
 
 # Benchmark rules
-$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o
+$(BENCH_TARGET): $(BENCH_OBJ_DIR)/bench.o $(BENCH_OBJ_DIR)/vector.o $(BENCH_OBJ_DIR)/map.o $(BENCH_OBJ_DIR)/bigint.o
 	$(CC) $(BENCH_FLAGS) -o $@ $^
 
 $(BENCH_OBJ_DIR)/%.o: $(SRC_DIR)/%.c | $(BENCH_OBJ_DIR)
diff --git a/README.md b/README.md
index f8d9476..9ca1a68 100644
--- a/README.md
+++ b/README.md
@@ -145,9 +145,7 @@ This will compile the library as well as the `usage.c` file, the unit tests and
 
 > [!NOTE]
 > This project is primarily developed for learning purposes and was not created with industrial
-> or production use in mind. As such, it is not intended to compete with any existing C library.
-> In particular, the big number implementation does not aim to match the design, the maturity and
-> the performance of established solutions such as the
+> or production use in mind. As such, it is not intended to compete with any existing C library such as the
 > GNU Multiple Precision Arithmetic Library (GMP).
 
 ## Documentation
@@ -167,9 +165,11 @@ $ ./test_bigint
 Under the [`benchmark/`](/benchmark/) folder, you can find a simple benchmark program that stress the `Vector` and the `Map` data structures. You can run it by issuing the following command:
 
 ```sh
+$ make clean all CC=clang
 $ ./benchmark_datum
-Computing Vector average time...average time: 18 ms
-Computing Map average time...average time: 31 ms
+omputing Vector average time...average time: 8 ms
+Computing Map average time...average time: 53 ms
+Computing BigInt average time...average time: 76 ms
 ```
 
 
diff --git a/benchmark/benchmark.c b/benchmark/benchmark.c
index 913f4cd..f083ca1 100644
--- a/benchmark/benchmark.c
+++ b/benchmark/benchmark.c
@@ -6,6 +6,7 @@
 
 #include "../src/vector.h"
 #include "../src/map.h"
+#include "../src/bigint.h"
 
 typedef void (*test_fn_t)(size_t iterations);
 
@@ -22,11 +23,6 @@ void test_vector(size_t iterations) {
         sum += *val;
     }
 
-    // Another trick to prevent compiler optimization
-    if (sum == 0xB00B5) {
-        printf("sum = %llu\n", (unsigned long long)sum);
-    }
-
     vector_destroy(vec);
 }
 
@@ -53,32 +49,99 @@ void test_map(size_t iterations) {
 
     // Cleanup values
     for (size_t idx = 0; idx < map->capacity; idx++) {
-        if (map->elements[idx].state == ENTRY_OCCUPIED) {
-            int *val = (int*)map->elements[idx].value;
-            free(val);
-        }
+        snprintf(key, sizeof(key), "key_%zu", idx);
+
+        int *val = (int *)map_get(map, key).value.element;
+        free(val);
+
+        map_remove(map, key);
     }
 
     map_destroy(map);
 }
 
+void test_bigint(size_t iterations) {
+    volatile uint64_t accumulator = 0;
+
+    for (size_t idx = 1; idx <= iterations; idx++) {
+        long long a_val = (long long)idx * 123456789LL;
+        long long b_val = (long long)idx * 17777LL;
+
+        bigint_result_t a_res = bigint_from_int(a_val);
+        bigint_result_t b_res = bigint_from_int(b_val);
+
+        if (a_res.status != BIGINT_OK || b_res.status != BIGINT_OK) {
+            bigint_destroy(a_res.value.number);
+            bigint_destroy(b_res.value.number);
+            continue;
+        }
+
+        bigint_t *a = a_res.value.number;
+        bigint_t *b = b_res.value.number;
+
+        // Addition
+        bigint_result_t add_res = bigint_add(a, b);
+        if (add_res.status == BIGINT_OK) {
+            vector_result_t v = vector_get(add_res.value.number->digits, 0);
+            if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
+            bigint_destroy(add_res.value.number);
+        }
+
+        // Substraction
+        bigint_result_t sub_res = bigint_sub(a, b);
+        if (sub_res.status == BIGINT_OK) {
+            vector_result_t v = vector_get(sub_res.value.number->digits, 0);
+            if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
+            bigint_destroy(sub_res.value.number);
+        }
+
+        // Multiplication
+        bigint_result_t mul_res = bigint_prod(a, b);
+        if (mul_res.status == BIGINT_OK) {
+            vector_result_t v = vector_get(mul_res.value.number->digits, 0);
+            if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
+            bigint_destroy(mul_res.value.number);
+        }
+
+        // Division
+        bigint_result_t div_res = bigint_divmod(a, b);
+        if (div_res.status == BIGINT_OK) {
+            vector_result_t v = vector_get(div_res.value.division.quotient->digits, 0);
+            if (v.status == VECTOR_OK) { accumulator += *(int *)v.value.element; }
+            bigint_destroy(div_res.value.division.quotient);
+            bigint_destroy(div_res.value.division.remainder);
+        }
+
+        bigint_destroy(a); bigint_destroy(b);
+    }
+}
+
+static inline uint64_t now_ns(void) {
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+
+    return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
 long long benchmark(test_fn_t fun, size_t iterations, size_t runs) {
     long long total = 0;
-    for (size_t idx = 0; idx < runs; idx++) {
-        clock_t start = clock();
-        fun(iterations);
-        clock_t end = clock();
 
-        total += (long long)((end - start) * 1000 / CLOCKS_PER_SEC);
+    for (size_t idx = 0; idx < runs; idx++) {
+        uint64_t start = now_ns();
+        fun(iterations);
+        uint64_t end = now_ns();
+
+        total += (end - start);
     }
 
-    return total / runs;
+    return (long long)(total / runs / 1000000);
 }
 
 int main(void) {
     // Do a warmup run
     test_vector(1000);
     test_map(1000);
+    test_bigint(1000);
 
     printf("Computing Vector average time...");
     fflush(stdout);
@@ -88,5 +151,9 @@ int main(void) {
     fflush(stdout);
     printf("average time: %lld ms\n", benchmark(test_map, 1e5, 30));
 
+    printf("Computing BigInt average time...");
+    fflush(stdout);
+    printf("average time: %lld ms\n", benchmark(test_bigint, 1e5, 30));
+
     return 0;
-}
\ No newline at end of file
+}
diff --git a/docs/bigint.md b/docs/bigint.md
index 38819a7..bd488e4 100644
--- a/docs/bigint.md
+++ b/docs/bigint.md
@@ -33,17 +33,18 @@ and the boolean `is_negative` variable denotes its sign.
 
 The `BigInt` data structure supports the following methods:
 
-- `bigint_result_t bigint_from_int(value)`: create a big integer from a primitive `int` type;  
-- `bigint_result_t bigint_from_string(string_num)`: create a big integer from a C string;  
-- `bigint_result_t bigint_to_string(number)`: convert a big integer to a C string;  
-- `bigint_result_t bigint_clone(number)`:  clone a big integer;  
-- `bigint_result_t bigint_compare(x, y)`: compare two big integers, returning either `-1`, `0` or `1` if the first is less than, equal than or greater than the second, respectively;  
-- `bigint_result_t bigint_add(x, y)`: add two big integers together in $\mathcal{O}(n)$;  
-- `bigint_result_t bigint_sub(x, y)`: subtract two big integers in $\mathcal{O}(n)$;  
-- `bigint_result_t bigint_prod(x, y)`: multiply two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;  
-- `bigint_result_t bigint_divmod(x, y)`: divide two big integers using *long division* algorithm in $\mathcal{O}(n^2)$, returning both the quotient and the remainder;  
-- `bigint_result_t bigint_mod(x, y)`: computes modulo of two big integers using *long division* algorithm in $\mathcal{O}(n^2)$;  
-- `bigint_result_t bigint_destroy(number)`: delete the big number;  
+- `bigint_result_t bigint_from_int(value)`: creates a big integer from a primitive `int` type;  
+- `bigint_result_t bigint_from_string(string_num)`: creates a big integer from a C string;  
+- `bigint_result_t bigint_to_string(number)`: converts a big integer to a C string;  
+- `bigint_result_t bigint_clone(number)`:  clones a big integer;  
+- `bigint_result_t bigint_compare(x, y)`: compares two big integers, returning either `-1`, `0` or `1` if the first is less than, equal than or greater than the second, respectively;  
+- `bigint_result_t bigint_add(x, y)`: adds two big integers together in $\mathcal{O}(n)$;  
+- `bigint_result_t bigint_sub(x, y)`: subtracts two big integers in $\mathcal{O}(n)$;  
+- `bigint_result_t bigint_prod(x, y)`: multiplies two big integers using Karatsuba's algorithm in $\mathcal{O}(n^{1.585})$;  
+- `bigint_result_t bigint_divmod(x, y)`: divides two big integers using _Knuth's Algorithm D_ in $\mathcal{O}(n \times m)$ where $n$ and $m$ are the number of base-10^9 
+parts/limbs in the divisor and the quotient, respectively. This method returns both the quotient and the remainder;  
+- `bigint_result_t bigint_mod(x, y)`: calls `bigint_divmod`, discards the quotient and yields the remainder;  
+- `bigint_result_t bigint_destroy(number)`: deletes the big number;  
 - `bigint_result_t bigint_printf(format, ...)`: `printf` wrapper that introduces the `%B` placeholder to print big numbers. It supports variadic parameters.
 
 As you can see from the previous function signatures, methods that operate on the
@@ -90,12 +91,3 @@ of them has an unique scope as described below:
 - `compare_status`: result of `bigint_compare`;  
 - `string_num`: result of `bigint_to_string`.
 
-
-> [!IMPORTANT]
-> Currently, the division implementation employs a quadratic-time algorithm derived from the conventional _"grade school"_ long-division method.
-> This approach performs adequately for integers of modest size (up to approximately 200 digits) but becomes highly inefficient when handling
-> substantially larger integers (~1500 digits).
->
-> Improving the efficiency of this algorithm would require further research into advanced
-> numerical algorithms, which is something that I currently not inclined to pursue.
-
diff --git a/docs/map.md b/docs/map.md
index a0c3eeb..c30ac33 100644
--- a/docs/map.md
+++ b/docs/map.md
@@ -37,12 +37,12 @@ free them before removing the keys or destroying the map.
 
 The `Map` data structure supports the following methods:
 
-- `map_result_t map_new()`: initialize a new map;  
-- `map_result_t map_add(map, key, value)`: add a `(key, value)` pair to the map;  
-- `map_result_t map_get(map, key)`: retrieve a values indexed by `key` if it exists;  
-- `map_result_t map_remove(map, key)`: remove a key from the map if it exists;  
-- `map_result_t map_clear(map)`: reset the map state;  
-- `map_result_t map_destroy(map)`: delete the map;  
+- `map_result_t map_new()`: initializes a new map;  
+- `map_result_t map_add(map, key, value)`: adds a `(key, value)` pair to the map;  
+- `map_result_t map_get(map, key)`: retrieves a values indexed by `key` if it exists;  
+- `map_result_t map_remove(map, key)`: removes a key from the map if it exists;  
+- `map_result_t map_clear(map)`: resets the map state;  
+- `map_result_t map_destroy(map)`: deletes the map;  
 - `size_t map_size(map)`: returns map size (i.e., the number of elements);  
 - `size_t map_capacity(map)`: returns map capacity (i.e., map total size).
 
diff --git a/docs/vector.md b/docs/vector.md
index d10a196..6ca7aeb 100644
--- a/docs/vector.md
+++ b/docs/vector.md
@@ -25,19 +25,19 @@ deletion.
 
 At the time being, `Vector` supports the following methods:
 
-- `vector_result_t vector_new(size, data_size)`: create a new vector;  
-- `vector_result_t vector_push(vector, value)`: add a new value to the vector;  
-- `vector_result_t vector_set(vector, index, value)`: update the value of a given index if it exists;  
-- `vector_result_t vector_get(vector, index)`: return the value indexed by `index` if it exists;  
-- `vector_result_t vector_sort(vector, cmp)`: sort vector using `cmp` function;  
-- `vector_result_t vector_pop(vector)`: pop last element from the vector following the LIFO policy;  
-- `vector_result_t vector_map(vector, callback, env)`: apply `callback` function to vector (in-place);  
-- `vector_result_t vector_filter(vector, callback, env)`: filter vector using `callback` (in-place);  
-- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: fold/reduce vector using `callback`;  
-- `vector_result_t vector_clear(vector)`: logically reset the vector. That is, new pushes will overwrite the memory;  
-- `vector_result_t vector_destroy(vector)`: delete the vector;  
-- `size_t vector_size(vector)`: return vector size (i.e., the number of elements);  
-- `size_t vector_capacity(vector)`: return vector capacity (i.e., vector total size).
+- `vector_result_t vector_new(size, data_size)`: creates a new vector;  
+- `vector_result_t vector_push(vector, value)`: adds a new value to the vector;  
+- `vector_result_t vector_set(vector, index, value)`: updates the value of a given index if it exists;  
+- `vector_result_t vector_get(vector, index)`: returns the value indexed by `index` if it exists;  
+- `vector_result_t vector_sort(vector, cmp)`: sorts vector using `cmp` function;  
+- `vector_result_t vector_pop(vector)`: pops last element from the vector following the LIFO policy;  
+- `vector_result_t vector_map(vector, callback, env)`: applies `callback` function to vector (in-place);  
+- `vector_result_t vector_filter(vector, callback, env)`: filters vector using `callback` (in-place);  
+- `vector_result_t vector_reduce(vector, accumulator, callback, env)`: folds/reduces vector using `callback`;  
+- `vector_result_t vector_clear(vector)`: resets the vector logically. That is, new pushes will overwrite the memory;  
+- `vector_result_t vector_destroy(vector)`: deletes the vector;  
+- `size_t vector_size(vector)`: returns vector size (i.e., the number of elements);  
+- `size_t vector_capacity(vector)`: returns vector capacity (i.e., vector total size).
 
 As you can see from the previous function signatures, most methods that operate
 on the `Vector` data type return a custom type called `vector_result_t` which is
diff --git a/src/bigint.c b/src/bigint.c
index dd73dcb..2e4b7d3 100644
--- a/src/bigint.c
+++ b/src/bigint.c
@@ -9,6 +9,10 @@
         (result).message[RESULT_MSG_SIZE - 1] = '\0'; \
     } while (0)
 
+#define REMOVE(ptr) \
+    free(ptr); \
+    ptr = NULL
+
 #define IS_DIGIT(c) ((c) >= '0') && ((c) <= '9')
 
 #include <stdio.h>
@@ -842,30 +846,32 @@ cleanup: // Destroy intermediate allocations on error
 }
 
 /**
- * bigint_dev
- *  @x: a valid non-null big integer (dividend)
- *  @y: a valid non-null big integer (divisor)
- *  
- *  Computes division using long division algorithm in O(n^2)
- * 
- *  Returns a bigint_result_t data type
+ * bigint_div
+ *  @x: a non-null big integer acting as a dividend
+ *  @y: a non-null big integer acting as a divisor
+ *
+ *  Computers the quotient floor (i.e., |X| / |Y|) using Knuth's Algorithm D
+ *  Adaoted from p. 273 of Don Knuth's TAoCP Vol. 2
+ *  The complexity is O(n * m) where 'n' and 'm' are the number of base-10^9
+ *  "parts" (the limbs in the code below) in the divisor and the quotient, respectively.
+ *
+ *  Returns a bigint_result_t containing the quotient.
+ *  The called of this function will be responsible for applying the sign.
  */
 static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
     bigint_result_t result = {0};
     bigint_result_t tmp_res = {0};
 
     bigint_t *quotient = NULL;
-    bigint_t *remainder = NULL;
-    bigint_t *abs_y = NULL;
+    long long *u = NULL, *v = NULL, *q = NULL;
 
     if (x == NULL || y == NULL) {
         result.status = BIGINT_ERR_INVALID;
-        SET_MSG(result, "Invalid big numbers");
+        SET_MSG(result, "Invalid big integers");
 
         return result;
     }
 
-    // Check for division by zero
     const size_t y_size = vector_size(y->digits);
     if (y_size == 0) {
         result.status = BIGINT_ERR_DIV_BY_ZERO;
@@ -875,16 +881,16 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
     }
 
     if (y_size == 1) {
-        vector_result_t y_val_res = vector_get(y->digits, 0);
-        if (y_val_res.status != VECTOR_OK) {
+        vector_result_t y0_res = vector_get(y->digits, 0);
+        if (y0_res.status != VECTOR_OK) {
             result.status = BIGINT_ERR_INVALID;
-            COPY_MSG(result, y_val_res.message);
+            COPY_MSG(result, y0_res.message);
 
             return result;
         }
 
-        int *y_val = (int*)y_val_res.value.element;
-        if (*y_val == 0) {
+        int *y0 = (int *)y0_res.value.element;
+        if (*y0 == 0) {
             result.status = BIGINT_ERR_DIV_BY_ZERO;
             SET_MSG(result, "Cannot divide by zero");
 
@@ -892,94 +898,230 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
         }
     }
 
-    // If |x| < |y| then result is zero
     tmp_res = bigint_compare_abs(x, y);
-    if (tmp_res.status != BIGINT_OK) { return tmp_res; }
+    if (tmp_res.status != BIGINT_OK) {
+        return tmp_res;
+    }
 
     if (tmp_res.value.compare_status < 0) {
-        tmp_res = bigint_from_int(0);
-        if (tmp_res.status != BIGINT_OK) { return tmp_res; }
+        return bigint_from_int(0);
+    }
 
-        result.value.number = tmp_res.value.number;
+    const size_t x_size = vector_size(x->digits);
+    const size_t n = y_size;
+    const long long BASE = (long long)BIGINT_BASE;
+
+    quotient = malloc(sizeof(bigint_t));
+    if (quotient == NULL) {
+        result.status = BIGINT_ERR_ALLOCATE;
+        SET_MSG(result, "Cannot allocate memory for big integer");
+
+        goto cleanup;
+    }
+
+    quotient->digits = NULL;
+    quotient->is_negative = false;
+
+    // Single-limb divisor case. Here, we scan using 64-bit arithmetic in O(n)
+    if (y_size == 1) {
+        vector_result_t y0_res = vector_get(y->digits, 0);
+        if (y0_res.status != VECTOR_OK) {
+            result.status = BIGINT_ERR_INVALID;
+            COPY_MSG(result, y0_res.message);
+
+            goto cleanup;
+        }
+
+        long long divisor = *(int *)y0_res.value.element;
+
+        vector_result_t vec_res = vector_new(x_size, sizeof(int));
+        if (vec_res.status != VECTOR_OK) {
+            result.status = BIGINT_ERR_ALLOCATE;
+            COPY_MSG(result, vec_res.message);
+
+            goto cleanup;
+        }
+
+        quotient->digits = vec_res.value.vector;
+
+        long long remainder = 0;
+        for (int idx = (int)x_size - 1; idx >= 0; idx--) {
+            vector_result_t xidx_res = vector_get(x->digits, idx);
+            if (xidx_res.status != VECTOR_OK) {
+                result.status = BIGINT_ERR_INVALID;
+                COPY_MSG(result, xidx_res.message);
+
+                goto cleanup;
+            }
+
+            long long current = remainder * BASE + *(int *)xidx_res.value.element;
+            int q_idx = (int)(current / divisor);
+            remainder = current % divisor;
+
+            vector_result_t push_res = vector_push(quotient->digits, &q_idx);
+            if (push_res.status != VECTOR_OK) {
+                result.status = BIGINT_ERR_INVALID;
+                COPY_MSG(result, push_res.message);
+
+                goto cleanup;
+            }
+        }
+
+        // Restore the LSB-first order
+        const size_t q_size = vector_size(quotient->digits);
+        for (size_t lo = 0, hi = q_size - 1; lo < hi; hi--) {
+            vector_result_t lr = vector_get(quotient->digits, lo);
+            vector_result_t hr = vector_get(quotient->digits, hi);
+
+            if (lr.status != VECTOR_OK || hr.status != VECTOR_OK) {
+                result.status = BIGINT_ERR_INVALID;
+                SET_MSG(result, "Failed to reverse quotient digits");
+
+                goto cleanup;
+            }
+
+            int lower_val = *(int *)lr.value.element;
+            int higher_val = *(int *)hr.value.element;
+            vector_set(quotient->digits, lo, &higher_val);
+            vector_set(quotient->digits, hi, &lower_val);
+        }
+
+        bigint_result_t trim_res = bigint_trim_zeros(quotient);
+        if (trim_res.status != BIGINT_OK) { result = trim_res; goto cleanup; }
+
+        result.value.number = quotient;
         result.status = BIGINT_OK;
         SET_MSG(result, "Division between big integers was successful");
 
         return result;
     }
 
-    // Initialize quotient and remainder
-    tmp_res = bigint_from_int(0);
-    if (tmp_res.status != BIGINT_OK) { return tmp_res; }
-    quotient = tmp_res.value.number;
+    /* General case using Knuth's Algorithm
+     * First, some definitions:
+     * index 0 -> least significant limb;
+     * n -> limb count of divisor y
+     * m -> limb count of quotient (x_size - n)
+     * u[0 ... m + n] -> working copy of the (scaled) dividend +1 sentinel limb
+     * v[0 ... n - 1] -> working copy of the (scaled) divisor
+     * q[0 ... m] -> output quotient limbs
+     */
+    const size_t m = x_size - n;
 
-    tmp_res = bigint_from_int(0);
-    if (tmp_res.status != BIGINT_OK) { bigint_destroy(quotient); return tmp_res; }
-    remainder = tmp_res.value.number;
+    u = calloc(m + n + 1, sizeof(long long));
+    v = calloc(n, sizeof(long long));
+    q = calloc(m + 1, sizeof(long long));
 
-    // Create absolute value of y for later comparisons
-    tmp_res = bigint_clone(y);
-    if (tmp_res.status != BIGINT_OK) {
-        bigint_destroy(quotient);
-        bigint_destroy(remainder);
+    if (u == NULL || v == NULL || q == NULL) {
+        result.status = BIGINT_ERR_ALLOCATE;
+        SET_MSG(result, "Cannot allocate scratch arrays for division");
 
-        return tmp_res;
+        goto cleanup;
     }
 
-    abs_y = tmp_res.value.number;
-    abs_y->is_negative = false;
-
-    // Long division algorithm applied from MSB to LSB
-    const size_t x_size = vector_size(x->digits);
-    for (int idx = (int)x_size - 1; idx >= 0; idx--) {
-        // Shift remainder left by one base digit (multiplication by BASE)
-        tmp_res = bigint_shift_left(remainder, 1);
-        if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
-
-        bigint_t *shifted_remainder = tmp_res.value.number;
-        bigint_destroy(remainder);
-        remainder = shifted_remainder;
-
-        // Add current digit of 'x' to the least significant position of remainder
-        vector_result_t digit_res = vector_get(x->digits, idx);
-        if (digit_res.status != VECTOR_OK) {
+    for (size_t idx = 0; idx < x_size; idx++) {
+        vector_result_t get_res = vector_get(x->digits, idx);
+        if (get_res.status != VECTOR_OK) {
             result.status = BIGINT_ERR_INVALID;
-            COPY_MSG(result, digit_res.message);
+            COPY_MSG(result, get_res.message);
 
             goto cleanup;
         }
 
-        int *x_digit = (int*)digit_res.value.element;
+        u[idx] = *(int *)get_res.value.element;
+    }
 
-        vector_result_t set_res = vector_set(remainder->digits, 0, x_digit);
-        if (set_res.status != VECTOR_OK) {
+    for (size_t idx = 0; idx < n; idx++) {
+        vector_result_t get_res = vector_get(y->digits, idx);
+        if (get_res.status != VECTOR_OK) {
             result.status = BIGINT_ERR_INVALID;
-            COPY_MSG(result, set_res.message);
+            COPY_MSG(result, get_res.message);
 
             goto cleanup;
         }
 
-        tmp_res = bigint_trim_zeros(remainder);
-        if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
+        v[idx] = *(int *)get_res.value.element;
+    }
 
-        // COunt how many times 'y' fits into current remainder
-        size_t count = 0;
-        while (1) {
-            tmp_res = bigint_compare_abs(remainder, abs_y);
-            if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
-            if (tmp_res.value.compare_status < 0) { break; } // remainder < abs_y
+    // D1 (normalize): choose 'd' so that v[n - 1] >= BASE / 2 (after scaling)
+    const long long d = BASE / (v[n - 1] + 1);
 
-            // remainder = remainder - abs_y
-            tmp_res = bigint_sub_abs(remainder, abs_y);
-            if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
+    long long carry = 0;
+    for (size_t idx = 0; idx < x_size; idx++) {
+        long long current = u[idx] * d + carry;
+        u[idx] = current % BASE;
+        carry = current / BASE;
+    }
+    u[x_size] = carry;
 
-            bigint_t *new_remainder = tmp_res.value.number;
-            bigint_destroy(remainder);
-            remainder = new_remainder;
-            count++;
+    carry = 0;
+    for (size_t idx = 0; idx < n; idx++) {
+        long long current = v[idx] * d + carry;
+        v[idx] = current % BASE;
+        carry = current / BASE;
+    }
+
+    // D2-D6: the main loop. One iteration produces one quotient limb
+    for (long long j = (long long)m; j >= 0; j--) {
+        size_t jj = (size_t)j;
+
+        // D3: 2-by-1 trial quotient
+        long long two_limb = u[jj + n] * BASE + u[jj + n - 1];
+        long long q_hat = two_limb / v[n - 1];
+        long long r_hat = two_limb % v[n - 1];
+
+        while (q_hat >= BASE || ((n >= 2) && (q_hat * v[n - 2]) > (BASE * r_hat + u[jj + n - 2]))) {
+            q_hat--;
+            r_hat += v[n - 1];
+            if (r_hat >= BASE) { break; }
         }
 
-        // Add count to quotient digits
-        vector_result_t push_res = vector_push(quotient->digits, &count);
+        // D4: multiply-subtract u[j ... j + n] -= q_hat * v[0 ... n - 1]
+        long long borrow = 0;
+        for (size_t idx = 0; idx < n; idx++) {
+            long long product = q_hat * v[idx] + borrow;
+            borrow = product / BASE;
+            long long diff = u[jj + idx] - (product % BASE);
+            if (diff < 0) {
+                diff += BASE;
+                borrow++;
+            }
+            u[jj + idx] = diff;
+        }
+        u[jj + n] -= borrow;
+
+        // D5: store quotient digit
+        q[jj] = q_hat;
+
+        // D6: if 'u' went negative, add 'v' back once and decrement q[j]
+        if (u[jj + n] < 0) {
+            q[jj]--;
+            carry = 0;
+            for (size_t idx = 0; idx < n; idx++) {
+                long long sum = u[jj + idx] + v[idx] + carry;
+                u[jj + idx] = sum % BASE;
+                carry = sum / BASE;
+            }
+            u[jj + n] += carry;
+        }
+    }
+
+    // Delete working copy from memory
+    REMOVE(u); REMOVE(v);
+
+    // Build the bigint quotient from q[0 ... m] (index 0 = LSB)
+    vector_result_t vec_res = vector_new(m + 1, sizeof(int));
+    if (vec_res.status != VECTOR_OK) {
+        result.status = BIGINT_ERR_ALLOCATE;
+        COPY_MSG(result, vec_res.message);
+
+        goto cleanup;
+    }
+
+    quotient->digits = vec_res.value.vector;
+    for (size_t idx = 0; idx <= m; idx++) {
+        int q_idx = (int)q[idx];
+
+        vector_result_t push_res = vector_push(quotient->digits, &q_idx);
         if (push_res.status != VECTOR_OK) {
             result.status = BIGINT_ERR_INVALID;
             COPY_MSG(result, push_res.message);
@@ -988,34 +1130,10 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
         }
     }
 
-    // Reverse quotient digits
-    const size_t q_size = vector_size(quotient->digits);
-    for (size_t idx = 0; idx < q_size / 2; idx++) {
-        vector_result_t left_res = vector_get(quotient->digits, idx);
-        vector_result_t right_res = vector_get(quotient->digits, q_size - 1 - idx);
+    REMOVE(q);
 
-        if (left_res.status != VECTOR_OK || right_res.status != VECTOR_OK) {
-            result.status = BIGINT_ERR_INVALID;
-            SET_MSG(result, "Failed to access vector elements");
-
-            goto cleanup;
-        }
-
-        int *left = (int*)left_res.value.element;
-        int *right = (int*)right_res.value.element;
-        int temp = *left;
-
-        vector_set(quotient->digits, idx, right);
-        vector_set(quotient->digits, q_size - 1 - idx, &temp);
-    }
-
-    quotient->is_negative = (x->is_negative != y->is_negative);
-
-    tmp_res = bigint_trim_zeros(quotient);
-    if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
-
-    bigint_destroy(remainder);
-    bigint_destroy(abs_y);
+    bigint_result_t trim_res = bigint_trim_zeros(quotient);
+    if (trim_res.status != BIGINT_OK) { result = trim_res; goto cleanup; }
 
     result.value.number = quotient;
     result.status = BIGINT_OK;
@@ -1024,20 +1142,20 @@ static bigint_result_t bigint_div(const bigint_t *x, const bigint_t *y) {
     return result;
 
 cleanup:
+    free(u); free(v); free(q);
     if (quotient) { bigint_destroy(quotient); }
-    if (remainder) { bigint_destroy(remainder); }
-    if (abs_y) { bigint_destroy(abs_y); }
 
     return result;
 }
 
+
 /**
  * bigint_from_int
  *  @value: an integer value
  * 
  *  Takes an integer and convert it to a big integer
  * 
- *  Returns a big_int_result_t data type containing a new big integer
+ *  Returns a bigint_result_t data type containing a new big integer
  */
 bigint_result_t bigint_from_int(long long value) {
     bigint_result_t result = {0};
@@ -1562,7 +1680,9 @@ bigint_result_t bigint_prod(const bigint_t *x, const bigint_t *y) {
  *  @x: a valid non-null big integer
  *  @y: a valid non-null big integer
  * 
- *  Computes division with remainder
+ *  Computes truncated division with remainder. That is:
+ *   quotient = trunc(x / y)       sign = sign(x) XOR sign(y)
+ *   remainder = x - y * quotient  sign = sign(x)
  *  
  *  Returns a bigint_result_t data type
  */
@@ -1570,7 +1690,6 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
     bigint_result_t result = {0};
     bigint_result_t tmp_res = {0};
 
-    // Intermediate results
     bigint_t *quotient = NULL;
     bigint_t *y_times_q = NULL;
     bigint_t *remainder = NULL;
@@ -1582,11 +1701,10 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
         return result;
     }
 
-    // Check for division by zero
     const size_t y_size = vector_size(y->digits);
     if (y_size == 0) {
         result.status = BIGINT_ERR_DIV_BY_ZERO;
-        SET_MSG(result, "Division by zero");
+        SET_MSG(result, "Cannot divide by zero");
 
         return result;
     }
@@ -1600,16 +1718,16 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
             return result;
         }
 
-        int *y_val = (int*)y_val_res.value.element;
+        int *y_val = (int *)y_val_res.value.element;
         if (*y_val == 0) {
             result.status = BIGINT_ERR_DIV_BY_ZERO;
-            SET_MSG(result, "Division by zero");
+            SET_MSG(result, "Cannot divide by zero");
 
             return result;
         }
     }
 
-    // |x| < |y| then quotient is 0 and remainder is x
+    // |x| < |y|: quotient is 0, remainder is x
     tmp_res = bigint_compare_abs(x, y);
     if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
 
@@ -1624,6 +1742,7 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
 
         result.value.division.quotient = quotient;
         result.value.division.remainder = remainder;
+
         result.status = BIGINT_OK;
         SET_MSG(result, "Division between big integers was successful");
 
@@ -1634,7 +1753,10 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
     if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
     quotient = tmp_res.value.number;
 
-    // Compute r = x - y * q
+    // Set quotient sign accordingly
+    quotient->is_negative = (x->is_negative != y->is_negative);
+
+    // Compute remainder using r = x - y * q
     tmp_res = bigint_prod(y, quotient);
     if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
     y_times_q = tmp_res.value.number;
@@ -1643,20 +1765,31 @@ bigint_result_t bigint_divmod(const bigint_t *x, const bigint_t *y) {
     if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
     remainder = tmp_res.value.number;
 
-    // Ensure that remainder has correct sign (i.e., same as dividend x)
-    // In C-style division, sign(remainder) = sign(dividend)
-    remainder->is_negative = x->is_negative;
-
     tmp_res = bigint_trim_zeros(remainder);
     if (tmp_res.status != BIGINT_OK) { result = tmp_res; goto cleanup; }
-    
+
+    // Set remainder sign accordingly
+    vector_result_t r0 = vector_get(remainder->digits, 0);
+    if (r0.status != VECTOR_OK) {
+        result.status = BIGINT_ERR_INVALID;
+        COPY_MSG(result, r0.message);
+
+        goto cleanup;
+    }
+
+    bool rem_is_zero = (vector_size(remainder->digits) == 1 && *(int *)r0.value.element == 0);
+
+    if (!rem_is_zero) {
+        remainder->is_negative = x->is_negative;
+    }
+
     result.value.division.quotient = quotient;
     result.value.division.remainder = remainder;
     result.status = BIGINT_OK;
     SET_MSG(result, "Division between big integers was successful");
 
     bigint_destroy(y_times_q);
-    
+
     return result;
 
 cleanup:
diff --git a/tests/test_bigint.c b/tests/test_bigint.c
index 6dba103..2f82ef6 100644
--- a/tests/test_bigint.c
+++ b/tests/test_bigint.c
@@ -213,8 +213,8 @@ void test_bigint_prod_neg(void) {
     bigint_destroy(prod.value.number);
 }
 
-// Test division between big numbers
-void test_bigint_div(void) {
+// Test division between big numbers where divisor is a single limb big number
+void test_bigint_div_single_limb(void) {
     bigint_result_t x = bigint_from_int(100);
     bigint_result_t y = bigint_from_int(2);
 
@@ -229,11 +229,33 @@ void test_bigint_div(void) {
     bigint_eq(quotient, "50");
     bigint_eq(remainder, "0");
 
-    bigint_destroy(quotient);
-    bigint_destroy(remainder);
+    bigint_destroy(quotient); bigint_destroy(remainder);
+    bigint_destroy(x.value.number); bigint_destroy(y.value.number);
+}
 
-    bigint_destroy(x.value.number);
-    bigint_destroy(y.value.number);
+// Test division between big numbers using Knuth's algorithm
+void test_bigint_div_knuth(void) {
+    // (1...9) x 8
+    const char *x_origin = "123456789123456789123456789123456789123456789123456789123456789123456789";
+    // (9...1) x 5
+    const char *y_origin = "987654321987654321987654321987654321987654321";
+
+    bigint_result_t x = bigint_from_string(x_origin);
+    bigint_result_t y = bigint_from_string(y_origin);
+
+    assert(x.status == BIGINT_OK && y.status == BIGINT_OK);
+
+    bigint_result_t div = bigint_divmod(x.value.number, y.value.number);
+    assert(div.status == BIGINT_OK);
+
+    bigint_t* const quotient = div.value.division.quotient;
+    bigint_t* const remainder = div.value.division.remainder;
+
+    bigint_eq(quotient, "124999998860937500014238281");
+    bigint_eq(remainder, "246737799246737799370194588370194588370194588");
+
+    bigint_destroy(quotient); bigint_destroy(remainder);
+    bigint_destroy(x.value.number); bigint_destroy(y.value.number);
 }
 
 // Test division between big numbers with negative dividend
@@ -262,7 +284,7 @@ void test_bigint_div_dividend(void) {
 
 // Test division between big numbers with negative divisor
 // This library follows C-style divison such that sign(remainder) = sign(dividend)
-void test_bigint_div_divisor(void) {
+void test_bigint_div_neg_divisor(void) {
     bigint_result_t x = bigint_from_int(13);
     bigint_result_t y = bigint_from_int(-4);
 
@@ -405,9 +427,10 @@ int main(void) {
     TEST(bigint_very_large_prod);
     TEST(bigint_prod_mixed);
     TEST(bigint_prod_neg);
-    TEST(bigint_div);
+    TEST(bigint_div_single_limb);
+    TEST(bigint_div_knuth);
     TEST(bigint_div_dividend);
-    TEST(bigint_div_divisor);
+    TEST(bigint_div_neg_divisor);
     TEST(bigint_div_neg);
     TEST(bigint_div_by_zero);
     TEST(bigint_clone);
diff --git a/usage.c b/usage.c
index c643083..60f2b4a 100644
--- a/usage.c
+++ b/usage.c
@@ -495,7 +495,7 @@ int bigint_usage(void) {
     // Print result
     bigint_printf("multiplication result = %B\n", prod);
 
-    bigint_t *a = bigint_from_string(x_origin).value.number;
+    bigint_t *a = bigint_from_string(large_x).value.number;
     bigint_t *b = bigint_from_string(y_origin).value.number;
 
     // Divide two big integers
-- 
2.49.1


From eb670e26a58285b1884ff390fa8e7bf73b70991f Mon Sep 17 00:00:00 2001
From: Marco Cetica <email@marcocetica.com>
Date: Thu, 26 Feb 2026 09:36:46 +0100
Subject: [PATCH 2/3] Improved `bigint_printf` method

---
 src/bigint.c | 56 +++++++++++++++++++++++++---------------------------
 src/map.c    |  2 --
 src/vector.c |  1 -
 3 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/src/bigint.c b/src/bigint.c
index 2e4b7d3..b37f0cd 100644
--- a/src/bigint.c
+++ b/src/bigint.c
@@ -23,7 +23,6 @@
 #include "bigint.h"
 #include "vector.h"
 
-// Internal methods
 /**
  * bigint_trim_zeros
  *  @number: a non-null big integer
@@ -1673,8 +1672,6 @@ bigint_result_t bigint_prod(const bigint_t *x, const bigint_t *y) {
     return result;
 }
 
-
-
 /**
  * bigint_divmod
  *  @x: a valid non-null big integer
@@ -1886,40 +1883,37 @@ bigint_result_t bigint_printf(const char *format, ...) {
 
     // Process string char by char
     for (const char *p = format; *p != '\0'; p++) {
-        if (*p == '%' && *(p + 1) == 'B') {
-            // Process a big number
-            bigint_t *num = va_arg(args, bigint_t*);
-            if (num == NULL) {
-                printf("<invalid string>");
-            } else {
-                bigint_result_t num_str_res = bigint_to_string(num);
-                if (num_str_res.status != BIGINT_OK) { 
-                    va_end(args); 
-                    return num_str_res; 
-                }
-                
-                char* const number_str = num_str_res.value.string_num;
-                printf("%s", number_str);
-                free(number_str);
-            }
+        if (*p == '%' && *(p + 1) != '%') {
             p++;
-        } else if (*p == '%' && *(p + 1) != '%') {
-            // Handle common printf placeholders
-            p++;
-            char placeholder = *p;
+            const char placeholder = *p;
 
             switch (placeholder) {
+                case 'B': {
+                    bigint_t *num = va_arg(args, bigint_t*);
+                    if (num == NULL) {
+                        for (const char *s = "<invalid big integer>"; *s != '\0'; s++) { putchar(*s); }
+                    } else {
+                        bigint_result_t num_str_res = bigint_to_string(num);
+                        if (num_str_res.status != BIGINT_OK) {
+                            va_end(args);
+                            return num_str_res;
+                        }
+
+                        char *number_str = num_str_res.value.string_num;
+                        for (const char *s = number_str; *s != '\0'; s++) { putchar(*s); }
+                        free(number_str);
+                    }
+                    break;
+                }
                 case 'd':
                 case 'i': {
                     int val = va_arg(args, int);
                     printf("%d", val);
-
                     break;
                 }
                 case 'u': {
                     unsigned int val = va_arg(args, unsigned int);
                     printf("%u", val);
-
                     break;
                 }
                 case 'l': {
@@ -1939,13 +1933,17 @@ bigint_result_t bigint_printf(const char *format, ...) {
                     break;
                 }
                 case 's': {
-                    char *val = va_arg(args, char*);
-                    printf("%s", val ? val : "<invalid string>");
+                    char* val = va_arg(args, char*);
+                    if (val) {
+                        for (const char *s = val; *s != '\0'; s++) { putchar(*s); }
+                    } else {
+                        for (const char *s = "<invalid string>"; *s != '\0'; s++) { putchar(*s); }
+                    }
                     break;
                 }
                 case 'c': {
                     int val = va_arg(args, int);
-                    printf("%c", val);
+                    putchar(val);
                     break;
                 }
                 case 'f': {
@@ -1954,7 +1952,7 @@ bigint_result_t bigint_printf(const char *format, ...) {
                     break;
                 }
                 case 'p': {
-                    void *val = va_arg(args, void*);
+                    void* const val = va_arg(args, void*);
                     printf("%p", val);
                     break;
                 }
diff --git a/src/map.c b/src/map.c
index 7576a5b..7c20b5c 100644
--- a/src/map.c
+++ b/src/map.c
@@ -10,8 +10,6 @@
 
 #include "map.h"
 
-// Internal methods
-
 /**
  * hash_key
  *  @key: The input string for the hash function
diff --git a/src/vector.c b/src/vector.c
index acb2807..3effebe 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -9,7 +9,6 @@
 
 #include "vector.h"
 
-// Internal methods
 /**
  * vector_resize
  *  @vector: a non-null vector
-- 
2.49.1


From 40d343c02b2a2d4680c792e74caa54009db5400b Mon Sep 17 00:00:00 2001
From: Marco Cetica <email@marcocetica.com>
Date: Thu, 26 Feb 2026 09:46:48 +0100
Subject: [PATCH 3/3] Updated documentation

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9ca1a68..42a0145 100644
--- a/README.md
+++ b/README.md
@@ -110,9 +110,9 @@ int main(void) {
 #include "src/bigint.h"
 
 /*
- * Compile with: gcc -O3 main.c src/bigint.c src/vector.c
+ * Compile with: clang -O3 fact.c src/bigint.c src/vector.c -o fact
  * Output: 20000! = 1819206320230345134827641...
- * Time: 4.01s user 0.00s system 99% cpu 4.021 total
+ * Time: 1.49s user 0.00s system 99% cpu 1.501 total
  */
 int main(void) {
     const int n = 20000;
-- 
2.49.1