Completed unit tests for String type and added string_slice function

2026-01-09 12:12:41 +01:00
parent 44e3dfa58d
commit 0f8378bf75
5 changed files with 334 additions and 15 deletions
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 CC = gcc
 CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \
 	-fsanitize=address -fsanitize=undefined -fstack-clash-protection \
-	-fdiagnostics-color=always -Wwrite-strings -g -std=c99
+	 -Wwrite-strings -g -std=c99

 BENCH_FLAGS = -Wall -Wextra -Werror -O3

--- a/src/string.c
+++ b/src/string.c
@@ -239,7 +239,7 @@ string_result_t string_clone(const string_t *str) {
    }

    memcpy(str_copy->data, str->data, str->byte_size + 1);
-    str_copy->byte_size = str->byte_size + 1;
+    str_copy->byte_size = str->byte_size;
    str_copy->byte_capacity = str->byte_size + 1;
    str_copy->char_count = str->char_count;

@@ -295,7 +295,7 @@ string_result_t string_concat(const string_t *x, const string_t *y) {
 }

 /**
- * string_substring
+ * string_contains
 * @haystack: a non-null string
 * @needle: a non-null string
 *
@@ -304,7 +304,7 @@ string_result_t string_concat(const string_t *x, const string_t *y) {
 * Returns a string_result_t containing the index to the beginning of the located string
 * (if the substring has been found)
 */
-string_result_t string_substring(const string_t *haystack, const string_t *needle) {
+string_result_t string_contains(const string_t *haystack, const string_t *needle) {
    string_result_t result = {
        .status = STRING_OK,
        .value.idx = -1
@@ -335,6 +335,75 @@ string_result_t string_substring(const string_t *haystack, const string_t *needl
    return result;
 }

+/**
+ * string_slice
+ *  @str: a non-null string
+ *  @start: index of the first character to copy (inclusive)
+ *  @end: index of the last character to copy (inclusive)
+ *
+ *  Copies the UTF-8 characters of @str from @start to @end into a new string
+ *
+ *  Returns a string_result_t data type containing the newly allocated slice
+ */
+string_result_t string_slice(const string_t *str, size_t start, size_t end) {
+    string_result_t result = {0};
+
+    if (str == NULL) {
+        result.status = STRING_ERR_INVALID;
+        SET_MSG(result, "Invalid string");
+
+        return result;
+    }
+
+    if (start > end || end >= str->char_count) {
+        result.status = STRING_ERR_OVERFLOW;
+        SET_MSG(result, "Index out of bounds");
+
+        return result;
+    }
+
+    // Indices count characters, so walk the UTF-8 data to turn them into byte offsets
+    size_t start_byte_offset = 0;
+    for (size_t idx = 0; idx < start; idx++) {
+        start_byte_offset += utf8_char_len((unsigned char)str->data[start_byte_offset]);
+    }
+
+    size_t end_byte_offset = start_byte_offset; // ends one byte past character @end
+    for (size_t idx = start; idx <= end; idx++) {
+        end_byte_offset += utf8_char_len((unsigned char)str->data[end_byte_offset]);
+    }
+
+    const size_t slice_byte_size = end_byte_offset - start_byte_offset;
+    string_t *slice = malloc(sizeof *slice);
+    if (slice == NULL) {
+        result.status = STRING_ERR_ALLOCATE;
+        SET_MSG(result, "Cannot allocate memory");
+
+        return result;
+    }
+
+    slice->data = malloc(slice_byte_size + 1);
+    if (slice->data == NULL) {
+        free(slice); // release the header so a failed buffer allocation does not leak it
+        result.status = STRING_ERR_ALLOCATE;
+        SET_MSG(result, "Cannot allocate memory");
+
+        return result;
+    }
+
+    memcpy(slice->data, str->data + start_byte_offset, slice_byte_size);
+    slice->data[slice_byte_size] = '\0';
+    slice->byte_size = slice_byte_size;
+    slice->byte_capacity = slice_byte_size + 1; // payload plus the NUL terminator
+    slice->char_count = end - start + 1;
+
+    result.status = STRING_OK;
+    result.value.string = slice;
+    SET_MSG(result, "String sliced successfully");
+
+    return result;
+}
+
 /**
 * string_eq
 *  @x: a non-null string
@@ -405,7 +474,14 @@ string_result_t string_substring(const string_t *haystack, const string_t *needl
 string_result_t string_get_at(const string_t *str, size_t position) {
    string_result_t result = {0};

-    if (str == NULL || position >= str->char_count) {
+    if (str == NULL) {
+        result.status = STRING_ERR_INVALID;
+        SET_MSG(result, "Invalid string");
+
+        return result;
+    }
+    
+    if (position >= str->char_count) {
        result.status = STRING_ERR_OVERFLOW;
        SET_MSG(result, "Index out of bounds");

--- a/src/string.h
+++ b/src/string.h
@@ -26,9 +26,9 @@ typedef struct {
    string_status_t status;
    uint8_t message[RESULT_MSG_SIZE];
    union {
-        string_t *string; // For new, reverse, trim
+        string_t *string; // For new, clone, slice, reverse, trim
        char *symbol; // For get_at
-        int64_t idx; // For substring search
+        int64_t idx; // For contains
        bool is_equ; // For comparison
        struct { // For split
            string_t **strings;
@@ -45,7 +45,8 @@ extern "C" {
 string_result_t string_new(const char *c_str);
 string_result_t string_clone(const string_t *str);
 string_result_t string_concat(const string_t *x, const string_t *y);
-string_result_t string_substring(const string_t *haystack, const string_t *needle);
+string_result_t string_contains(const string_t *haystack, const string_t *needle);
+string_result_t string_slice(const string_t *str, size_t start, size_t end);
 string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive);
 string_result_t string_get_at(const string_t *str, size_t position);
 string_result_t string_set_at(string_t *str, size_t position, const char *utf8_char);
--- a/tests/test_string.c
+++ b/tests/test_string.c
@@ -11,6 +11,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include <string.h>
+#include <stdlib.h>

 #include "../src/string.h"

@@ -39,6 +40,20 @@ void test_string_new_empty(void) {
    string_destroy(res.value.string);
 }

+// A clone must be a distinct object holding the same bytes as its source
+void test_string_clone(void) {
+    string_t *source = string_new("Original").value.string;
+    const string_result_t cloned = string_clone(source);
+    assert(cloned.status == STRING_OK);
+
+    string_t *copy = cloned.value.string;
+    assert(copy != source); // Must not alias the source object
+    assert(strcmp(copy->data, source->data) == 0);
+    assert(copy->byte_size == source->byte_size);
+    string_destroy(source);
+    string_destroy(copy);
+}
+
 // Test string concatenation
 void test_string_concat(void) {
    string_t *str1 = string_new("Foo").value.string;
@@ -54,6 +69,71 @@ void test_string_concat(void) {
    string_destroy(res.value.string);
 }

+// Test substring lookup with ASCII, multi-byte and missing needles
+void test_string_contains(void) {
+    string_t *text = string_new("Hello 🌍 World").value.string;
+    string_t *ascii_needle = string_new("World").value.string;
+    string_t *emoji_needle = string_new("🌍").value.string;
+    string_t *absent_needle = string_new("not found").value.string;
+
+    // "World" begins at character index 8
+    const string_result_t ascii_hit = string_contains(text, ascii_needle);
+    assert(ascii_hit.status == STRING_OK);
+    assert(ascii_hit.value.idx == 8);
+
+    // The globe emoji is the character at index 6
+    const string_result_t emoji_hit = string_contains(text, emoji_needle);
+    assert(emoji_hit.status == STRING_OK);
+    assert(emoji_hit.value.idx == 6);
+
+    // A needle that is absent yields index -1 with an OK status
+    const string_result_t miss = string_contains(text, absent_needle);
+    assert(miss.status == STRING_OK);
+    assert(miss.value.idx == -1);
+
+    string_destroy(text);
+    string_destroy(ascii_needle);
+    string_destroy(emoji_needle);
+    string_destroy(absent_needle);
+}
+
+// Test slicing over ASCII and multi-byte input, plus bounds rejection
+void test_string_slice(void) {
+    // Pure ASCII input: characters 2..4 of "foobar"
+    string_t *ascii = string_new("foobar").value.string;
+    const string_result_t ascii_slice = string_slice(ascii, 2, 4);
+
+    assert(ascii_slice.status == STRING_OK);
+    assert(strcmp(ascii_slice.value.string->data, "oba") == 0);
+    assert(ascii_slice.value.string->char_count == 3);
+
+    // A single multi-byte character
+    string_t *mixed = string_new("AB😆🌍").value.string;
+    const string_result_t emoji_slice = string_slice(mixed, 2, 2);
+
+    assert(emoji_slice.status == STRING_OK);
+    assert(strcmp(emoji_slice.value.string->data, "😆") == 0);
+    assert(emoji_slice.value.string->byte_size == 4); // one emoji = 4 bytes
+
+    // ASCII characters followed by a multi-byte one
+    const string_result_t prefix_slice = string_slice(mixed, 0, 2);
+    assert(prefix_slice.status == STRING_OK);
+    assert(strcmp(prefix_slice.value.string->data, "AB😆") == 0);
+
+    // start > end must be rejected
+    string_result_t rejected = string_slice(ascii, 5, 2);
+    assert(rejected.status == STRING_ERR_OVERFLOW);
+
+    rejected = string_slice(ascii, 1, 50); // end past the last character
+    assert(rejected.status == STRING_ERR_OVERFLOW);
+
+    string_destroy(ascii);
+    string_destroy(mixed);
+    string_destroy(ascii_slice.value.string);
+    string_destroy(emoji_slice.value.string);
+    string_destroy(prefix_slice.value.string);
+}
+
 // Test case-insensitive and sensitive comparison
 void test_string_eq(void) {
    string_t *str1 = string_new("Foo").value.string;
@@ -83,6 +163,35 @@ void test_string_reverse_utf8(void) {
    string_destroy(res.value.string);
 }

+// Test extraction of multi-byte symbols by character index
+void test_string_get_at(void) {
+    string_t *text = string_new("AB😆🌍").value.string;
+
+    // Index 2 holds 😆
+    const string_result_t third = string_get_at(text, 2);
+    assert(third.status == STRING_OK);
+    assert(strcmp(third.value.symbol, "😆") == 0);
+    free(third.value.symbol);
+
+    // Index 3 holds 🌍
+    const string_result_t fourth = string_get_at(text, 3);
+    assert(fourth.status == STRING_OK);
+    assert(strcmp(fourth.value.symbol, "🌍") == 0);
+    free(fourth.value.symbol);
+
+    string_destroy(text);
+}
+
+// Test that string_get_at rejects an out-of-range index
+void test_string_get_at_overflow(void) {
+    string_t *short_str = string_new("ABC").value.string;
+
+    const string_result_t outcome = string_get_at(short_str, 50);
+    assert(outcome.status == STRING_ERR_OVERFLOW);
+
+    string_destroy(short_str);
+}
+
 // Test mutation of UTF-8 symbol
 void test_string_set_at(void) {
    string_t *str = string_new("ABC").value.string;
@@ -107,6 +216,30 @@ void test_string_set_at_overflow(void) {
    string_destroy(str);
 }

+// Test lowercase conversion of a mixed-case string
+void test_string_to_lower(void) {
+    string_t *mixed_case = string_new("AbC").value.string;
+    const string_result_t lowered = string_to_lower(mixed_case);
+
+    assert(lowered.status == STRING_OK);
+    assert(strcmp(lowered.value.string->data, "abc") == 0);
+
+    string_destroy(mixed_case);
+    string_destroy(lowered.value.string);
+}
+
+// Test uppercase conversion of a mixed-case string
+void test_string_to_upper(void) {
+    string_t *mixed_case = string_new("aBc").value.string;
+    const string_result_t raised = string_to_upper(mixed_case);
+
+    assert(raised.status == STRING_OK);
+    assert(strcmp(raised.value.string->data, "ABC") == 0);
+
+    string_destroy(mixed_case);
+    string_destroy(raised.value.string);
+}
+
 // Test whitespace trimming
 void test_string_trim(void) {
    string_t *str = string_new("   \t   Foo Bar \n    ").value.string;
@@ -127,26 +260,49 @@ void test_string_split(void) {
    assert(res.status == STRING_OK);
    assert(res.value.split.count == 3);

-    assert(strcmp(res.value.split.strings[0]->data, "Red") == 0);
-    assert(strcmp(res.value.split.strings[1]->data, "Green") == 0);
-    assert(strcmp(res.value.split.strings[2]->data, "Blue") == 0);
+    const size_t count = res.value.split.count;
+    string_t **strings = res.value.split.strings;

-    string_split_destroy(res.value.split.strings, res.value.split.count);
+    const char *expected[] = { "Red", "Green", "Blue" };
+    for (size_t idx = 0; idx < count; idx++) {
+        assert(strcmp(strings[idx]->data, expected[idx]) == 0);
+    }
+  
+    string_split_destroy(strings, count);
    string_destroy(str);
 }

+// Test destroying a valid string and rejecting a NULL one
+void test_string_destroy(void) {
+    string_t *victim = string_new("delete me").value.string;
+
+    const string_result_t destroyed = string_destroy(victim);
+    assert(destroyed.status == STRING_OK);
+
+    const string_result_t null_case = string_destroy(NULL);
+    assert(null_case.status == STRING_ERR_INVALID);
+}
+
 int main(void) {
-    printf("=== Running Vector unit tests ===\n\n");
+    printf("=== Running String unit tests ===\n\n");

    TEST(string_new);
    TEST(string_new_empty);
+    TEST(string_clone);
    TEST(string_concat);
+    TEST(string_contains);
+    TEST(string_slice);
    TEST(string_eq);
    TEST(string_reverse_utf8);
+    TEST(string_get_at);
+    TEST(string_get_at_overflow);
    TEST(string_set_at);
    TEST(string_set_at_overflow);
+    TEST(string_to_lower);
+    TEST(string_to_upper);
    TEST(string_trim);
    TEST(string_split);
+    TEST(string_destroy);

    printf("\n=== All tests passed! ===\n");
    return 0;
--- a/usage.c
+++ b/usage.c
@@ -543,7 +543,18 @@ int string_usage(void) {

    string_t *str1 = res.value.string;
    printf("Created string: \"%s\"\n", str1->data);
-    printf("Character count: %zu (%zu actual bytes)\n\n", string_len(str1), str1->byte_size);
+    printf("Character count: %zu (%zu actual bytes)\n", string_len(str1), str1->byte_size);
+
+    string_result_t res_clone = string_clone(str1);
+    if (res_clone.status != STRING_OK) {
+        printf("Error: %s\n", res_clone.message); // report the clone's own error, not the string_new result
+
+        return 1;
+    }
+
+    string_t *cloned = res_clone.value.string;
+    printf("Cloned string: \"%s\"\n\n", cloned->data);
+    string_destroy(cloned); // the clone is only needed for the demonstration above

    // Concatenation of strings
    string_result_t res_suffix = string_new("World! 🦜");
@@ -568,6 +579,46 @@ int string_usage(void) {
    string_t *concat_str = res_cat.value.string;
    printf("Concatenation result: \"%s\"\n\n", concat_str->data);

+    // Substring search (string_contains yields idx == -1 when the needle is absent)
+    string_t *haystack = string_new("The quick brown fox jumps over the lazy dog.").value.string;
+    string_t *needle = string_new("brown fox").value.string;
+
+    string_result_t res_contains = string_contains(haystack, needle);
+    if (res_contains.status != STRING_OK) {
+        printf("Error: %s\n", res_contains.message);
+
+        return 1;
+    }
+
+    if (res_contains.value.idx != -1) {
+        printf("Substring found. Starting at index %lld\n\n", (long long)res_contains.value.idx); // idx is int64_t, so %zu would mismatch
+    }
+
+    string_destroy(haystack);
+    string_destroy(needle);
+
+    // String slicing
+    string_result_t res_slice = string_slice(concat_str, 7, 14);
+    if (res_slice.status != STRING_OK) {
+        printf("Error: %s\n", res_slice.message);
+
+        return 1;
+    }
+
+    printf("Slice of string: \"%s\"\n\n", res_slice.value.string->data);
+    string_destroy(res_slice.value.string);
+
+    // String equality
+    string_t *compare = string_new("hello, World! 🦜").value.string;
+    string_result_t res_eq = string_eq(concat_str, compare, true);
+    if (res_eq.value.is_equ) {
+        printf("The two strings are equal\n\n");
+    } else {
+        printf("The two strings are not equal\n\n");
+    }
+
+    string_destroy(compare);
+    
    // Uppercase string
    string_result_t res_upper = string_to_upper(concat_str);
    if (res_upper.status != STRING_OK) {
@@ -617,6 +668,41 @@ int string_usage(void) {
    printf("Extracted symbol: \"%s\"\n", res_get.value.symbol);
    free(res_get.value.symbol);

+    // Trim string
+    string_t *to_trim = string_new("    foo    ").value.string;
+    string_result_t res_trim = string_trim(to_trim);
+    if (res_trim.status != STRING_OK) {
+        printf("Error: %s\n", res_trim.message);
+
+        return 1;
+    }
+
+    printf("Trimmed string: \"%s\"\n\n", res_trim.value.string->data);
+    string_destroy(to_trim);
+    string_destroy(res_trim.value.string);
+
+    // Split string
+    string_t *to_split = string_new("foo/bar/biz").value.string;
+    string_result_t res_split = string_split(to_split, "/");
+    if (res_split.status != STRING_OK) {
+        printf("Error: %s\n", res_split.message);
+
+        return 1;
+    }
+
+    const size_t count = res_split.value.split.count;
+    string_t **strings = res_split.value.split.strings;
+
+    printf("Original string: \"%s\"\nSplitted string: ", to_split->data);
+    for (size_t idx = 0; idx < count; idx++) {
+        printf("\"%s\" ", strings[idx]->data);
+    }
+
+    printf("\n");
+
+    string_split_destroy(strings, count);
+    string_destroy(to_split);
+
    string_destroy(concat_str);
    string_destroy(str1);