diff --git a/Makefile b/Makefile index c339dbb..a40e35f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CC = gcc CFLAGS = -Wall -Wextra -Werror -pedantic-errors -fstack-protector-strong \ -fsanitize=address -fsanitize=undefined -fstack-clash-protection \ - -fdiagnostics-color=always -Wwrite-strings -g -std=c99 + -Wwrite-strings -g -std=c99 BENCH_FLAGS = -Wall -Wextra -Werror -O3 diff --git a/src/string.c b/src/string.c index a4f7b7a..aac4bc3 100644 --- a/src/string.c +++ b/src/string.c @@ -239,7 +239,7 @@ string_result_t string_clone(const string_t *str) { } memcpy(str_copy->data, str->data, str->byte_size + 1); - str_copy->byte_size = str->byte_size + 1; + str_copy->byte_size = str->byte_size; str_copy->byte_capacity = str->byte_size + 1; str_copy->char_count = str->char_count; @@ -295,7 +295,7 @@ string_result_t string_concat(const string_t *x, const string_t *y) { } /** - * string_substring + * string_contains * @haystack: a non-null string * @needle: a non-null string * @@ -304,7 +304,7 @@ string_result_t string_concat(const string_t *x, const string_t *y) { * Returns a string_result_t containing the index to the beginning of the located string * (if the substring has been found) */ -string_result_t string_substring(const string_t *haystack, const string_t *needle) { +string_result_t string_contains(const string_t *haystack, const string_t *needle) { string_result_t result = { .status = STRING_OK, .value.idx = -1 @@ -335,6 +335,76 @@ string_result_t string_substring(const string_t *haystack, const string_t *needl return result; } +/** + * string_slice + * @str: a non-null string + * @start: the lower bound (inclusive) + * @end: the upper bound (inclusive) + * + * Extracts a slice from @str between @start and @end (inclusive) + * + * Returns a string_result_t data type containing the slice + */ +string_result_t string_slice(const string_t *str, size_t start, size_t end) { + string_result_t result = {0}; + + if (str == NULL) { + result.status = 
STRING_ERR_INVALID; + SET_MSG(result, "Invalid string"); + + return result; + } + + if (start > end || end >= str->char_count) { + result.status = STRING_ERR_OVERFLOW; + SET_MSG(result, "Index out of bounds"); + + return result; + } + + size_t start_byte_offset = 0; + for (size_t idx = 0; idx < start; idx++) { + start_byte_offset += utf8_char_len((unsigned char)str->data[start_byte_offset]); + } + + size_t end_byte_offset = start_byte_offset; + for (size_t idx = start; idx <= end; idx++) { + end_byte_offset += utf8_char_len((unsigned char)str->data[end_byte_offset]); + } + + const size_t slice_byte_size = end_byte_offset - start_byte_offset; + + string_t *slice = malloc(sizeof(string_t)); + if (slice == NULL) { + result.status = STRING_ERR_ALLOCATE; + SET_MSG(result, "Cannot allocate memory"); + + return result; + } + + slice->data = malloc(slice_byte_size + 1); + if (slice->data == NULL) { + free(slice); /* the header allocated above has no other owner yet; release it instead of leaking it */ + result.status = STRING_ERR_ALLOCATE; + SET_MSG(result, "Cannot allocate memory"); + + return result; + } + + memcpy(slice->data, str->data + start_byte_offset, slice_byte_size); + slice->data[slice_byte_size] = '\0'; + + slice->byte_size = slice_byte_size; + slice->byte_capacity = slice_byte_size + 1; + slice->char_count = end - start + 1; + + result.status = STRING_OK; + result.value.string = slice; + SET_MSG(result, "String sliced successfully"); + + return result; +} + /** * string_eq * @x: a non-null string @@ -405,7 +475,14 @@ string_result_t string_substring(const string_t *haystack, const string_t *needl string_result_t string_get_at(const string_t *str, size_t position) { string_result_t result = {0}; - if (str == NULL || position >= str->char_count) { + if (str == NULL) { + result.status = STRING_ERR_INVALID; + SET_MSG(result, "Invalid string"); + + return result; + } + + if (position >= str->char_count) { result.status = STRING_ERR_OVERFLOW; SET_MSG(result, "Index out of bounds"); diff --git a/src/string.h b/src/string.h index 73ece18..23f5cc0 100644 --- 
a/src/string.h +++ b/src/string.h @@ -26,9 +26,9 @@ typedef struct { string_status_t status; uint8_t message[RESULT_MSG_SIZE]; union { - string_t *string; // For new, reverse, trim + string_t *string; // For new, clone, slice, reverse, trim char *symbol; // For get_at - int64_t idx; // For substring search + int64_t idx; // For contains bool is_equ; // For comparison struct { // For split string_t **strings; @@ -45,7 +45,8 @@ extern "C" { string_result_t string_new(const char *c_str); string_result_t string_clone(const string_t *str); string_result_t string_concat(const string_t *x, const string_t *y); -string_result_t string_substring(const string_t *haystack, const string_t *needle); +string_result_t string_contains(const string_t *haystack, const string_t *needle); +string_result_t string_slice(const string_t *str, size_t start, size_t end); string_result_t string_eq(const string_t *x, const string_t *y, bool case_sensitive); string_result_t string_get_at(const string_t *str, size_t position); string_result_t string_set_at(string_t *str, size_t position, const char *utf8_char); diff --git a/tests/test_string.c b/tests/test_string.c index a7d3562..88d9c0f 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -1,6 +1,6 @@ /* * Unit tests for String data type -*/ + */ #define TEST(NAME) do { \ printf("Running test_%s...", #NAME); \ @@ -11,6 +11,7 @@ #include #include #include +#include #include "../src/string.h" @@ -39,6 +40,20 @@ void test_string_new_empty(void) { string_destroy(res.value.string); } +// Test cloning an existing string +void test_string_clone(void) { + string_t *original = string_new("Original").value.string; + string_result_t res = string_clone(original); + + assert(res.status == STRING_OK); + assert(res.value.string != original); // Different memory address + assert(strcmp(res.value.string->data, original->data) == 0); + assert(res.value.string->byte_size == original->byte_size); + + string_destroy(original); + 
string_destroy(res.value.string); +} + // Test string concatenation void test_string_concat(void) { string_t *str1 = string_new("Foo").value.string; @@ -54,6 +69,71 @@ void test_string_concat(void) { string_destroy(res.value.string); } +// Test if string contains a substring +void test_string_contains(void) { + string_t *haystack = string_new("Hello 🌍 World").value.string; + string_t *needle_ascii = string_new("World").value.string; + string_t *needle_utf8 = string_new("🌍").value.string; + string_t *needle_none = string_new("not found").value.string; + + // World starts at symbol 8 + string_result_t res1 = string_contains(haystack, needle_ascii); + assert(res1.status == STRING_OK); + assert(res1.value.idx == 8); + + // 🌍 is at position 6 + string_result_t res2 = string_contains(haystack, needle_utf8); + assert(res2.status == STRING_OK); + assert(res2.value.idx == 6); + + // Not found should return -1 + string_result_t res3 = string_contains(haystack, needle_none); + assert(res3.status == STRING_OK); + assert(res3.value.idx == -1); + + string_destroy(haystack); + string_destroy(needle_ascii); + string_destroy(needle_utf8); + string_destroy(needle_none); +} + +// Test string slicing +void test_string_slice(void) { + // ASCII slice + string_t *str1 = string_new("foobar").value.string; + string_result_t res1 = string_slice(str1, 2, 4); + + assert(res1.status == STRING_OK); + assert(strcmp(res1.value.string->data, "oba") == 0); + assert(res1.value.string->char_count == 3); + + // UTF-8 slice + string_t *str2 = string_new("AB😆🌍").value.string; + string_result_t res2 = string_slice(str2, 2, 2); + + assert(res2.status == STRING_OK); + assert(strcmp(res2.value.string->data, "😆") == 0); + assert(res2.value.string->byte_size == 4); // emoji = 4 bytes + + // UTF-8 + ASCII slice + string_result_t res3 = string_slice(str2, 0, 2); + assert(res3.status == STRING_OK); + assert(strcmp(res3.value.string->data, "AB😆") == 0); + + // Invalid bounds + string_result_t res4 = 
string_slice(str1, 5, 2); + assert(res4.status == STRING_ERR_OVERFLOW); + + res4 = string_slice(str1, 1, 50); + assert(res4.status == STRING_ERR_OVERFLOW); + + string_destroy(str1); + string_destroy(str2); + string_destroy(res1.value.string); + string_destroy(res2.value.string); + string_destroy(res3.value.string); +} + // Test case-insensitive and sensitive comparison void test_string_eq(void) { string_t *str1 = string_new("Foo").value.string; @@ -83,6 +163,35 @@ void test_string_reverse_utf8(void) { string_destroy(res.value.string); } +// Test string get_at +void test_string_get_at(void) { + string_t *str = string_new("AB😆🌍").value.string; + + // 😆 is at index 2 + string_result_t res1 = string_get_at(str, 2); + assert(res1.status == STRING_OK); + assert(strcmp((char*)res1.value.symbol, "😆") == 0); + free(res1.value.symbol); + + // 🌍 is at index 3 + string_result_t res2 = string_get_at(str, 3); + assert(res2.status == STRING_OK); + assert(strcmp((char*)res2.value.symbol, "🌍") == 0); + free(res2.value.symbol); + + string_destroy(str); +} + +// Test string get_at with invalid index +void test_string_get_at_overflow(void) { + string_t *str = string_new("ABC").value.string; + + string_result_t res = string_get_at(str, 50); + assert(res.status == STRING_ERR_OVERFLOW); + + string_destroy(str); +} + // Test mutation of UTF-8 symbol void test_string_set_at(void) { string_t *str = string_new("ABC").value.string; @@ -107,6 +216,30 @@ void test_string_set_at_overflow(void) { string_destroy(str); } +// Test string to lowercase +void test_string_to_lower(void) { + string_t *str = string_new("AbC").value.string; + string_result_t res = string_to_lower(str); + + assert(res.status == STRING_OK); + assert(strcmp(res.value.string->data, "abc") == 0); + + string_destroy(str); + string_destroy(res.value.string); +} + +// Test string to uppercase +void test_string_to_upper(void) { + string_t *str = string_new("aBc").value.string; + string_result_t res = string_to_upper(str); + + 
assert(res.status == STRING_OK); + assert(strcmp(res.value.string->data, "ABC") == 0); + + string_destroy(str); + string_destroy(res.value.string); +} + // Test whitespace trimming void test_string_trim(void) { string_t *str = string_new(" \t Foo Bar \n ").value.string; @@ -127,26 +260,49 @@ void test_string_split(void) { assert(res.status == STRING_OK); assert(res.value.split.count == 3); - assert(strcmp(res.value.split.strings[0]->data, "Red") == 0); - assert(strcmp(res.value.split.strings[1]->data, "Green") == 0); - assert(strcmp(res.value.split.strings[2]->data, "Blue") == 0); + const size_t count = res.value.split.count; + string_t **strings = res.value.split.strings; - string_split_destroy(res.value.split.strings, res.value.split.count); + const char *expected[] = { "Red", "Green", "Blue" }; + for (size_t idx = 0; idx < count; idx++) { + assert(strcmp(strings[idx]->data, expected[idx]) == 0); + } + + string_split_destroy(strings, count); string_destroy(str); } +// Test string destroy +void test_string_destroy(void) { + string_t *str = string_new("delete me").value.string; + + string_result_t res = string_destroy(str); + assert(res.status == STRING_OK); + + string_result_t res_null = string_destroy(NULL); + assert(res_null.status == STRING_ERR_INVALID); +} + int main(void) { - printf("=== Running Vector unit tests ===\n\n"); + printf("=== Running String unit tests ===\n\n"); TEST(string_new); TEST(string_new_empty); + TEST(string_clone); TEST(string_concat); + TEST(string_contains); + TEST(string_slice); TEST(string_eq); TEST(string_reverse_utf8); + TEST(string_get_at); + TEST(string_get_at_overflow); TEST(string_set_at); TEST(string_set_at_overflow); + TEST(string_to_lower); + TEST(string_to_upper); TEST(string_trim); TEST(string_split); + TEST(string_destroy); printf("\n=== All tests passed! 
===\n"); return 0; diff --git a/usage.c b/usage.c index ec8a709..37d5784 100644 --- a/usage.c +++ b/usage.c @@ -543,7 +543,18 @@ int string_usage(void) { string_t *str1 = res.value.string; printf("Created string: \"%s\"\n", str1->data); - printf("Character count: %zu (%zu actual bytes)\n\n", string_len(str1), str1->byte_size); + printf("Character count: %zu (%zu actual bytes)\n", string_len(str1), str1->byte_size); + + string_result_t res_clone = string_clone(str1); + if (res_clone.status != STRING_OK) { + printf("Error: %s\n", res_clone.message); /* report the failed clone's own message, not the earlier `res` */ + + return 1; + } + + string_t *cloned = res_clone.value.string; + printf("Cloned string: \"%s\"\n\n", cloned->data); + string_destroy(cloned); // Concatenation of strings string_result_t res_suffix = string_new("World! 🦜"); @@ -568,6 +579,46 @@ int string_usage(void) { string_t *concat_str = res_cat.value.string; printf("Concatenation result: \"%s\"\n\n", concat_str->data); + // String contains + string_t *haystack = string_new("The quick brown fox jumps over the lazy dog.").value.string; + string_t *needle = string_new("brown fox").value.string; + + string_result_t res_contains = string_contains(haystack, needle); + if (res_contains.status != STRING_OK) { + printf("Error: %s\n", res_contains.message); + + return 1; + } + + if (res_contains.value.idx != -1) { + printf("Substring found. Starting at index %lld\n\n", (long long)res_contains.value.idx); /* idx is a signed 64-bit value, so %zu (size_t) is the wrong conversion */ + } + + string_destroy(haystack); + string_destroy(needle); + + // String slicing + string_result_t res_slice = string_slice(concat_str, 7, 14); + if (res_slice.status != STRING_OK) { + printf("Error: %s\n", res_slice.message); + + return 1; + } + + printf("Slice of string: \"%s\"\n\n", res_slice.value.string->data); + string_destroy(res_slice.value.string); + + // String equality + string_t *compare = string_new("hello, World! 
🦜").value.string; + string_result_t res_eq = string_eq(concat_str, compare, true); + if (res_eq.value.is_equ) { + printf("The two strings are equal\n\n"); + } else { + printf("The two strings are not equal\n\n"); + } + + string_destroy(compare); + // Uppercase string string_result_t res_upper = string_to_upper(concat_str); if (res_upper.status != STRING_OK) { @@ -617,6 +668,41 @@ int string_usage(void) { printf("Extracted symbol: \"%s\"\n", res_get.value.symbol); free(res_get.value.symbol); + // Trim string + string_t *to_trim = string_new(" foo ").value.string; + string_result_t res_trim = string_trim(to_trim); + if (res_trim.status != STRING_OK) { + printf("Error: %s\n", res_trim.message); + + return 1; + } + + printf("Trimmed string: \"%s\"\n\n", res_trim.value.string->data); + string_destroy(to_trim); + string_destroy(res_trim.value.string); + + // Split string + string_t *to_split = string_new("foo/bar/biz").value.string; + string_result_t res_split = string_split(to_split, "/"); + if (res_split.status != STRING_OK) { + printf("Error: %s\n", res_split.message); + + return 1; + } + + const size_t count = res_split.value.split.count; + string_t **strings = res_split.value.split.strings; + + printf("Original string: \"%s\"\nSplitted string: ", to_split->data); + for (size_t idx = 0; idx < count; idx++) { + printf("\"%s\" ", strings[idx]->data); + } + + printf("\n"); + + string_split_destroy(strings, count); + string_destroy(to_split); + string_destroy(concat_str); string_destroy(str1);