Skip to content

Commit

Permalink
Toy_String now fragments strings that are too long
Browse files Browse the repository at this point in the history
  • Loading branch information
Ratstail91 committed Oct 12, 2024
1 parent c1d72ad commit 7b1dbf2
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 34 deletions.
5 changes: 0 additions & 5 deletions source/toy_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,11 +608,6 @@ void Toy_bindParser(Toy_Parser* parser, Toy_Lexer* lexer) {
Toy_Ast* Toy_scanParser(Toy_Bucket** bucketHandle, Toy_Parser* parser) {
Toy_Ast* rootHandle = NULL;

//double check bucket capacity for strings
if ((*bucketHandle)->capacity < TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_WARN "WARNING: Bucket capacity in Toy_scanParser() is smaller than TOY_STRING_MAX_LENGTH" TOY_CC_RESET);
}

//check for EOF
if (match(parser, TOY_TOKEN_EOF)) {
Toy_private_emitAstEnd(bucketHandle, &rootHandle);
Expand Down
72 changes: 49 additions & 23 deletions source/toy_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <string.h>

//utils
//yields the smaller of X and Y; whichever argument is selected gets expanded twice, so don't pass expressions with side effects
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))

static void deepCopyUtil(char* dest, Toy_String* str) {
//sometimes, "clever" can be a bad thing...
if (str->type == TOY_STRING_NODE) {
Expand Down Expand Up @@ -45,20 +47,13 @@ static unsigned int hashCString(const char* string) {
return hash;
}

//exposed functions
Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
int length = strlen(cstring);

return Toy_createStringLength(bucketHandle, cstring, length);
}

Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length) {
if (length > TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
static Toy_String* partitionStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
if (sizeof(Toy_String) + length + 1 > (*bucketHandle)->capacity) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't partition enough space for a string, requested %d length (%d total) but buckets have a capacity of %d\n" TOY_CC_RESET, (int)length, (int)(sizeof(Toy_String) + length + 1), (int)((*bucketHandle)->capacity));
exit(-1);
}

Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1);

ret->type = TOY_STRING_LEAF;
ret->length = length;
Expand All @@ -70,15 +65,42 @@ Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstrin
return ret;
}

//exposed functions
//convenience wrapper: measures the null-terminated cstring, then defers to Toy_createStringLength()
//bucketHandle - handle passed straight through to Toy_createStringLength()
//cstring      - null-terminated text to copy; its strlen() becomes the requested length
//returns whatever Toy_createStringLength() returns
Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring) {
	return Toy_createStringLength(bucketHandle, cstring, (unsigned int)strlen(cstring));
}

//creates a string from the first `length` bytes of `cstring`
//bucketHandle - handle to the bucket the string is partitioned from
//cstring      - source characters; at least `length` bytes are read
//length       - number of characters to store
//returns a single leaf when the text fits in one partition, otherwise the fragments joined with Toy_concatStrings()
//exits with -1 when the bucket capacity leaves no room for any characters
Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length) {
	//space every leaf needs besides its characters: the header plus the extra byte partitionStringLength() reserves
	const size_t overhead = sizeof(Toy_String) + 1;
	const size_t capacity = (*bucketHandle)->capacity;

	//normal behaviour: the whole string fits in one partition
	//buckets with no room for characters are also sent here, so partitionStringLength() either stores the empty string or reports the error
	//(comparing before subtracting avoids the unsigned wrap-around, and avoids looping forever on zero-sized fragments)
	if (capacity <= overhead || length <= capacity - overhead) {
		return partitionStringLength(bucketHandle, cstring, length);
	}

	//break the string up if it's too long
	Toy_String* result = NULL;
	unsigned int offset = 0;

	while (offset < length) {
		//re-read the capacity on every pass; the partition calls receive the handle, so it has presumably been repointed at another bucket
		const size_t current = (*bucketHandle)->capacity;
		const size_t room = current > overhead ? current - overhead : 0;

		//a fragment of zero characters would never advance the offset
		if (room == 0) {
			fprintf(stderr, TOY_CC_ERROR "ERROR: Can't fragment a string, buckets have a capacity of %d but each fragment needs more than %d\n" TOY_CC_RESET, (int)current, (int)overhead);
			exit(-1);
		}

		const unsigned int amount = (unsigned int)MIN((size_t)(length - offset), room);
		Toy_String* fragment = partitionStringLength(bucketHandle, cstring + offset, amount);

		result = result == NULL ? fragment : Toy_concatStrings(bucketHandle, result, fragment);

		//advance by the amount actually stored; amount never exceeds the remainder, so offset can't wrap
		offset += amount;
	}

	return result;
}

TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type) {
int length = strlen(cname);
unsigned int length = strlen(cname);

if (length > TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't create a name string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
//name strings can't be broken up
if (sizeof(Toy_String) + length + 1 > (*bucketHandle)->capacity) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't partition enough space for a name string, requested %d length (%d total) but buckets have a capacity of %d\n" TOY_CC_RESET, (int)length, (int)(sizeof(Toy_String) + length + 1), (int)((*bucketHandle)->capacity));
exit(-1);
}

Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1); //TODO: compensate for partitioning more space than bucket capacity
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + length + 1);

ret->type = TOY_STRING_NAME;
ret->length = length;
Expand All @@ -105,7 +127,16 @@ Toy_String* Toy_deepCopyString(Toy_Bucket** bucketHandle, Toy_String* str) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't deep copy a string with refcount of zero\n" TOY_CC_RESET);
exit(-1);
}
Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + str->length + 1); //TODO: compensate for partitioning more space than bucket capacity

//handle deep copies of strings that are too long for the bucket capacity NOTE: slow, could replace this at some point
if (sizeof(Toy_String) + str->length + 1 > (*bucketHandle)->capacity) {
char* buffer = Toy_getStringRawBuffer(str);
Toy_String* result = Toy_createStringLength(bucketHandle, buffer, str->length); //handles the fragmenting
free(buffer);
return result;
}

Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String) + str->length + 1);

if (str->type == TOY_STRING_NODE || str->type == TOY_STRING_LEAF) {
ret->type = TOY_STRING_LEAF;
Expand Down Expand Up @@ -138,11 +169,6 @@ Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* left, Toy_S
exit(-1);
}

if (left->length + right->length > TOY_STRING_MAX_LENGTH) {
fprintf(stderr, TOY_CC_ERROR "ERROR: Can't concat a string longer than %d\n" TOY_CC_RESET, TOY_STRING_MAX_LENGTH);
exit(-1);
}

Toy_String* ret = (Toy_String*)Toy_partitionBucket(bucketHandle, sizeof(Toy_String));

ret->type = TOY_STRING_NODE;
Expand All @@ -162,11 +188,11 @@ void Toy_freeString(Toy_String* str) {
decrementRefCount(str); //TODO: tool for checking the bucket is empty, and freeing it
}

int Toy_getStringLength(Toy_String* str) {
//accessor: returns the `length` field stored on `str`; `str` is dereferenced without a NULL check
unsigned int Toy_getStringLength(Toy_String* str) {
return str->length;
}

int Toy_getStringRefCount(Toy_String* str) {
//accessor: returns the `refCount` field stored on `str`; `str` is dereferenced without a NULL check
unsigned int Toy_getStringRefCount(Toy_String* str) {
return str->refCount;
}

Expand Down
9 changes: 3 additions & 6 deletions source/toy_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
#include "toy_bucket.h"
#include "toy_value.h"

//TODO: Remove this (related to partitioning more space in a bucket issue)
#define TOY_STRING_MAX_LENGTH 1000

//rope pattern
typedef struct Toy_String { //32 | 64 BITNESS
enum Toy_StringType {
Expand Down Expand Up @@ -39,7 +36,7 @@ typedef struct Toy_String { //32 | 64 BITNESS
} Toy_String; //24 | 32

TOY_API Toy_String* Toy_createString(Toy_Bucket** bucketHandle, const char* cstring);
TOY_API Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, int length);
TOY_API Toy_String* Toy_createStringLength(Toy_Bucket** bucketHandle, const char* cstring, unsigned int length);

TOY_API Toy_String* Toy_createNameString(Toy_Bucket** bucketHandle, const char* cname, Toy_ValueType type); //for variable names

Expand All @@ -50,8 +47,8 @@ TOY_API Toy_String* Toy_concatStrings(Toy_Bucket** bucketHandle, Toy_String* lef

TOY_API void Toy_freeString(Toy_String* str);

TOY_API int Toy_getStringLength(Toy_String* str);
TOY_API int Toy_getStringRefCount(Toy_String* str);
TOY_API unsigned int Toy_getStringLength(Toy_String* str);
TOY_API unsigned int Toy_getStringRefCount(Toy_String* str);

TOY_API char* Toy_getStringRawBuffer(Toy_String* str); //allocates the buffer on the heap, needs to be freed

Expand Down
38 changes: 38 additions & 0 deletions tests/cases/test_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,36 @@ int test_string_diffs() {
return 0;
}

//checks that a cstring longer than the bucket capacity is still stored, coming back as a node of the full length
//returns 0 on success, -1 on failure
int test_string_fragmenting() {
	//allocate a long string
	{
		//setup: the bucket is deliberately too small for the cstring
		Toy_Bucket* bucket = Toy_allocateBucket(128);

		//445 characters
		const char* cstring = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

		Toy_String* str = Toy_createString(&bucket, cstring);

		//check: the result must be a node covering the whole text, with a single reference
		const int passed = str->type == TOY_STRING_NODE
			&& str->length == 445
			&& str->refCount == 1;

		if (!passed) {
			fprintf(stderr, TOY_CC_ERROR "ERROR: Failed to fragment a string within Toy_String\n" TOY_CC_RESET);
		}

		//cleanup happens on both paths, in the same order
		Toy_freeString(str);
		Toy_freeBucket(&bucket);

		if (!passed) {
			return -1;
		}
	}

	return 0;
}

int main() {
//run each test set, returning the total errors given
int total = 0, res = 0;
Expand Down Expand Up @@ -848,5 +878,13 @@ int main() {
total += res;
}

{
res = test_string_fragmenting();
if (res == 0) {
printf(TOY_CC_NOTICE "All good\n" TOY_CC_RESET);
}
total += res;
}

return total;
}

0 comments on commit 7b1dbf2

Please sign in to comment.