diff --git a/README.md b/README.md
new file mode 100644
index 0000000..627e4af
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# Sterling
+
+`i am delusionnal !!! don't take me seriously`
+
+ptr : addr : 64bit
+arr : addr + size : 128bit
+
+
diff --git a/a.exe b/a.exe
new file mode 100644
index 0000000..40940e7
Binary files /dev/null and b/a.exe differ
diff --git a/include/SterlingCompiler.h b/include/SterlingCompiler.h
index 7108dbc..492f340 100644
--- a/include/SterlingCompiler.h
+++ b/include/SterlingCompiler.h
@@ -1,29 +1,53 @@
 #ifndef STERLING_COMPILER_H
 # define STERLING_COMPILER_H
 
-typedef enum {
-    TOK_NONE,
-    TOK_STRING,
-    TOK_TOKEN,
-    TOK_PREPROCESSOR,
-} TKN_CTX;
-
-typedef struct Token_s {
-    int size;
-    char *data;
-    TKN_CTX ctx;
-} Token_t;
-
-typedef struct {
-    int size;
-    Token_t *token;
-} TokenList;
-
 #include
 #include
 #include
 #include
 #include
 #include
+#include
+
+//simd
+# ifdef __x86_64__
+# include
+# endif
+
+typedef enum {
+    TOK_NONE,
+    TOK_STRING, // quoted literal, stored verbatim including its quotes
+    TOK_RAW, // source text that still needs to be refined
+    TOK_PREPROCESSOR,
+} TKN_CTX;
+
+typedef struct Token_s {
+    int size; // length of data in bytes, terminator excluded
+    TKN_CTX ctx;
+    char *data; // heap-allocated, NUL-terminated text owned by the token
+} Token_t;
+
+//builtin type: if x86_64, then since i use simd, should align them
+
+typedef struct vec2 {
+    float x;
+    float y;
+    float _padding1;
+    float _padding2;
+} __attribute__((aligned(16))) vec2;//losing 8 byte
+
+typedef struct vec3 {
+    float x;
+    float y;
+    float z;
+    float _padding;
+} __attribute__((aligned(16))) vec3;//losing 4 byte
+
+typedef struct vec4 {
+    float x;
+    float y;
+    float z;
+    float w;
+} __attribute__((aligned(16))) vec4; // the typedef needs a name, otherwise only the struct tag is declared
 
 #endif
diff --git a/source/list.h b/source/list.h
new file mode 100644
index 0000000..a45a824
--- /dev/null
+++ b/source/list.h
@@ -0,0 +1,153 @@
+#ifndef LIST_H
+# define LIST_H
+
+#include
+#include
+#include
+#include
+
+typedef struct node_s {
+    void *data;
+    struct node_s *next;
+} node_t;
+
+typedef struct {
+    int size;
+    node_t *first;
+    node_t *last;
+} list_t;
+
+typedef struct {
+    node_t *current;
+} list_iter_t;
+
+
+list_t *ListInit(list_t *lst); // reset lst to empty; allocates it when lst is NULL (returns NULL if that fails)
+void ListInsert(list_t *lst, size_t idx, void *data); // link data at position idx; an idx past the end appends
+void ListPushBack(list_t *lst, void *data); // append data after the last node
+list_iter_t ListGetIter(const list_t *lst); // iterator positioned on the first node
+void *ListAdvance(list_iter_t *it); // return the current payload and step forward; NULL at the end
+void *ListPeek(const list_iter_t *it); // current payload without moving; NULL at the end
+void *ListPeekNext(const list_iter_t *it); // payload of the node after the current one; NULL if there is none
+bool ListAtEnd(const list_iter_t *it); // true once the iterator has no current node
+void ListReset(list_iter_t *it, const list_t *lst); // rewind it to the first node of lst
+void ListFree(list_t *lst, void (*free_func)(void*)); // free every node (payloads via free_func when non-NULL); lst itself is not freed
+void *ListPeekK(const list_iter_t *it, size_t k); // payload k nodes ahead (k == 0 is the current one); NULL if the list is shorter
+void ListRestore(list_iter_t *it, const list_iter_t checkpoint); // move it back to a saved position
+bool ListMatch(list_iter_t *it, bool (*predicate)(void*, void*), void *target); // consume the current node when predicate(payload, target) holds
+
+# ifdef LIST_IMPLEMENTATION
+
+list_t *ListInit(list_t *lst) {
+    if (!lst) {
+        lst = malloc(sizeof *lst);
+        if (!lst) return (NULL); // out of memory: there is nothing to initialise
+    }
+    lst->first = NULL;
+    lst->last = NULL;
+    lst->size = 0;
+    return (lst);
+}
+
+void ListInsert(list_t *lst, size_t idx, void *data) {
+    node_t *node = malloc(sizeof *node);
+    if (!node) abort(); // out of memory: fail loudly instead of dereferencing NULL
+    node->data = data;
+    node->next = NULL;
+    if (lst->size == 0) {
+        lst->first = lst->last = node;
+    } else if (idx == 0) {
+        node->next = lst->first;
+        lst->first = node;
+    } else {
+        node_t *prev = lst->first;
+        for (size_t i = 1; i < idx && prev->next; i++) { // walk to the node before idx, stopping at the tail
+            prev = prev->next;
+        }
+        node->next = prev->next;
+        prev->next = node;
+        if (node->next == NULL) {
+            lst->last = node;
+        }
+    }
+
+    lst->size++;
+}
+
+void ListPushBack(list_t *lst, void *data) {
+    node_t *node = malloc(sizeof *node);
+    if (!node) abort(); // out of memory: fail loudly instead of dereferencing NULL
+    node->data = data;
+    node->next = NULL;
+
+    if (!lst->first) {
+        lst->first = node;
+    } else {
+        lst->last->next = node;
+    }
+    lst->last = node;
+    lst->size++;
+}
+
+list_iter_t ListGetIter(const list_t *lst) {
+    return ((list_iter_t){.current = lst->first});
+}
+
+void *ListAdvance(list_iter_t *it) {
+    if (!it->current) return NULL;
+    void *data = it->current->data;
+    it->current = it->current->next;
+    return (data);
+}
+
+void *ListPeek(const list_iter_t *it) {
+    return (it->current ? it->current->data : NULL);
+}
+
+void *ListPeekNext(const list_iter_t *it) {
+    return (it->current ? (it->current->next ? it->current->next->data : NULL) : NULL);
+}
+
+bool ListAtEnd(const list_iter_t *it) {
+    return (it->current == NULL);
+}
+
+void ListReset(list_iter_t *it, const list_t *lst) {
+    it->current = lst->first;
+}
+
+void ListFree(list_t *lst, void (*free_func)(void*)) {
+    node_t *current = lst->first;
+    while (current) {
+        node_t *next = current->next; // saved first: current is released below
+        if (free_func) free_func(current->data);
+        free(current);
+        current = next;
+    }
+    lst->size = 0;
+    lst->first = lst->last = NULL;
+}
+
+void *ListPeekK(const list_iter_t *it, size_t k) {
+    node_t *current = it->current;
+    for (size_t i = 0; i < k && current; i++) {
+        current = current->next;
+    }
+    return (current ? current->data : NULL);
+}
+
+void ListRestore(list_iter_t *it, const list_iter_t checkpoint) {
+    it->current = checkpoint.current;
+}
+
+bool ListMatch(list_iter_t *it, bool (*predicate)(void*, void*), void *target) {
+    if (it->current && predicate(it->current->data, target)) {
+        ListAdvance(it);
+        return true;
+    }
+    return false;
+}
+
+# endif
+
+#endif
diff --git a/source/main.c b/source/main.c
index efc5d57..59c46c9 100644
--- a/source/main.c
+++ b/source/main.c
@@ -1,93 +1,266 @@
-#include
+#define LIST_IMPLEMENTATION
+#include "list.h"
+
+#include "../include/SterlingCompiler.h"
 
+// Read the whole of filename into a heap-allocated, NUL-terminated buffer that the caller frees.
 char *LoadFile(const char *filename) {
-    char *data = NULL;
     FILE *file = NULL;
-    const int buff_size = 2048;//may change for max size of file in character
-    char buffer[buff_size];
-    int count = 0;
+    char *data = NULL;
     file = fopen(filename, "r");
-    count = fread(buffer, 1, buff_size, file);
-    data = calloc(count + 1, sizeof(char));
-    memcpy(data, buffer, count);
+    assert(file);
+    fseek(file, 0, SEEK_END);
+    long size = ftell(file);
+    assert(size >= 0); // ftell reports failure with -1
+    fseek(file, 0, SEEK_SET);
+    data = malloc((size_t)size + 1);
+    assert(data);
+    size_t got = fread(data, 1, (size_t)size, file); // a text-mode read can return fewer bytes than ftell counted
+    data[got] = 0x00; // terminate at what was actually read, not at the ftell size
     fclose(file);
     return (data);
 }
 
-TokenList SeparateString(char *data) {
-    char *span = NULL;
-    char *tok = NULL;
-    char *tmp = NULL;
-    bool toggle = false;
-    Token_t buffer[256];
-    tok = strtok_r(data, "\"\'", &span);
-    int i = 0;
-    buffer[i].ctx = TOK_NONE;
-    while (tok && i < (sizeof(buffer) / sizeof(Token_t))) {
-        size_t size = (span - tok);
-        tmp = calloc(size + 1, sizeof(char));
-        memcpy_s(tmp, size, tok, size);
-        tmp[size] = 0x00;
-        buffer[i].data = tmp;
-        buffer[i].size = size;
-        if (toggle) {
-            buffer[i].ctx = TOK_STRING;
-        } else {
-            buffer[i].ctx = TOK_NONE;
-        }
-        tok = strtok_r(NULL, "\"\'", &span);
-        i++;
-        toggle ^= toggle;
+# ifndef strndup
+// Fallback strndup: copy at most n bytes of s into a fresh NUL-terminated buffer (NULL when allocation fails).
+char *strndup(const char *s, size_t n) {
+    size_t len = 0;
+    while (len < n && s[len]) len++; // stop at the terminator so we never read past the end of s
+    char *str = calloc(len + 1, sizeof(char));
+    if (str) memcpy(str, s, len);
+    return (str);
+}
+# endif
 
-    }
-    TokenList token_list;
-    token_list.token = calloc(i, sizeof(Token_t));
-    token_list.size = i;
-    memcpy(token_list.token, buffer, i);
-    for (int k = 0; k < i; k++) {
-        token_list.token[k] = buffer[i];
-    }
-    return (token_list);
+// True when s holds nothing but whitespace characters (an empty string counts as whitespace).
+bool IsWhitespace(const char *s) {
+    while (*s) {
+        if (!isspace((unsigned char)*s)) return false;
+        s++;
+    }
+    return true;
 }
 
-void tmpFunction() {
-    char* data = NULL;
-    char* span = NULL;
-    char* tmp = NULL;
-    char* delim = " \t\n";
-    char* special = ";#";
-    size_t size = 0;
-    char* tok = strtok_r(data, delim, &span);
-    while (tok) {
-        size = (span - tok);
-        tmp = malloc(size + 1);
-        memcpy_s(tmp, size, tok, size);
-        tmp[size] = 0x00;
-        printf("|%s|\n", tmp);
-        free(tmp);
-        tok = strtok_r(NULL, " \t\n", &span);
-    }
+// Free callback for ListFree: releases a Token_t together with the text it owns.
+void ClearTokens(void*arg) {
+    Token_t *tok = arg;
+    free(tok->data);
+    free(tok);
 }
 
+// Helper to create and link a new token
+void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) {
+    size_t len = (size_t)(end - start);
+    Token_t *t = calloc(1, sizeof(Token_t));
+    assert(t);
+    t->data = malloc(len + 1);
+    assert(t->data);
+    memcpy(t->data, start, len);
+    t->data[len] = '\0';
+    t->size = (int)len;
+    t->ctx = ctx;
+    ListPushBack(lst, t);
+}
+
+// Helper to push code that still needs to be refined
+void PushRaw(char *start, char *end, list_t *lst) {
+    if (end <= start) return;
+    PushToken(start, end, TOK_RAW, lst);
+}
+
+// Split data into TOK_RAW and TOK_STRING tokens on quote boundaries (comments are not handled here).
+void SeparateStrings(char *data, list_t *tkn_lst) {
+    char *curr = data;
+    char *start = data;
+
+    while (*curr != '\0') {
+        // If we find a quote, we need to "package" the raw code before it,
+        // then package the string itself.
+        if (*curr == '\"' || *curr == '\'') {
+            char quote_type = *curr;
+
+            // 1. Save the "Raw" chunk before the string
+            PushRaw(start, curr, tkn_lst);
+
+            // 2. Find the end of the string
+            char *str_start = curr;
+            curr++; // Skip opening quote
+            while (*curr != '\0' && *curr != quote_type) {
+                if (*curr == '\\' && *(curr + 1)) curr++; // Skip escaped characters like \", but never the terminator
+                curr++;
+            }
+            if (*curr == quote_type) curr++; // Include closing quote
+
+            // 3. Save the String Token
+            PushToken(str_start, curr, TOK_STRING, tkn_lst);
+
+            start = curr; // Reset start to the character after the string
+        } else {
+            curr++;
+        }
+    }
+
+    // 4. Catch any remaining raw code after the last string
+    PushRaw(start, curr, tkn_lst);
+}
+
+// Split data into TOK_RAW and TOK_STRING tokens, dropping // and /* */ comments along the way.
+void InitialScanner(char *data, list_t *tkn_lst) {
+    char *curr = data;
+    char *start = data;
+
+    while (*curr != '\0') {
+        // 1. Handle Strings
+        if (*curr == '\"' || *curr == '\'') {
+            PushRaw(start, curr, tkn_lst); // Save code before string
+            char quote = *curr;
+            char *str_start = curr++;
+            while (*curr && *curr != quote) {
+                if (*curr == '\\' && *(curr + 1)) curr++; // Skip escaped char
+                curr++;
+            }
+            if (*curr) curr++;
+            PushToken(str_start, curr, TOK_STRING, tkn_lst);
+            start = curr;
+        }
+        // 2. Handle Comments
+        else if (*curr == '/' && (*(curr + 1) == '/' || *(curr + 1) == '*')) {
+            PushRaw(start, curr, tkn_lst); // Save code before comment
+
+            if (*(curr + 1) == '/') { // Single line //
+                while (*curr && *curr != '\n') curr++;
+                if (*curr) curr++; // skip the \n, but stay on the terminator when the comment ends the file
+            } else { // Multi-line /*
+                curr += 2;
+                while (*curr && !(*curr == '*' && *(curr + 1) == '/')) curr++;
+                if (*curr) curr += 2; // Move past */
+            }
+            // We DON'T push a token here because we want to ignore comments.
+            // If you want to keep them (for a doc-generator), push a TOK_COMMENT.
+            start = curr;
+        }
+        else {
+            curr++;
+        }
+    }
+    PushRaw(start, curr, tkn_lst); // Catch the tail
+}
+
+// Split every TOK_RAW token on whitespace so each word gets its own node; whitespace-only tokens are left as they are.
+void RefineRawNodes(list_t *tkn_lst) {
+    node_t *curr = tkn_lst->first;
+
+    while (curr) {
+        Token_t *t = (Token_t *)curr->data;
+
+        // Only process RAW chunks; leave TOK_STRING nodes alone!
+        if (t->ctx == TOK_RAW) {
+            char *span = NULL;
+            // Note: We use a copy because strtok modifies the string
+            char *to_split = strndup(t->data, strlen(t->data)); // strndup rather than the Microsoft-only _strdup
+            assert(to_split);
+            char *tok = strtok_r(to_split, " \t\r\n", &span);
+
+            if (tok) {
+                // 1. Update the current node's data with the FIRST token found
+                free(t->data);
+                t->data = strndup(tok, strlen(tok));
+                assert(t->data);
+                t->size = (int)strlen(tok);
+
+                node_t *last_inserted = curr;
+                tok = strtok_r(NULL, " \t\r\n", &span);
+
+                // 2. Insert NEW nodes for the rest of the tokens
+                while (tok) {
+                    Token_t *new_t = calloc(1, sizeof(Token_t));
+                    node_t *new_node = calloc(1, sizeof(node_t));
+                    assert(new_t && new_node);
+                    new_t->data = strndup(tok, strlen(tok));
+                    assert(new_t->data);
+                    new_t->size = (int)strlen(tok);
+                    new_t->ctx = TOK_RAW;
+                    new_node->data = new_t;
+
+                    // Insert into the list
+                    new_node->next = last_inserted->next;
+                    last_inserted->next = new_node;
+
+                    if (tkn_lst->last == last_inserted) tkn_lst->last = new_node;
+
+                    last_inserted = new_node;
+                    tkn_lst->size++;
+                    tok = strtok_r(NULL, " \t\r\n", &span);
+                }
+                curr = last_inserted; // Move cursor to the end of the new chain
+            }
+            free(to_split);
+        }
+
+        curr = curr->next;
+    }
+}
+
+// Unlink and free every TOK_RAW node whose text is only whitespace.
+void PruneWhitespaceNodes(list_t *lst) {
+    node_t *curr = lst->first;
+    node_t *prev = NULL;
+
+    while (curr) {
+        Token_t *t = (Token_t *)curr->data;
+        if (t->ctx == TOK_RAW && IsWhitespace(t->data)) {
+            // Unlink and free
+            node_t *temp = curr;
+            if (prev) prev->next = curr->next;
+            else lst->first = curr->next;
+
+            if (lst->last == temp) lst->last = prev;
+
+            curr = curr->next;
+            ClearTokens(temp->data);
+            free(temp);
+            lst->size--;
+        } else {
+            prev = curr;
+            curr = curr->next;
+        }
+    }
+}
+
+
 int main(int ac, char **av) {
     if (ac <= 1) {
         printf("no file specified");
         return (-1);
     }
     char* data = LoadFile(av[1]);
-    TokenList tkn_lst;
-    //first pass on string
-    tkn_lst = SeparateString(data);
-    //second pass on ; and \n
-    //third pass on \t and space
-    //give each token a context
-    //let's replace preprocessor
-    //let's do recursive parsing everywhere that need it
-    //compile time reflection
-    //metaprogramming logic annotation if i do it lastly
-    for (int i = 0; i < tkn_lst.size; i++) {
-        free(tkn_lst.token[i].data);
+    list_t *tkn_lst = ListInit(NULL);
+    assert(tkn_lst);
+    //first pass on string, whitespace and comments
+    InitialScanner(data, tkn_lst);
+    //SeparateStrings(data, tkn_lst);
+    list_iter_t iter = ListGetIter(tkn_lst);
+    while (iter.current) {
+        printf("|%s|", ((Token_t *)iter.current->data)->data);
+        iter.current = iter.current->next;
     }
+    PruneWhitespaceNodes(tkn_lst);
+    printf("\n___\n");
+    RefineRawNodes(tkn_lst);
+    ListReset(&iter, tkn_lst);
+    while (iter.current) {
+        printf("|%s|\n", ((Token_t *)iter.current->data)->data);
+        iter.current = iter.current->next;
+    }
+    //pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`"
+    //give each token a context
+    //let's replace preprocessor (include, define, etc)
+    //let's do recursive parsing everywhere that need it
+    //compile time reflection (@comptime or @reflect)
+    //metaprogramming logic annotation if i do it lastly** may not be
+    ListFree(tkn_lst, ClearTokens);
+    free(tkn_lst); // ListFree only releases the nodes; the list itself was allocated by ListInit(NULL)
     free(data);
     return(0);
 }
+//test