#define LIST_IMPLEMENTATION #include "list.h" #include "../include/SterlingCompiler.h" char *LoadFile(const char *filename) { FILE *file = NULL; char *data = NULL; file = fopen(filename, "r"); assert(file); fseek(file, 0, SEEK_END); long size = ftell(file); fseek(file, 0, SEEK_SET); data = (char *)malloc(size + 1); assert(data); fread(data, 1, size, file); data[size] = 0x00; fclose(file); return (data); } # ifndef strndup char *strndup(const char *s, size_t n) { char *str = calloc(n + 1, sizeof(char)); memcpy(str, s, n); return (str); } # endif bool IsWhitespace(const char *s) { while (*s) { if (!isspace((unsigned char)*s)) return false; s++; } return true; } void ClearTokens(void*arg) { Token_t *tok = arg; free(tok->data); free(tok); } // Helper to create and link a new token void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) { size_t len = end - start; Token_t *t = calloc(1, sizeof(Token_t)); t->data = malloc(len + 1); memcpy(t->data, start, len); ((char*)t->data)[len] = '\0'; t->size = len; t->ctx = ctx; ListPushBack(lst, t); } // Helper to push code that still needs to be refined void PushRaw(char *start, char *end, list_t *lst) { if (end <= start) return; PushToken(start, end, TOK_RAW, lst); } void SeparateStrings(char *data, list_t *tkn_lst) { char *curr = data; char *start = data; while (*curr != '\0') { // If we find a quote, we need to "package" the raw code before it, // then package the string itself. if (*curr == '\"' || *curr == '\'') { char quote_type = *curr; // 1. Save the "Raw" chunk before the string if (curr > start) { size_t raw_len = curr - start; Token_t *raw = calloc(1, sizeof(Token_t)); raw->data = strndup(start, raw_len); // strndup is C99/POSIX raw->size = raw_len; raw->ctx = TOK_RAW; ListPushBack(tkn_lst, raw); } // 2. Find the end of the string char *str_start = curr; curr++; // Skip opening quote while (*curr != '\0' && *curr != quote_type) { if (*curr == '\\') curr++; // Skip escaped characters like \" curr++; } if (*curr == quote_type) curr++; // Include closing quote // 3. Save the String Token size_t str_len = curr - str_start; Token_t *str_tok = calloc(1, sizeof(Token_t)); str_tok->data = strndup(str_start, str_len); str_tok->size = str_len; str_tok->ctx = TOK_STRING; ListPushBack(tkn_lst, str_tok); start = curr; // Reset start to the character after the string } else { curr++; } } // 4. Catch any remaining raw code after the last string if (curr > start) { size_t last_len = curr - start; Token_t *last = calloc(1, sizeof(Token_t)); last->data = strndup(start, last_len); last->size = last_len; last->ctx = TOK_RAW; ListPushBack(tkn_lst, last); } } void InitialScanner(char *data, list_t *tkn_lst) { char *curr = data; char *start = data; while (*curr != '\0') { // 1. Handle Strings if (*curr == '\"' || *curr == '\'') { PushRaw(start, curr, tkn_lst); // Save code before string char quote = *curr; char *str_start = curr++; while (*curr && *curr != quote) { if (*curr == '\\' && *(curr + 1)) curr++; // Skip escaped char curr++; } if (*curr) curr++; PushToken(str_start, curr, TOK_STRING, tkn_lst); start = curr; } // 2. Handle Comments else if (*curr == '/' && (*(curr + 1) == '/' || *(curr + 1) == '*')) { PushRaw(start, curr, tkn_lst); // Save code before comment if (*(curr + 1) == '/') { // Single line // while (*curr && *curr != '\n') curr++; curr++;//for skipping the \n } else { // Multi-line /* curr += 2; while (*curr && !(*curr == '*' && *(curr + 1) == '/')) curr++; if (*curr) curr += 2; // Move past */ } // We DON'T push a token here because we want to ignore comments. // If you want to keep them (for a doc-generator), push a TOK_COMMENT. start = curr; } else { curr++; } } PushRaw(start, curr, tkn_lst); // Catch the tail } void RefineRawNodes(list_t *tkn_lst) { node_t *curr = tkn_lst->first; node_t *prev = NULL; while (curr) { Token_t *t = (Token_t *)curr->data; // Only process RAW chunks; leave TOK_STRING nodes alone! if (t->ctx == TOK_RAW) { char *span = NULL; // Note: We use a copy because strtok modifies the string char *to_split = _strdup(t->data); char *tok = strtok_r(to_split, " \t\r\n", &span); if (tok) { // 1. Update the current node's data with the FIRST token found free(t->data); t->data = _strdup(tok); t->size = strlen(tok); node_t *last_inserted = curr; tok = strtok_r(NULL, " \t\r\n", &span); // 2. Insert NEW nodes for the rest of the tokens while (tok) { Token_t *new_t = calloc(1, sizeof(Token_t)); new_t->data = _strdup(tok); new_t->size = strlen(tok); new_t->ctx = TOK_RAW; node_t *new_node = calloc(1, sizeof(node_t)); new_node->data = new_t; // Insert into the list new_node->next = last_inserted->next; last_inserted->next = new_node; if (tkn_lst->last == last_inserted) tkn_lst->last = new_node; last_inserted = new_node; tkn_lst->size++; tok = strtok_r(NULL, " \t\r\n", &span); } curr = last_inserted; // Move cursor to the end of the new chain } free(to_split); } prev = curr; curr = curr->next; } } void PruneWhitespaceNodes(list_t *lst) { node_t *curr = lst->first; node_t *prev = NULL; while (curr) { Token_t *t = (Token_t *)curr->data; if (t->ctx == TOK_RAW && IsWhitespace(t->data)) { // Unlink and free node_t *temp = curr; if (prev) prev->next = curr->next; else lst->first = curr->next; if (lst->last == temp) lst->last = prev; curr = curr->next; ClearTokens(temp->data); free(temp); lst->size--; } else { prev = curr; curr = curr->next; } } } int main(int ac, char **av) { if (ac <= 1) { printf("no file specified"); return (-1); } char* data = LoadFile(av[1]); list_t *tkn_lst = ListInit(NULL); //first pass on string, whitespace and comments InitialScanner(data, tkn_lst); //SeparateStrings(data, tkn_lst); list_iter_t iter = ListGetIter(tkn_lst); while (iter.current) { printf("|%s|", ((Token_t *)iter.current->data)->data); iter.current = iter.current->next; } PruneWhitespaceNodes(tkn_lst); printf("\n___\n"); RefineRawNodes(tkn_lst); ListReset(&iter, tkn_lst); while (iter.current) { printf("|%s|\n", ((Token_t *)iter.current->data)->data); iter.current = iter.current->next; } //pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`" //give each token a context //let's replace preprocessor (include, define, etc) //let's do recursive parsing everywhere that need it //compile time reflection (@comptime or @reflect) //metaprogramming logic annotation if i do it lastly** may not be ListFree(tkn_lst, ClearTokens); free(data); return(0); } //test