#ifdef noneafjodsjf
/* NOTE(review): everything below is compiled out by the #ifdef guard above
 * (the macro name looks like scratch text, so this region is dead code).
 * A stray empty "#define" that followed the guard was invalid preprocessor
 * syntax and has been removed. */

// Clean up the node creation to be more "C-Style".
// Returns a zeroed node owning no data, or NULL on allocation failure.
node_t *NewNode(void *data)
{
    node_t *n = calloc(1, sizeof(node_t));
    if (n)
        n->data = data;
    return n;
}

// Optimization: Use a specialized Token creation function.
// Copies [start, start+len) into a fresh NUL-terminated buffer.
// Returns NULL on allocation failure (original dereferenced unchecked malloc).
Token_t *NewToken(const char *start, size_t len, TKN_CTX ctx)
{
    Token_t *t = malloc(sizeof(Token_t));
    if (!t)
        return NULL;
    t->data = malloc(len + 1);
    if (!t->data) {
        free(t);
        return NULL;
    }
    memcpy(t->data, start, len);
    ((char *)t->data)[len] = '\0';
    t->size = len;
    t->ctx = ctx;
    return t;
}

// Merge adjacent single-character RAW tokens that form a multi-char operator
// listed in MUNCH_TABLE (e.g. '+' '+' -> "++"). The second token's node is
// unlinked and freed. After a merge we stay on the same node so a third
// character can extend the operator (e.g. ">>" then "=" -> ">>=").
void MunchTokens(list_t *lst)
{
    node_t *curr = lst->first;
    while (curr && curr->next) {
        Token_t *t1 = (Token_t *)curr->data;
        Token_t *t2 = (Token_t *)curr->next->data;
        // Only munch RAW tokens that are single characters
        if (t1->ctx == TOK_RAW && t2->ctx == TOK_RAW &&
            t1->size == 1 && t2->size == 1) {
            char pair[3] = { ((char *)t1->data)[0], ((char *)t2->data)[0], '\0' };
            bool matched = false;
            for (int i = 0; MUNCH_TABLE[i].op != NULL; i++) {
                if (strcmp(pair, MUNCH_TABLE[i].op) == 0) {
                    // 1. Update T1 to the merged operator string
                    char *new_data = malloc(3);
                    if (!new_data)
                        break; // OOM: leave the pair unmerged rather than crash
                    memcpy(new_data, pair, 3);
                    free(t1->data);
                    t1->data = new_data;
                    t1->size = 2;
                    // 2. Remove T2 from the list
                    node_t *node_to_remove = curr->next;
                    curr->next = node_to_remove->next;
                    if (lst->last == node_to_remove)
                        lst->last = curr;
                    // 3. Free T2 memory
                    ClearTokens(node_to_remove->data);
                    free(node_to_remove);
                    lst->size--;
                    matched = true;
                    break;
                }
            }
            // If we matched "++", curr now holds "++". Don't advance yet, in
            // case a 3rd char extends it (like ">>="). NOTE(review): the
            // size==1 guard above means 2-char tokens can't actually re-merge;
            // confirm whether 3-char operators are expected to work here.
            if (matched)
                continue;
        }
        curr = curr->next;
    }
}

// Split RAW tokens around symbol characters (from SYMBOLS) so every symbol
// ends up as its own 1-char token. String tokens are left untouched.
void RefineSymbols(list_t *tkn_lst)
{
    node_t *curr = tkn_lst->first;
    while (curr) {
        Token_t *t = (Token_t *)curr->data;
        // Skip strings, and skip nodes that are JUST a single symbol already
        if (t->ctx != TOK_RAW ||
            (t->size == 1 && strchr(SYMBOLS, ((char *)t->data)[0]))) {
            curr = curr->next;
            continue;
        }
        // Find the first symbol in this string
        size_t pos = strcspn(t->data, SYMBOLS);
        if (pos < t->size) {
            if (pos > 0) {
                // Case 1: symbol not at the start -> split off the prefix.
                // curr becomes [prefix]; curr->next is [symbol + suffix].
                ListSplitToken(tkn_lst, curr, pos);
            } else {
                // Case 2: symbol at the start -> split off the 1-char symbol.
                // curr becomes the symbol; curr->next is the suffix.
                ListSplitToken(tkn_lst, curr, 1);
            }
            // Either way, continue scanning from the next node.
            curr = curr->next;
        } else {
            // No symbols found in this node
            curr = curr->next;
        }
    }
}

// Scan raw text and emit alternating RAW / STRING tokens, where a STRING is a
// quoted region (including both quote characters). Escaped quotes inside the
// string are honored. Trailing unquoted text becomes a final RAW token.
void SeparateStrings(char *data, list_t *tkn_lst)
{
    char *curr = data;
    char *start = data;
    while (*curr != '\0') {
        if (*curr == '\"' || *curr == '\'') {
            char quote_type = *curr;
            // Flush any raw text accumulated before the quote
            if (curr > start) {
                size_t raw_len = curr - start;
                Token_t *raw = calloc(1, sizeof(Token_t));
                raw->data = strndup(start, raw_len); // strndup is C99/POSIX
                raw->size = raw_len;
                raw->ctx = TOK_RAW;
                ListPushBack(tkn_lst, raw);
            }
            char *str_start = curr;
            curr++; // Skip opening quote
            while (*curr != '\0' && *curr != quote_type) {
                // Skip escaped characters like \" — but only if a character
                // actually follows the backslash, so a trailing '\' at the end
                // of input cannot step past the terminating '\0'.
                if (*curr == '\\' && *(curr + 1))
                    curr++;
                curr++;
            }
            if (*curr == quote_type)
                curr++; // Include closing quote
            size_t str_len = curr - str_start;
            Token_t *str_tok = calloc(1, sizeof(Token_t));
            str_tok->data = strndup(str_start, str_len);
            str_tok->size = str_len;
            str_tok->ctx = TOK_STRING;
            ListPushBack(tkn_lst, str_tok);
            start = curr;
        } else {
            curr++;
        }
    }
    // Flush trailing raw text after the last string (if any)
    if (curr > start) {
        size_t last_len = curr - start;
        Token_t *last = calloc(1, sizeof(Token_t));
        last->data = strndup(start, last_len);
        last->size = last_len;
        last->ctx = TOK_RAW;
        ListPushBack(tkn_lst, last);
    }
}

// First scanner pass: extract string literals as TOK_STRING tokens and strip
// // and /* */ comments entirely; everything else is pushed as TOK_RAW.
void InitialScanner(char *data, list_t *tkn_lst)
{
    char *curr = data;
    char *start = data;
    while (*curr != '\0') {
        // 1. Handle string / char literals
        if (*curr == '\"' || *curr == '\'') {
            PushRaw(start, curr, tkn_lst);
            char quote = *curr;
            char *str_start = curr++;
            while (*curr && *curr != quote) {
                if (*curr == '\\' && *(curr + 1))
                    curr++;
                curr++;
            }
            if (*curr)
                curr++;
            PushToken(str_start, curr, TOK_STRING, tkn_lst);
            start = curr;
        }
        // 2. Handle Comments
        else if (*curr == '/' && (*(curr + 1) == '/' || *(curr + 1) == '*')) {
            PushRaw(start, curr, tkn_lst);
            if (*(curr + 1) == '/') {
                // Single line: consume up to the newline.
                // BUGFIX: this loop was commented out, so "// ..." comments
                // were never skipped (only one char was consumed).
                while (*curr && *curr != '\n')
                    curr++;
                if (*curr)
                    curr++; // skip the '\n' itself, but never step past '\0'
            } else {
                // Multi-line /* ... */
                curr += 2;
                while (*curr && !(*curr == '*' && *(curr + 1) == '/'))
                    curr++;
                if (*curr)
                    curr += 2; // Move past */
            }
            // We DON'T push a token here because we want to ignore comments.
            // If you want to keep them (for a doc-generator), push a TOK_COMMENT:
            // PushToken(start, curr, TOK_COMMENT, tkn_lst);
            start = curr;
        } else {
            curr++;
        }
    }
    PushRaw(start, curr, tkn_lst);
}

// Split each RAW token on whitespace into multiple RAW tokens, inserting the
// extra tokens in place so list order is preserved.
void RefineRawNodes(list_t *tkn_lst)
{
    node_t *curr = tkn_lst->first;
    while (curr) {
        Token_t *t = (Token_t *)curr->data;
        if (t->ctx == TOK_RAW) {
            char *span = NULL;
            // Work on a NUL-terminated copy; strtok_r mutates its input.
            char *to_split = strndup(t->data, t->size);
            char *tok = strtok_r(to_split, " \t\r\n", &span);
            if (tok) {
                // First word replaces the current token's data in place
                free(t->data);
                t->size = strlen(tok);
                t->data = strndup(tok, t->size);
                node_t *last_inserted = curr;
                tok = strtok_r(NULL, " \t\r\n", &span);
                while (tok) {
                    // Each subsequent word becomes a new node spliced in
                    // immediately after the previous one.
                    Token_t *new_t = calloc(1, sizeof(Token_t));
                    new_t->size = strlen(tok);
                    new_t->data = strndup(tok, new_t->size);
                    new_t->ctx = TOK_RAW;
                    node_t *new_node = calloc(1, sizeof(node_t));
                    new_node->data = new_t;
                    new_node->next = last_inserted->next;
                    last_inserted->next = new_node;
                    if (tkn_lst->last == last_inserted)
                        tkn_lst->last = new_node;
                    last_inserted = new_node;
                    tkn_lst->size++;
                    tok = strtok_r(NULL, " \t\r\n", &span);
                }
                // Resume scanning after the last node we inserted
                curr = last_inserted;
            }
            free(to_split);
        }
        curr = curr->next;
    }
}

// Remove RAW tokens that consist purely of whitespace (per IsWhitespace),
// unlinking and freeing their nodes.
void PruneWhitespaceNodes(list_t *lst)
{
    node_t *curr = lst->first;
    node_t *prev = NULL;
    while (curr) {
        Token_t *t = (Token_t *)curr->data;
        if (t->ctx == TOK_RAW && IsWhitespace(t->data)) {
            // Unlink and free
            node_t *temp = curr;
            if (prev)
                prev->next = curr->next;
            else
                lst->first = curr->next;
            if (lst->last == temp)
                lst->last = prev;
            curr = curr->next;
            ClearTokens(temp->data);
            free(temp);
            lst->size--;
        } else {
            prev = curr;
            curr = curr->next;
        }
    }
}

// Split the token at `node` into [0, index) and [index, size), inserting a new
// node for the suffix right after `node`. Both halves become NUL-terminated
// TOK_RAW buffers (the prefix keeps the original token struct). On allocation
// failure the token is left unmodified (original dereferenced unchecked malloc).
void ListSplitToken(list_t *lst, node_t *node, size_t index)
{
    Token_t *old_t = (Token_t *)node->data;

    // 1. Create Suffix Data
    size_t suffix_len = old_t->size - index;
    char *suffix_data = malloc(suffix_len + 1);
    if (!suffix_data)
        return;
    memcpy(suffix_data, (char *)old_t->data + index, suffix_len);
    suffix_data[suffix_len] = '\0';

    // 2. Truncate Prefix Data
    char *prefix_data = malloc(index + 1);
    if (!prefix_data) {
        free(suffix_data);
        return;
    }
    memcpy(prefix_data, old_t->data, index);
    prefix_data[index] = '\0';
    free(old_t->data);
    old_t->data = prefix_data;
    old_t->size = index;

    // 3. Create New Node for Suffix
    Token_t *new_t = calloc(1, sizeof(Token_t));
    new_t->data = suffix_data;
    new_t->size = suffix_len;
    new_t->ctx = TOK_RAW;
    node_t *new_node = calloc(1, sizeof(node_t));
    new_node->data = new_t;
    new_node->next = node->next;

    // 4. Update List
    node->next = new_node;
    if (lst->last == node)
        lst->last = new_node;
    lst->size++;
}

//// Helper to create and link a new token covering [start, end)
void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst)
{
    size_t len = end - start;
    Token_t *t = calloc(1, sizeof(Token_t));
    if (!t)
        return;
    t->data = malloc(len + 1);
    if (!t->data) {
        free(t);
        return;
    }
    memcpy(t->data, start, len);
    ((char *)t->data)[len] = '\0';
    t->size = len;
    t->ctx = ctx;
    ListPushBack(lst, t);
}

//// Helper to push code that still needs to be refined (no-op on empty span)
void PushRaw(char *start, char *end, list_t *lst)
{
    if (end <= start)
        return;
    PushToken(start, end, TOK_RAW, lst);
}
#endif