/* NOTE(review): removed pasted viewer chrome ("320 lines / 10 KiB / Plaintext")
 * that preceded the #ifdef — it was not C and broke compilation. */
#ifdef noneafjodsjf
|
|
# define
|
|
|
|
// Clean up the node creation to be more "C-Style"
|
|
node_t* NewNode(void* data) {
|
|
node_t* n = calloc(1, sizeof(node_t));
|
|
if(n) n->data = data;
|
|
return n;
|
|
}
|
|
|
|
// Optimization: Use a specialized Token creation function
|
|
// Optimization: Use a specialized Token creation function.
// Creates a Token_t owning a NUL-terminated copy of [start, start + len),
// tagged with `ctx`. Returns NULL if either allocation fails (nothing is
// leaked on the failure path). The caller owns the returned token.
Token_t* NewToken(const char* start, size_t len, TKN_CTX ctx) {
    Token_t* t = malloc(sizeof *t);
    if (!t) return NULL;  // BUGFIX: was dereferenced unchecked

    t->data = malloc(len + 1);
    if (!t->data) {       // BUGFIX: memcpy into NULL is UB; bail out cleanly
        free(t);
        return NULL;
    }

    memcpy(t->data, start, len);
    ((char*)t->data)[len] = '\0';
    t->size = len;
    t->ctx = ctx;
    return t;
}
|
|
|
|
// Merge adjacent single-character TOK_RAW tokens into a two-character
// operator token (e.g. '+','+' -> "++") whenever the concatenated pair
// appears in MUNCH_TABLE (a NULL-terminated table of operator strings).
// The second node of each merged pair is unlinked and freed in place.
void MunchTokens(list_t *lst) {
    node_t *curr = lst->first;

    while (curr && curr->next) {
        Token_t *t1 = (Token_t *)curr->data;
        Token_t *t2 = (Token_t *)curr->next->data;

        // Only munch RAW tokens that are single characters
        if (t1->ctx == TOK_RAW && t2->ctx == TOK_RAW && t1->size == 1 && t2->size == 1) {
            // Candidate two-character operator built from the pair.
            char pair[3] = { ((char*)t1->data)[0], ((char*)t2->data)[0], '\0' };
            bool matched = false;

            // Linear scan of the operator table (NULL op terminates it).
            for (int i = 0; MUNCH_TABLE[i].op != NULL; i++) {
                if (strcmp(pair, MUNCH_TABLE[i].op) == 0) {
                    // 1. Update T1 to the new string
                    // NOTE(review): malloc result is used unchecked — TODO handle OOM.
                    char *new_data = malloc(3);
                    memcpy(new_data, pair, 3);
                    free(t1->data);
                    t1->data = new_data;
                    t1->size = 2;

                    // 2. Remove T2 from the list
                    node_t *node_to_remove = curr->next;
                    curr->next = node_to_remove->next;

                    // Keep the tail pointer valid if we removed the last node.
                    if (lst->last == node_to_remove) lst->last = curr;

                    // 3. Free T2 memory
                    ClearTokens(node_to_remove->data);
                    free(node_to_remove);
                    lst->size--;

                    matched = true;
                    break;
                }
            }
            // If we matched "++", curr now contains "++".
            // We DON'T move to next yet, in case there's a 3rd char (like ">>=")
            // NOTE(review): after a merge t1->size == 2, so the size == 1 guard
            // above rejects the merged token on the retry — three-character
            // operators are never actually formed by this `continue`. Confirm
            // whether ">>="-style munching is expected to work here.
            if (matched) continue;
        }
        curr = curr->next;
    }
}
|
|
|
|
// Walk the token list and split every TOK_RAW token at SYMBOLS characters,
// so each symbol ends up as its own single-character token. Non-RAW tokens
// (e.g. strings) and tokens that are already a lone symbol are left alone.
// Relies on ListSplitToken to do the actual in-place node splitting.
void RefineSymbols(list_t *tkn_lst) {
    node_t *curr = tkn_lst->first;

    while (curr) {
        Token_t *t = (Token_t *)curr->data;

        // Skip strings and skip nodes that are JUST a single symbol already
        if (t->ctx != TOK_RAW || (t->size == 1 && strchr(SYMBOLS, ((char*)t->data)[0]))) {
            curr = curr->next;
            continue;
        }

        // Find the first symbol in this string
        // (strcspn returns t->size when no SYMBOLS character occurs;
        //  assumes t->data is NUL-terminated — TODO confirm all producers
        //  guarantee that.)
        size_t pos = strcspn(t->data, SYMBOLS);

        if (pos < t->size) {
            // We found a symbol! Now we split.
            // Case 1: Symbol is NOT at the very start (there is a prefix)
            if (pos > 0) {
                // Split the token into [prefix] and [symbol + suffix]
                // We reuse the ListSplitToken logic we discussed earlier
                ListSplitToken(tkn_lst, curr, pos);
                // After splitting, curr is now just the prefix.
                // We move to curr->next to handle the symbol.
                curr = curr->next;
            }
            // Case 2: Symbol is at the start (pos == 0)
            else {
                // Split the token into [1-char symbol] and [suffix]
                // (A lone-symbol token of size 1 was skipped above, so the
                //  suffix here is never empty-by-accident on that path.)
                ListSplitToken(tkn_lst, curr, 1);
                // The current node is now the 1-char symbol.
                // We move to the next node to see if the suffix has more symbols.
                curr = curr->next;
            }
        } else {
            // No symbols found in this node, move to the next node in the list
            curr = curr->next;
        }
    }
}
|
|
|
|
// Split `data` into an alternating sequence of TOK_RAW and TOK_STRING tokens
// appended to `tkn_lst`. A string token spans from its opening quote to the
// matching closing quote, inclusive; both "double" and 'single' quoted
// literals are recognized, and a backslash escapes the following character
// so an escaped quote does not terminate the literal. An unterminated
// literal extends to the end of the input.
void SeparateStrings(char *data, list_t *tkn_lst) {
    char *curr = data;
    char *start = data;

    while (*curr != '\0') {
        if (*curr == '\"' || *curr == '\'') {
            char quote_type = *curr;

            // Flush any raw text accumulated before the opening quote.
            if (curr > start) {
                size_t raw_len = curr - start;
                Token_t *raw = calloc(1, sizeof(Token_t));
                raw->data = strndup(start, raw_len);//strndup is C99/POSIX
                raw->size = raw_len;
                raw->ctx = TOK_RAW;
                ListPushBack(tkn_lst, raw);
            }

            char *str_start = curr;
            curr++; // Skip opening quote
            while (*curr != '\0' && *curr != quote_type) {
                // BUGFIX: only skip the escaped character when one exists.
                // The old unconditional `curr++` on '\\' walked past the NUL
                // terminator when the input ended in a backslash, causing an
                // out-of-bounds read (matches the guard used in InitialScanner).
                if (*curr == '\\' && *(curr + 1) != '\0') curr++;
                curr++;
            }
            if (*curr == quote_type) curr++; // Include closing quote

            size_t str_len = curr - str_start;
            Token_t *str_tok = calloc(1, sizeof(Token_t));
            str_tok->data = strndup(str_start, str_len);
            str_tok->size = str_len;
            str_tok->ctx = TOK_STRING;
            ListPushBack(tkn_lst, str_tok);

            start = curr;
        } else {
            curr++;
        }
    }

    // Flush any trailing raw text after the last string literal.
    if (curr > start) {
        size_t last_len = curr - start;
        Token_t *last = calloc(1, sizeof(Token_t));
        last->data = strndup(start, last_len);
        last->size = last_len;
        last->ctx = TOK_RAW;
        ListPushBack(tkn_lst, last);
    }
}
|
|
|
|
// First-pass scanner: walks `data` once, pushing TOK_RAW spans and
// TOK_STRING literals (quotes included) onto `tkn_lst`, and discarding
// both // and /* */ comments entirely.
void InitialScanner(char *data, list_t *tkn_lst) {
    char *curr = data;
    char *start = data;

    while (*curr != '\0') {
        // 1. Handle string / character literals
        if (*curr == '\"' || *curr == '\'') {
            PushRaw(start, curr, tkn_lst); // flush raw text before the quote
            char quote = *curr;
            char *str_start = curr++;
            while (*curr && *curr != quote) {
                if (*curr == '\\' && *(curr + 1)) curr++; // skip escaped char
                curr++;
            }
            if (*curr) curr++; // include closing quote, if present
            PushToken(str_start, curr, TOK_STRING, tkn_lst);
            start = curr;
        }
        // 2. Handle Comments
        else if (*curr == '/' && (*(curr + 1) == '/' || *(curr + 1) == '*')) {
            PushRaw(start, curr, tkn_lst);

            if (*(curr + 1) == '/') { // Single line //
                while (*curr && *curr != '\n') curr++;
                // BUGFIX: only step over the '\n' if we actually stopped on one.
                // The old unconditional curr++ walked past the NUL terminator
                // when the input ended in a // comment with no trailing newline,
                // making the outer loop scan out of bounds.
                if (*curr) curr++;
            } else { // Multi-line /*
                curr += 2;
                while (*curr && !(*curr == '*' && *(curr + 1) == '/')) curr++;
                if (*curr) curr += 2; // Move past */
            }
            // We DON'T push a token here because we want to ignore comments.
            // If you want to keep them (for a doc-generator), push a TOK_COMMENT.
            //PushToken(start, curr, TOK_COMMENT, tkn_lst);
            start = curr;
        } else {
            curr++;
        }
    }
    PushRaw(start, curr, tkn_lst); // flush whatever remains at end of input
}
|
|
|
|
// Split every TOK_RAW token on whitespace (" \t\r\n"). The first word
// replaces the original token's data in place; each additional word is
// inserted as a brand-new node immediately after it, preserving order.
// A token that is entirely whitespace yields no strtok_r result and is
// left untouched (presumably removed later by PruneWhitespaceNodes —
// TODO confirm pass ordering).
void RefineRawNodes(list_t *tkn_lst) {
    node_t *curr = tkn_lst->first;
    //node_t *prev = NULL;

    while (curr) {
        Token_t *t = (Token_t *)curr->data;
        if (t->ctx == TOK_RAW) {
            char *span = NULL;
            // Work on a NUL-terminated copy: strtok_r mutates its input.
            char *to_split = strndup(t->data, t->size);
            char *tok = strtok_r(to_split, " \t\r\n", &span);

            if (tok) {
                // First word: overwrite the existing token's payload in place.
                free(t->data);
                t->size = strlen(tok);
                t->data = strndup(tok, t->size);

                node_t *last_inserted = curr;
                tok = strtok_r(NULL, " \t\r\n", &span);

                // Remaining words: insert one new TOK_RAW node per word,
                // each spliced in right after the previous insertion.
                while (tok) {
                    Token_t *new_t = calloc(1, sizeof(Token_t));
                    new_t->size = strlen(tok);
                    new_t->data = strndup(tok, new_t->size);
                    new_t->ctx = TOK_RAW;

                    node_t *new_node = calloc(1, sizeof(node_t));
                    new_node->data = new_t;

                    new_node->next = last_inserted->next;
                    last_inserted->next = new_node;

                    // Keep the tail pointer valid when appending at the end.
                    if (tkn_lst->last == last_inserted) tkn_lst->last = new_node;

                    last_inserted = new_node;
                    tkn_lst->size++;
                    tok = strtok_r(NULL, " \t\r\n", &span);
                }
                // Skip past everything we just inserted before advancing.
                curr = last_inserted;
            }
            free(to_split);
        }
        //prev = curr;
        curr = curr->next;
    }
}
|
|
|
|
// Remove every TOK_RAW node whose payload IsWhitespace reports as pure
// whitespace, releasing the token (via ClearTokens) and the node itself,
// and keeping first/last/size consistent throughout.
void PruneWhitespaceNodes(list_t *lst) {
    node_t *kept = NULL; // last node we decided to keep

    for (node_t *node = lst->first; node != NULL; ) {
        Token_t *tok = (Token_t *)node->data;

        if (tok->ctx != TOK_RAW || !IsWhitespace(tok->data)) {
            // Keep this node and move on.
            kept = node;
            node = node->next;
            continue;
        }

        // Unlink `doomed`, advance the cursor first, then free.
        node_t *doomed = node;
        node = node->next;

        if (kept == NULL) lst->first = doomed->next;
        else kept->next = doomed->next;

        if (lst->last == doomed) lst->last = kept;

        ClearTokens(doomed->data);
        free(doomed);
        lst->size--;
    }
}
|
|
|
|
// Split `node`'s token at `index`: the node keeps the first `index` bytes
// (prefix) and a new TOK_RAW node holding the remainder (suffix) is inserted
// immediately after it. Assumes 0 < index <= size — an out-of-range index
// would underflow suffix_len — TODO confirm all callers guarantee this.
// NOTE(review): all four allocations below are unchecked; with a void return
// there is no error path if one fails — consider an abort-on-OOM wrapper.
void ListSplitToken(list_t *lst, node_t *node, size_t index) {
    Token_t *old_t = (Token_t *)node->data;

    // 1. Create Suffix Data
    size_t suffix_len = old_t->size - index;
    char *suffix_data = malloc(suffix_len + 1);
    memcpy(suffix_data, (char*)old_t->data + index, suffix_len);
    suffix_data[suffix_len] = '\0';

    // 2. Truncate Prefix Data
    // (Copy into a fresh buffer first so the old data survives until both
    //  halves are safely extracted.)
    char *prefix_data = malloc(index + 1);
    memcpy(prefix_data, old_t->data, index);
    prefix_data[index] = '\0';

    free(old_t->data);
    old_t->data = prefix_data;
    old_t->size = index;

    // 3. Create New Node for Suffix
    // (Suffix is always TOK_RAW: this splitter is only used on raw tokens.)
    Token_t *new_t = calloc(1, sizeof(Token_t));
    new_t->data = suffix_data;
    new_t->size = suffix_len;
    new_t->ctx = TOK_RAW;

    node_t *new_node = calloc(1, sizeof(node_t));
    new_node->data = new_t;
    new_node->next = node->next;

    // 4. Update List
    node->next = new_node;
    if (lst->last == node) lst->last = new_node; // splitting the tail grows it
    lst->size++;
}
|
|
|
|
|
|
//// Helper to create and link a new token
|
|
void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) {
|
|
size_t len = end - start;
|
|
Token_t *t = calloc(1, sizeof(Token_t));
|
|
t->data = malloc(len + 1);
|
|
memcpy(t->data, start, len);
|
|
((char*)t->data)[len] = '\0';
|
|
t->size = len;
|
|
t->ctx = ctx;
|
|
ListPushBack(lst, t);
|
|
}
|
|
|
|
//// Helper to push code that still needs to be refined
|
|
void PushRaw(char *start, char *end, list_t *lst) {
|
|
if (end <= start) return;
|
|
PushToken(start, end, TOK_RAW, lst);
|
|
}
|
|
|
|
#endif
|