270 lines
8.3 KiB
C

#define LIST_IMPLEMENTATION
#include "list.h"
#include "../include/SterlingCompiler.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Read a whole file into a freshly allocated, NUL-terminated buffer.
//
// The file is opened in text mode ("r"). The buffer is sized from the length
// reported by ftell(), but the terminator is written after the number of bytes
// fread() actually delivered: on a text stream that count can be smaller than
// the reported size (e.g. when the C library translates line endings), and
// terminating at the reported size would leave uninitialised bytes inside the
// returned string.
//
// filename: path of the file to read.
// Returns:  heap buffer with the file contents; the caller must free() it.
//           Never returns NULL - on any I/O or allocation failure a diagnostic
//           is written to stderr and the process exits with EXIT_FAILURE.
char *LoadFile(const char *filename) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        perror(filename);
        exit(EXIT_FAILURE);
    }

    // Measure the file by seeking to its end; ftell() reports -1 on failure.
    long size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        size = ftell(file);
    }
    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
        perror(filename);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    char *data = malloc((size_t)size + 1);
    if (!data) {
        fprintf(stderr, "%s: out of memory\n", filename);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    size_t got = fread(data, 1, (size_t)size, file);
    if (ferror(file)) {
        perror(filename);
        free(data);
        fclose(file);
        exit(EXIT_FAILURE);
    }
    data[got] = '\0'; // terminate after what was really read, not after `size`

    fclose(file);
    return data;
}
# ifndef strndup
// Fallback strndup for C libraries that do not provide one.
//
// Copies at most n bytes of s into a new NUL-terminated heap string. The copy
// stops at the first NUL inside those n bytes, so s is never read beyond its
// own terminator even when n is larger than the string.
//
// Returns the new string (caller frees it), or NULL if allocation fails.
char *strndup(const char *s, size_t n) {
    const char *nul = memchr(s, '\0', n);
    size_t len = nul ? (size_t)(nul - s) : n;
    char *str = malloc(len + 1);
    if (!str) {
        return NULL;
    }
    memcpy(str, s, len);
    str[len] = '\0';
    return str;
}
# endif
// Report whether s is made up solely of characters for which isspace() is
// true. An empty string has no non-space character and therefore yields true.
bool IsWhitespace(const char *s) {
    for (const unsigned char *p = (const unsigned char *)s; *p != '\0'; ++p) {
        if (isspace(*p) == 0) {
            return false;
        }
    }
    return true;
}
void ClearTokens(void*arg) {
Token_t *tok = arg;
free(tok->data);
free(tok);
}
// Build a token from the bytes in [start, end) and append it to lst.
//
// The bytes are copied into a new NUL-terminated buffer owned by the token;
// the token records the copied length in `size` and the given context in `ctx`.
// An inverted span (end before start) is treated as empty rather than being
// allowed to wrap around to a huge unsigned length.
//
// If memory cannot be obtained the process is aborted with a message on
// stderr, since there is no way to report failure to the caller.
void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) {
    size_t len = (end > start) ? (size_t)(end - start) : 0;

    Token_t *t = calloc(1, sizeof *t);
    char *text = malloc(len + 1);
    if (!t || !text) {
        fprintf(stderr, "PushToken: out of memory\n");
        abort();
    }

    memcpy(text, start, len);
    text[len] = '\0';

    t->data = text;
    t->size = len;
    t->ctx = ctx;
    ListPushBack(lst, t);
}
// Append the span [start, end) to lst as a TOK_RAW token, i.e. code that a
// later pass still has to split up. Empty or inverted spans add nothing.
void PushRaw(char *start, char *end, list_t *lst) {
    if (start < end) {
        PushToken(start, end, TOK_RAW, lst);
    }
}
// Split data into TOK_STRING tokens (quoted literals, quotes included) and
// TOK_RAW tokens (everything in between), appended to tkn_lst in source order.
//
// Both '"' and '\'' open a literal; it runs to the matching unescaped quote of
// the same kind, or to the end of the input if it is never closed.
// Unlike InitialScanner, this pass does not recognise comments.
void SeparateStrings(char *data, list_t *tkn_lst) {
    char *curr = data;
    char *start = data; // beginning of the raw chunk not yet emitted

    while (*curr != '\0') {
        if (*curr != '\"' && *curr != '\'') {
            curr++;
            continue;
        }

        // 1. Emit the raw chunk that precedes the literal (no-op if empty).
        PushRaw(start, curr, tkn_lst);

        // 2. Find the end of the literal.
        char quote_type = *curr;
        char *str_start = curr++; // remember the opening quote, then skip it
        while (*curr != '\0' && *curr != quote_type) {
            // Skip the character after a backslash so \" does not close the
            // literal - but only if there is one; a trailing backslash must
            // not carry the cursor past the terminator.
            if (*curr == '\\' && curr[1] != '\0') {
                curr++;
            }
            curr++;
        }
        if (*curr == quote_type) {
            curr++; // include the closing quote
        }

        // 3. Emit the literal and restart raw accumulation after it.
        PushToken(str_start, curr, TOK_STRING, tkn_lst);
        start = curr;
    }

    // 4. Emit whatever raw code follows the last literal.
    PushRaw(start, curr, tkn_lst);
}
// First tokenizer pass: cut data into TOK_STRING tokens (quoted literals,
// quotes included) and TOK_RAW tokens (all other code), dropping comments.
//
// Tokens are appended to tkn_lst in source order. Both "//" line comments and
// "/* */" block comments are removed; a comment also ends the raw chunk before
// it, so the code on either side of a comment lands in separate TOK_RAW tokens.
// Unterminated literals and block comments simply run to the end of the input.
void InitialScanner(char *data, list_t *tkn_lst) {
    char *curr = data;
    char *start = data; // beginning of the raw chunk not yet emitted

    while (*curr != '\0') {
        // 1. String / character literals
        if (*curr == '\"' || *curr == '\'') {
            PushRaw(start, curr, tkn_lst); // code before the literal
            char quote = *curr;
            char *str_start = curr++;
            while (*curr && *curr != quote) {
                // Skip the escaped character, unless the backslash is the
                // very last byte of the input.
                if (*curr == '\\' && curr[1] != '\0') {
                    curr++;
                }
                curr++;
            }
            if (*curr) {
                curr++; // include the closing quote
            }
            PushToken(str_start, curr, TOK_STRING, tkn_lst);
            start = curr;
        }
        // 2. Comments
        else if (*curr == '/' && (curr[1] == '/' || curr[1] == '*')) {
            PushRaw(start, curr, tkn_lst); // code before the comment
            if (curr[1] == '/') {
                // Line comment: runs to the newline or the end of the input.
                while (*curr && *curr != '\n') {
                    curr++;
                }
                // Consume the newline only if there is one; a comment on the
                // last line without a trailing newline stops at the terminator.
                if (*curr) {
                    curr++;
                }
            } else {
                // Block comment: skip the opener, then search for "*/".
                curr += 2;
                while (*curr && !(*curr == '*' && curr[1] == '/')) {
                    curr++;
                }
                if (*curr) {
                    curr += 2; // move past the closing "*/"
                }
            }
            // No token is pushed: comments are discarded on purpose.
            start = curr;
        }
        else {
            curr++;
        }
    }

    PushRaw(start, curr, tkn_lst); // trailing raw code
}
// Return a NUL-terminated heap copy of the len bytes at src.
// Aborts the process with a message on stderr if allocation fails.
static char *RefineCopyWord(const char *src, size_t len) {
    char *copy = malloc(len + 1);
    if (!copy) {
        fprintf(stderr, "RefineRawNodes: out of memory\n");
        abort();
    }
    memcpy(copy, src, len);
    copy[len] = '\0';
    return copy;
}

// Split every TOK_RAW token of tkn_lst on whitespace (space, tab, CR, LF).
//
// The first word of a raw token replaces that token's data in place; each
// further word becomes a new TOK_RAW token in a new node linked directly after
// the previous one, so source order is preserved. The list's `last` pointer
// and `size` counter are kept up to date. Tokens with any other context are
// left untouched, as are raw tokens that contain no word at all.
//
// Words are located with strspn()/strcspn() on the original text, so no
// scratch copy of the token and no platform-specific string helpers are needed.
void RefineRawNodes(list_t *tkn_lst) {
    static const char delims[] = " \t\r\n";

    for (node_t *curr = tkn_lst->first; curr; curr = curr->next) {
        Token_t *t = (Token_t *)curr->data;
        if (t->ctx != TOK_RAW) {
            continue; // only raw chunks are split
        }

        // The old buffer is scanned for every word, so it stays alive until
        // the end of this iteration.
        char *text = (char *)t->data;
        const char *word = text + strspn(text, delims);
        if (*word == '\0') {
            continue; // nothing but separators: leave the token as it is
        }

        // 1. The first word takes over the current node.
        size_t len = strcspn(word, delims);
        t->data = RefineCopyWord(word, len);
        t->size = len;
        word += len;

        // 2. Every following word gets a node of its own, chained after the
        //    most recently inserted one.
        node_t *tail = curr;
        for (;;) {
            word += strspn(word, delims);
            if (*word == '\0') {
                break;
            }
            len = strcspn(word, delims);

            Token_t *new_t = calloc(1, sizeof *new_t);
            node_t *new_node = calloc(1, sizeof *new_node);
            if (!new_t || !new_node) {
                fprintf(stderr, "RefineRawNodes: out of memory\n");
                abort();
            }
            new_t->data = RefineCopyWord(word, len);
            new_t->size = len;
            new_t->ctx = TOK_RAW;
            new_node->data = new_t;

            new_node->next = tail->next;
            tail->next = new_node;
            if (tkn_lst->last == tail) {
                tkn_lst->last = new_node;
            }
            tail = new_node;
            tkn_lst->size++;

            word += len;
        }

        free(text);
        curr = tail; // resume after the nodes just inserted
    }
}
// Remove from lst every TOK_RAW token whose text is whitespace only.
//
// Each removed node is unlinked, its token is released through ClearTokens(),
// the node itself is freed, and the list's `first`, `last` and `size` fields
// are adjusted to match. Tokens with any other context are always kept.
void PruneWhitespaceNodes(list_t *lst) {
    node_t *kept = NULL; // most recent node that stayed in the list
    node_t *node = lst->first;

    while (node != NULL) {
        node_t *following = node->next;
        Token_t *tok = (Token_t *)node->data;
        bool blank = (tok->ctx == TOK_RAW) && IsWhitespace(tok->data);

        if (!blank) {
            kept = node;
            node = following;
            continue;
        }

        // Bypass the node: either the predecessor or the list head skips it.
        if (kept == NULL) {
            lst->first = following;
        } else {
            kept->next = following;
        }
        if (lst->last == node) {
            lst->last = kept;
        }

        ClearTokens(tok);
        free(node);
        lst->size--;
        node = following;
    }
}
int main(int ac, char **av) {
if (ac <= 1) {
printf("no file specified");
return (-1);
}
char* data = LoadFile(av[1]);
list_t *tkn_lst = ListInit(NULL);
//first pass on string, whitespace and comments
InitialScanner(data, tkn_lst);
//SeparateStrings(data, tkn_lst);
list_iter_t iter = ListGetIter(tkn_lst);
while (iter.current) {
printf("|%s|", ((Token_t *)iter.current->data)->data);
iter.current = iter.current->next;
}
PruneWhitespaceNodes(tkn_lst);
printf("\n___\n");
RefineRawNodes(tkn_lst);
ListReset(&iter, tkn_lst);
while (iter.current) {
printf("|%s|\n", ((Token_t *)iter.current->data)->data);
iter.current = iter.current->next;
}
//pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`"
//give each token a context
//let's replace preprocessor (include, define, etc)
//let's do recursive parsing everywhere that need it
//compile time reflection (@comptime or @reflect)
//metaprogramming logic annotation if i do it lastly** may not be
ListFree(tkn_lst, ClearTokens);
free(data);
return(0);
}
//test