Yay git add .! parser seems to work fine, need to implemente an hash map
This commit is contained in:
parent
76ea84b0be
commit
cebb63912c
@ -15,39 +15,30 @@
|
||||
# endif
|
||||
|
||||
typedef enum {
|
||||
TOK_NONE,
|
||||
TOK_STRING,
|
||||
TOK_RAW,
|
||||
TOK_PREPROCESSOR,
|
||||
} TKN_CTX;
|
||||
TOK_NONE = 1 << 0,
|
||||
TOK_RAW = 1 << 1,
|
||||
TOK_STRING = 1 << 2,
|
||||
TOK_OP = 1 << 3,
|
||||
TOK_PREPROC = 1 << 4,
|
||||
TOK_COMMENT = 1 << 5,
|
||||
TOK_KEY = 1 << 6,
|
||||
TOK_ID = 1 << 7 // New: For variable/function names
|
||||
} TKN_CTX;
|
||||
|
||||
typedef struct Token_s {
|
||||
int size;
|
||||
size_t size;
|
||||
TKN_CTX ctx;
|
||||
char *data;
|
||||
} Token_t;
|
||||
|
||||
//builtin type: if x86_64, then since i use simd, should align them
|
||||
typedef struct {
|
||||
char *op;
|
||||
size_t len;
|
||||
} MultiOp;
|
||||
|
||||
typedef struct vec2 {
|
||||
float x;
|
||||
float y;
|
||||
float _padding1;
|
||||
float _padding2;
|
||||
} __attribute__((aligned(16)));//losing 8 byte
|
||||
|
||||
typedef struct vec3 {
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
float _padding;
|
||||
} __attribute__((aligned(16)));//losing 4 byte
|
||||
|
||||
typedef struct vec4 {
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
float w;
|
||||
} __attribute__((aligned(16)));
|
||||
typedef struct {
|
||||
const char *name;
|
||||
TKN_CTX ctx;
|
||||
} KeywordEntry;
|
||||
|
||||
#endif
|
||||
|
||||
BIN
obj/main.o
Normal file
BIN
obj/main.o
Normal file
Binary file not shown.
319
source/bckp_data.bsdok
Normal file
319
source/bckp_data.bsdok
Normal file
@ -0,0 +1,319 @@
|
||||
#ifdef noneafjodsjf
|
||||
# define
|
||||
|
||||
// Clean up the node creation to be more "C-Style"
|
||||
node_t* NewNode(void* data) {
|
||||
node_t* n = calloc(1, sizeof(node_t));
|
||||
if(n) n->data = data;
|
||||
return n;
|
||||
}
|
||||
|
||||
// Optimization: Use a specialized Token creation function
|
||||
/* Create a token owning a NUL-terminated copy of [start, start+len).
 * Returns NULL if any allocation fails (previously an OOM result was
 * dereferenced unchecked — undefined behavior). */
Token_t* NewToken(const char* start, size_t len, TKN_CTX ctx) {
    Token_t *t = malloc(sizeof *t);
    if (t == NULL) return NULL;

    t->data = malloc(len + 1);
    if (t->data == NULL) {      /* avoid memcpy into NULL */
        free(t);
        return NULL;
    }
    memcpy(t->data, start, len);
    t->data[len] = '\0';        /* data is char*, no cast needed */
    t->size = len;
    t->ctx = ctx;
    return t;
}
|
||||
|
||||
/* Merge adjacent single-character RAW tokens that form a known
 * multi-character operator (see MUNCH_TABLE), e.g. '+' '+' -> "++".
 * The left node absorbs the pair; the right node is unlinked and freed. */
void MunchTokens(list_t *lst) {
    for (node_t *curr = lst->first; curr && curr->next; ) {
        Token_t *lhs = (Token_t *)curr->data;
        Token_t *rhs = (Token_t *)curr->next->data;
        bool merged = false;

        /* Only candidate pairs: two 1-char RAW tokens. */
        if (lhs->ctx == TOK_RAW && rhs->ctx == TOK_RAW &&
            lhs->size == 1 && rhs->size == 1) {
            char pair[3] = { ((char*)lhs->data)[0], ((char*)rhs->data)[0], '\0' };

            for (int i = 0; MUNCH_TABLE[i].op != NULL; i++) {
                if (strcmp(pair, MUNCH_TABLE[i].op) != 0) continue;

                /* Replace the left token's payload with the merged pair. */
                char *merged_data = malloc(3);
                memcpy(merged_data, pair, 3);
                free(lhs->data);
                lhs->data = merged_data;
                lhs->size = 2;

                /* Unlink and release the absorbed right-hand node. */
                node_t *victim = curr->next;
                curr->next = victim->next;
                if (lst->last == victim) lst->last = curr;
                ClearTokens(victim->data);
                free(victim);
                lst->size--;

                merged = true;
                break;
            }
        }
        /* After a merge, stay on this node in case it can munch again
         * (the original's intent for 3-char ops like ">>="). */
        if (!merged) curr = curr->next;
    }
}
|
||||
|
||||
/* Split RAW tokens around punctuation characters (SYMBOLS) so each
 * symbol ends up in its own token.  String tokens are left untouched. */
void RefineSymbols(list_t *tkn_lst) {
    for (node_t *curr = tkn_lst->first; curr != NULL; ) {
        Token_t *t = (Token_t *)curr->data;

        /* Skip non-RAW tokens and tokens that are already a lone symbol. */
        if (t->ctx != TOK_RAW ||
            (t->size == 1 && strchr(SYMBOLS, ((char*)t->data)[0]))) {
            curr = curr->next;
            continue;
        }

        /* Locate the first symbol character in this token's text. */
        size_t pos = strcspn(t->data, SYMBOLS);
        if (pos >= t->size) {
            /* No symbol in this node; move on. */
            curr = curr->next;
            continue;
        }

        /* pos > 0: cut off the prefix before the symbol;
         * pos == 0: cut off the 1-char symbol itself.
         * Either way the node is split in place and we continue with
         * the suffix node to find any further symbols. */
        ListSplitToken(tkn_lst, curr, pos > 0 ? pos : 1);
        curr = curr->next;
    }
}
|
||||
|
||||
/* First tokenization pass: split `data` into RAW chunks and quoted
 * string/char literals (TOK_STRING, quotes kept inside the token).
 * Allocation failures now skip the token instead of dereferencing NULL. */
void SeparateStrings(char *data, list_t *tkn_lst) {
    char *curr = data;
    char *start = data;

    while (*curr != '\0') {
        if (*curr == '\"' || *curr == '\'') {
            char quote_type = *curr;

            /* 1. Flush the raw code that precedes the literal. */
            if (curr > start) {
                size_t raw_len = curr - start;
                Token_t *raw = calloc(1, sizeof(Token_t));
                if (raw) {
                    raw->data = strndup(start, raw_len);//strndup is C99/POSIX
                    raw->size = raw_len;
                    raw->ctx = TOK_RAW;
                    ListPushBack(tkn_lst, raw);
                }
            }

            /* 2. Scan to the matching closing quote. */
            char *str_start = curr;
            curr++; // Skip opening quote
            while (*curr != '\0' && *curr != quote_type) {
                /* BUGFIX: only honor an escape when a character follows;
                 * a trailing '\\' before the NUL used to skip past the
                 * terminator and read out of bounds. */
                if (*curr == '\\' && *(curr + 1) != '\0') curr++;
                curr++;
            }
            if (*curr == quote_type) curr++; // Include closing quote

            /* 3. Emit the string token (quotes included). */
            size_t str_len = curr - str_start;
            Token_t *str_tok = calloc(1, sizeof(Token_t));
            if (str_tok) {
                str_tok->data = strndup(str_start, str_len);
                str_tok->size = str_len;
                str_tok->ctx = TOK_STRING;
                ListPushBack(tkn_lst, str_tok);
            }

            start = curr;
        } else {
            curr++;
        }
    }

    /* 4. Flush any trailing raw code after the last literal. */
    if (curr > start) {
        size_t last_len = curr - start;
        Token_t *last = calloc(1, sizeof(Token_t));
        if (last) {
            last->data = strndup(start, last_len);
            last->size = last_len;
            last->ctx = TOK_RAW;
            ListPushBack(tkn_lst, last);
        }
    }
}
|
||||
|
||||
/* Single-pass scanner: extracts string/char literals as TOK_STRING,
 * discards // and block comments, and pushes everything in between as
 * TOK_RAW chunks for later refinement. */
void InitialScanner(char *data, list_t *tkn_lst) {
    char *curr = data;
    char *start = data;

    while (*curr != '\0') {
        /* 1. String / char literals (quotes kept inside the token). */
        if (*curr == '\"' || *curr == '\'') {
            PushRaw(start, curr, tkn_lst);
            char quote = *curr;
            char *str_start = curr++;
            while (*curr && *curr != quote) {
                if (*curr == '\\' && *(curr + 1)) curr++; /* skip escaped char */
                curr++;
            }
            if (*curr) curr++; /* include the closing quote if present */
            PushToken(str_start, curr, TOK_STRING, tkn_lst);
            start = curr;
        }
        // 2. Handle Comments
        else if (*curr == '/' && (*(curr + 1) == '/' || *(curr + 1) == '*')) {
            PushRaw(start, curr, tkn_lst);

            if (*(curr + 1) == '/') { // Single line //
                while (*curr && *curr != '\n') curr++;
                /* BUGFIX: the old unconditional curr++ stepped past the NUL
                 * when a // comment ended the buffer without a newline,
                 * making the loop read out of bounds. */
                if (*curr) curr++; // skip the \n
            } else { // Multi-line /*
                curr += 2;
                while (*curr && !(*curr == '*' && *(curr + 1) == '/')) curr++;
                if (*curr) curr += 2; // Move past */
            }
            // We DON'T push a token here because we want to ignore comments.
            // If you want to keep them (for a doc-generator), push a TOK_COMMENT.
            //PushToken(start, curr, TOK_COMMENT, tkn_lst);
            start = curr;
        } else {
            curr++;
        }
    }
    PushRaw(start, curr, tkn_lst); /* flush the tail */
}
|
||||
|
||||
/* Split every RAW token on whitespace: the existing node keeps the first
 * word and fresh nodes are inserted after it for the remaining words.
 * TOK_STRING nodes are left untouched. */
void RefineRawNodes(list_t *tkn_lst) {
    node_t *curr = tkn_lst->first;

    while (curr != NULL) {
        Token_t *t = (Token_t *)curr->data;

        if (t->ctx == TOK_RAW) {
            /* strtok_r mutates its input, so tokenize a private copy. */
            char *scratch = strndup(t->data, t->size);
            char *state = NULL;
            char *word = strtok_r(scratch, " \t\r\n", &state);

            if (word != NULL) {
                /* The first word replaces this node's payload in place. */
                free(t->data);
                t->size = strlen(word);
                t->data = strndup(word, t->size);

                /* Each further word becomes a new node chained after curr. */
                node_t *tail = curr;
                for (word = strtok_r(NULL, " \t\r\n", &state);
                     word != NULL;
                     word = strtok_r(NULL, " \t\r\n", &state)) {
                    Token_t *fresh = calloc(1, sizeof(Token_t));
                    fresh->size = strlen(word);
                    fresh->data = strndup(word, fresh->size);
                    fresh->ctx = TOK_RAW;

                    node_t *link = calloc(1, sizeof(node_t));
                    link->data = fresh;
                    link->next = tail->next;
                    tail->next = link;

                    if (tkn_lst->last == tail) tkn_lst->last = link;

                    tail = link;
                    tkn_lst->size++;
                }
                curr = tail; /* resume after the inserted chain */
            }
            free(scratch);
        }
        curr = curr->next;
    }
}
|
||||
|
||||
/* Remove every RAW token whose text is pure whitespace, relinking the
 * list around each removed node and releasing its memory. */
void PruneWhitespaceNodes(list_t *lst) {
    node_t *prev = NULL;
    node_t *curr = lst->first;

    while (curr != NULL) {
        Token_t *t = (Token_t *)curr->data;
        node_t *next = curr->next;

        if (t->ctx == TOK_RAW && IsWhitespace(t->data)) {
            /* Unlink from the chain, then free token and node. */
            if (prev != NULL) prev->next = next;
            else lst->first = next;
            if (lst->last == curr) lst->last = prev;

            ClearTokens(curr->data);
            free(curr);
            lst->size--;
        } else {
            prev = curr;
        }
        curr = next;
    }
}
|
||||
|
||||
/* Split the token held by `node` at byte `index`: the node keeps the
 * prefix [0, index) and a new TOK_RAW node holding the suffix is linked
 * in right after it.  `last` and `size` are kept consistent.
 * All allocations happen before any mutation, so on failure the list
 * and token are left exactly as they were (the old code dereferenced
 * unchecked malloc/calloc results). */
void ListSplitToken(list_t *lst, node_t *node, size_t index) {
    Token_t *old_t = (Token_t *)node->data;

    /* 1. Allocate everything up front. */
    size_t suffix_len = old_t->size - index;
    char *suffix_data = malloc(suffix_len + 1);
    char *prefix_data = malloc(index + 1);
    Token_t *new_t = calloc(1, sizeof(Token_t));
    node_t *new_node = calloc(1, sizeof(node_t));
    if (!suffix_data || !prefix_data || !new_t || !new_node) {
        free(suffix_data);   /* free(NULL) is a no-op */
        free(prefix_data);
        free(new_t);
        free(new_node);
        return;              /* nothing was modified yet */
    }

    /* 2. Copy out suffix and prefix. */
    memcpy(suffix_data, (char*)old_t->data + index, suffix_len);
    suffix_data[suffix_len] = '\0';
    memcpy(prefix_data, old_t->data, index);
    prefix_data[index] = '\0';

    /* 3. Truncate the original token to the prefix. */
    free(old_t->data);
    old_t->data = prefix_data;
    old_t->size = index;

    /* 4. Link in the suffix node after `node`. */
    new_t->data = suffix_data;
    new_t->size = suffix_len;
    new_t->ctx = TOK_RAW;
    new_node->data = new_t;
    new_node->next = node->next;
    node->next = new_node;
    if (lst->last == node) lst->last = new_node;
    lst->size++;
}
|
||||
|
||||
|
||||
//// Helper to create and link a new token
|
||||
//// Helper to create and link a new token
/* Builds a NUL-terminated token covering [start, end) and appends it to
 * `lst`.  Silently does nothing on allocation failure (previously an
 * OOM result was passed straight to memcpy — undefined behavior). */
void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) {
    size_t len = end - start;
    Token_t *t = calloc(1, sizeof(Token_t));
    if (t == NULL) return;
    t->data = malloc(len + 1);
    if (t->data == NULL) {
        free(t);
        return;
    }
    memcpy(t->data, start, len);
    t->data[len] = '\0';
    t->size = len;
    t->ctx = ctx;
    ListPushBack(lst, t);
}
|
||||
|
||||
//// Helper to push code that still needs to be refined
|
||||
//// Helper to push code that still needs to be refined
/* Appends the span [start, end) as a TOK_RAW token; empty or inverted
 * spans are ignored. */
void PushRaw(char *start, char *end, list_t *lst) {
    if (start < end) {
        PushToken(start, end, TOK_RAW, lst);
    }
}
|
||||
|
||||
#endif
|
||||
@ -62,7 +62,7 @@ void ListInsert(list_t *lst, size_t idx, void *data) {
|
||||
lst->first = node;
|
||||
} else {
|
||||
node_t *prev = lst->first;
|
||||
for (int i = 0; i < idx - 1 && prev->next; i++) {
|
||||
for (size_t i = 0; i < idx - 1 && prev->next; i++) {
|
||||
prev = prev->next;
|
||||
}
|
||||
node->next = prev->next;
|
||||
|
||||
358
source/main.c
358
source/main.c
@ -3,6 +3,93 @@
|
||||
|
||||
#include "../include/SterlingCompiler.h"
|
||||
|
||||
const char *SYMBOLS = ";(){}[]$%&*#@!?:,.<>|-+=~`^";
|
||||
|
||||
// Common C operators (Order matters: put longer ones first if you add 3-char ops)
|
||||
MultiOp MUNCH_TABLE[] = {
|
||||
{"<<=", 3}, {">>=", 3},
|
||||
{"==", 2}, {"!=", 2}, {"<=", 2}, {">=", 2},
|
||||
{"++", 2}, {"--", 2}, {"->", 2}, {"+=", 2},
|
||||
{"-=", 2}, {"*=", 2}, {"/=", 2}, {"&&", 2}, {"||", 2},
|
||||
{"^=", 2}, {"<<", 2}, {">>", 2}, {"|=", 2}, {"&=", 2},
|
||||
{NULL, 0}
|
||||
};
|
||||
|
||||
// This can be expanded at runtime if you use a dynamic array instead of a static one
|
||||
KeywordEntry KEYWORD_TABLE[] = {
|
||||
{"if", TOK_KEY},
|
||||
{"else", TOK_KEY},
|
||||
{"while", TOK_KEY},
|
||||
{"return", TOK_KEY},
|
||||
{"int", TOK_KEY},
|
||||
{"float", TOK_KEY},
|
||||
{"void", TOK_KEY},
|
||||
{"include", TOK_PREPROC},
|
||||
{"define", TOK_PREPROC},
|
||||
{"@comptime",TOK_KEY}, // Your custom identifier
|
||||
{NULL, TOK_NONE}
|
||||
};
|
||||
|
||||
# ifndef strndup
/* Fallback strndup (POSIX/C23 semantics): copy at most n bytes of s,
 * stopping at the terminator, and return a fresh NUL-terminated string
 * (NULL on allocation failure).
 * BUGFIX: the old version copied exactly n bytes unconditionally, reading
 * past the end of any string shorter than n, and memcpy'd into an
 * unchecked calloc result.
 * NOTE(review): `#ifndef strndup` only guards against a macro; if libc
 * declares strndup as a function this definition may still collide —
 * consider a project-prefixed name. */
char *strndup(const char *s, size_t n) {
    size_t len = 0;
    while (len < n && s[len] != '\0') len++;   /* bounded strlen */
    char *str = calloc(len + 1, sizeof(char));
    if (str != NULL) memcpy(str, s, len);
    return (str);
}
# endif
|
||||
|
||||
|
||||
/* Return true when every character of s is whitespace (vacuously true
 * for the empty string). */
bool IsWhitespace(const char *s) {
    for (; *s != '\0'; s++) {
        if (!isspace((unsigned char)*s)) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
void ClearTokens(void*arg) {
|
||||
Token_t *tok = arg;
|
||||
free(tok->data);
|
||||
free(tok);
|
||||
}
|
||||
|
||||
node_t* NewNode(void* data) {
|
||||
node_t* n = calloc(1, sizeof(node_t));
|
||||
if(n) n->data = data;
|
||||
return n;
|
||||
}
|
||||
|
||||
Token_t* NewToken(const char* start, size_t len, TKN_CTX ctx) {
|
||||
Token_t* t = malloc(sizeof(Token_t));
|
||||
t->data = strndup(start, len);
|
||||
t->size = len;
|
||||
t->ctx = ctx;
|
||||
return t;
|
||||
}
|
||||
|
||||
void PushToken(list_t *lst, const char *start, const char *end, TKN_CTX ctx) {
|
||||
if (end <= start) return;
|
||||
ListPushBack(lst, NewToken(start, end - start, ctx));
|
||||
}
|
||||
|
||||
void ListSplitToken(list_t *lst, node_t *node, size_t index) {
|
||||
Token_t *t = (Token_t *)node->data;
|
||||
|
||||
// Create the suffix node first
|
||||
Token_t *suffix = NewToken(t->data + index, t->size - index, TOK_RAW);
|
||||
node_t *new_node = NewNode(suffix);
|
||||
new_node->next = node->next;
|
||||
node->next = new_node;
|
||||
if (lst->last == node) lst->last = new_node;
|
||||
lst->size++;
|
||||
|
||||
// Truncate the original (prefix)
|
||||
char *new_prefix = strndup(t->data, index);
|
||||
free(t->data);
|
||||
t->data = new_prefix;
|
||||
t->size = index;
|
||||
}
|
||||
|
||||
char *LoadFile(const char *filename) {
|
||||
FILE *file = NULL;
|
||||
char *data = NULL;
|
||||
@ -19,175 +106,113 @@ char *LoadFile(const char *filename) {
|
||||
return (data);
|
||||
}
|
||||
|
||||
# ifndef strndup
|
||||
char *strndup(const char *s, size_t n) {
|
||||
char *str = calloc(n + 1, sizeof(char));
|
||||
memcpy(str, s, n);
|
||||
return (str);
|
||||
}
|
||||
# endif
|
||||
|
||||
bool IsWhitespace(const char *s) {
|
||||
while (*s) {
|
||||
if (!isspace((unsigned char)*s)) return false;
|
||||
s++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ClearTokens(void*arg) {
|
||||
Token_t *tok = arg;
|
||||
free(tok->data);
|
||||
free(tok);
|
||||
}
|
||||
|
||||
// Helper to create and link a new token
|
||||
void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) {
|
||||
size_t len = end - start;
|
||||
Token_t *t = calloc(1, sizeof(Token_t));
|
||||
t->data = malloc(len + 1);
|
||||
memcpy(t->data, start, len);
|
||||
((char*)t->data)[len] = '\0';
|
||||
t->size = len;
|
||||
t->ctx = ctx;
|
||||
ListPushBack(lst, t);
|
||||
}
|
||||
|
||||
// Helper to push code that still needs to be refined
|
||||
void PushRaw(char *start, char *end, list_t *lst) {
|
||||
if (end <= start) return;
|
||||
PushToken(start, end, TOK_RAW, lst);
|
||||
}
|
||||
|
||||
void SeparateStrings(char *data, list_t *tkn_lst) {
|
||||
char *curr = data;
|
||||
char *start = data;
|
||||
|
||||
while (*curr != '\0') {
|
||||
// If we find a quote, we need to "package" the raw code before it,
|
||||
// then package the string itself.
|
||||
if (*curr == '\"' || *curr == '\'') {
|
||||
char quote_type = *curr;
|
||||
|
||||
// 1. Save the "Raw" chunk before the string
|
||||
if (curr > start) {
|
||||
size_t raw_len = curr - start;
|
||||
Token_t *raw = calloc(1, sizeof(Token_t));
|
||||
raw->data = strndup(start, raw_len); // strndup is C99/POSIX
|
||||
raw->size = raw_len;
|
||||
raw->ctx = TOK_RAW;
|
||||
ListPushBack(tkn_lst, raw);
|
||||
}
|
||||
|
||||
// 2. Find the end of the string
|
||||
char *str_start = curr;
|
||||
curr++; // Skip opening quote
|
||||
while (*curr != '\0' && *curr != quote_type) {
|
||||
if (*curr == '\\') curr++; // Skip escaped characters like \"
|
||||
curr++;
|
||||
}
|
||||
if (*curr == quote_type) curr++; // Include closing quote
|
||||
|
||||
// 3. Save the String Token
|
||||
size_t str_len = curr - str_start;
|
||||
Token_t *str_tok = calloc(1, sizeof(Token_t));
|
||||
str_tok->data = strndup(str_start, str_len);
|
||||
str_tok->size = str_len;
|
||||
str_tok->ctx = TOK_STRING;
|
||||
ListPushBack(tkn_lst, str_tok);
|
||||
|
||||
start = curr; // Reset start to the character after the string
|
||||
} else {
|
||||
curr++;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Catch any remaining raw code after the last string
|
||||
if (curr > start) {
|
||||
size_t last_len = curr - start;
|
||||
Token_t *last = calloc(1, sizeof(Token_t));
|
||||
last->data = strndup(start, last_len);
|
||||
last->size = last_len;
|
||||
last->ctx = TOK_RAW;
|
||||
ListPushBack(tkn_lst, last);
|
||||
}
|
||||
}
|
||||
|
||||
void InitialScanner(char *data, list_t *tkn_lst) {
|
||||
char *curr = data;
|
||||
char *start = data;
|
||||
char *curr = data, *start = data;
|
||||
|
||||
while (*curr != '\0') {
|
||||
// 1. Handle Strings
|
||||
while (*curr) {
|
||||
// Handle Strings
|
||||
if (*curr == '\"' || *curr == '\'') {
|
||||
PushRaw(start, curr, tkn_lst); // Save code before string
|
||||
char quote = *curr;
|
||||
char *str_start = curr++;
|
||||
while (*curr && *curr != quote) {
|
||||
if (*curr == '\\' && *(curr + 1)) curr++; // Skip escaped char
|
||||
curr++;
|
||||
}
|
||||
if (*curr) curr++;
|
||||
PushToken(str_start, curr, TOK_STRING, tkn_lst);
|
||||
PushToken(tkn_lst, start, curr, TOK_RAW);
|
||||
char *s_start = curr++, q = *curr;
|
||||
while (*curr && *curr != q) { if (*curr == '\\') curr++; curr++; }
|
||||
if (*curr) curr++;
|
||||
PushToken(tkn_lst, s_start, curr, TOK_STRING);
|
||||
start = curr;
|
||||
}
|
||||
// 2. Handle Comments
|
||||
else if (*curr == '/' && (*(curr + 1) == '/' || *(curr + 1) == '*')) {
|
||||
PushRaw(start, curr, tkn_lst); // Save code before comment
|
||||
|
||||
if (*(curr + 1) == '/') { // Single line //
|
||||
while (*curr && *curr != '\n') curr++;
|
||||
curr++;//for skipping the \n
|
||||
} else { // Multi-line /*
|
||||
curr += 2;
|
||||
while (*curr && !(*curr == '*' && *(curr + 1) == '/')) curr++;
|
||||
if (*curr) curr += 2; // Move past */
|
||||
// Handle Comments
|
||||
else if (*curr == '/' && (curr[1] == '/' || curr[1] == '*')) {
|
||||
PushToken(tkn_lst, start, curr, TOK_RAW);
|
||||
if (curr[1] == '/') { while (*curr && *curr != '\n') curr++; }
|
||||
else {
|
||||
curr += 2;
|
||||
while (*curr && !(*curr == '*' && curr[1] == '/')) curr++;
|
||||
if (*curr) curr += 2;
|
||||
}
|
||||
// We DON'T push a token here because we want to ignore comments.
|
||||
// If you want to keep them (for a doc-generator), push a TOK_COMMENT.
|
||||
start = curr;
|
||||
}
|
||||
else curr++;
|
||||
}
|
||||
PushToken(tkn_lst, start, curr, TOK_RAW);
|
||||
}
|
||||
|
||||
void RefineSymbols(list_t *tkn_lst) {
|
||||
for (node_t *curr = tkn_lst->first; curr; ) {
|
||||
Token_t *t = curr->data;
|
||||
if (t->ctx != TOK_RAW || (t->size == 1 && strchr(SYMBOLS, t->data[0]))) {
|
||||
curr = curr->next;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
curr++;
|
||||
|
||||
size_t pos = strcspn(t->data, SYMBOLS);
|
||||
if (pos < t->size) {
|
||||
ListSplitToken(tkn_lst, curr, (pos == 0) ? 1 : pos);
|
||||
// Don't move curr yet, we might have more symbols in the suffix
|
||||
} else {
|
||||
curr = curr->next;
|
||||
}
|
||||
}
|
||||
PushRaw(start, curr, tkn_lst); // Catch the tail
|
||||
}
|
||||
|
||||
void MunchTokens(list_t *lst) {
|
||||
for (node_t *n = lst->first; n && n->next; ) {
|
||||
Token_t *t1 = n->data, *t2 = n->next->data;
|
||||
|
||||
if (t1->ctx == TOK_RAW && t2->ctx == TOK_RAW && t1->size == 1 && t2->size == 1) {
|
||||
char op[3] = { t1->data[0], t2->data[0], '\0' };
|
||||
bool match = false;
|
||||
for (int i = 0; MUNCH_TABLE[i].op; i++) {
|
||||
if (strcmp(op, MUNCH_TABLE[i].op) == 0) { match = true; break; }
|
||||
}
|
||||
|
||||
if (match) {
|
||||
free(t1->data);
|
||||
t1->data = strndup(op, 2);
|
||||
t1->size = 2;
|
||||
t1->ctx = TOK_OP; // Upgrade to Operator context
|
||||
|
||||
node_t *tmp = n->next;
|
||||
n->next = tmp->next;
|
||||
if (lst->last == tmp) lst->last = n;
|
||||
ClearTokens(tmp->data);
|
||||
free(tmp);
|
||||
lst->size--;
|
||||
continue; // Check if the next char can be munched too (e.g. >>=)
|
||||
}
|
||||
}
|
||||
n = n->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void RefineRawNodes(list_t *tkn_lst) {
|
||||
node_t *curr = tkn_lst->first;
|
||||
node_t *prev = NULL;
|
||||
//node_t *prev = NULL;
|
||||
|
||||
while (curr) {
|
||||
Token_t *t = (Token_t *)curr->data;
|
||||
|
||||
// Only process RAW chunks; leave TOK_STRING nodes alone!
|
||||
if (t->ctx == TOK_RAW) {
|
||||
char *span = NULL;
|
||||
// Note: We use a copy because strtok modifies the string
|
||||
char *to_split = _strdup(t->data);
|
||||
char *to_split = strndup(t->data, t->size);
|
||||
char *tok = strtok_r(to_split, " \t\r\n", &span);
|
||||
|
||||
if (tok) {
|
||||
// 1. Update the current node's data with the FIRST token found
|
||||
free(t->data);
|
||||
t->data = _strdup(tok);
|
||||
t->size = strlen(tok);
|
||||
t->data = strndup(tok, t->size);
|
||||
|
||||
node_t *last_inserted = curr;
|
||||
tok = strtok_r(NULL, " \t\r\n", &span);
|
||||
|
||||
// 2. Insert NEW nodes for the rest of the tokens
|
||||
while (tok) {
|
||||
Token_t *new_t = calloc(1, sizeof(Token_t));
|
||||
new_t->data = _strdup(tok);
|
||||
new_t->size = strlen(tok);
|
||||
new_t->data = strndup(tok, new_t->size);
|
||||
new_t->ctx = TOK_RAW;
|
||||
|
||||
node_t *new_node = calloc(1, sizeof(node_t));
|
||||
new_node->data = new_t;
|
||||
|
||||
// Insert into the list
|
||||
new_node->next = last_inserted->next;
|
||||
last_inserted->next = new_node;
|
||||
|
||||
@ -197,12 +222,11 @@ void RefineRawNodes(list_t *tkn_lst) {
|
||||
tkn_lst->size++;
|
||||
tok = strtok_r(NULL, " \t\r\n", &span);
|
||||
}
|
||||
curr = last_inserted; // Move cursor to the end of the new chain
|
||||
curr = last_inserted;
|
||||
}
|
||||
free(to_split);
|
||||
}
|
||||
|
||||
prev = curr;
|
||||
//prev = curr;
|
||||
curr = curr->next;
|
||||
}
|
||||
}
|
||||
@ -232,32 +256,60 @@ void PruneWhitespaceNodes(list_t *lst) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Classify RAW tokens: exact keyword matches take their registry context,
 * otherwise identifier-shaped tokens (leading letter, '_' or '@') become
 * TOK_ID.  Non-RAW tokens are left untouched. */
void IdentifyTokens(list_t *lst) {
    for (node_t *curr = lst->first; curr; curr = curr->next) {
        Token_t *t = (Token_t *)curr->data;

        if (t->ctx != TOK_RAW) continue;

        // 1. Check against Keyword Registry
        bool found = false;
        for (int i = 0; KEYWORD_TABLE[i].name != NULL; i++) {
            if (strcmp(t->data, KEYWORD_TABLE[i].name) == 0) {
                t->ctx = KEYWORD_TABLE[i].ctx;
                found = true;
                break;
            }
        }

        // 2. If not a keyword, is it a valid Identifier? (e.g., my_var_1)
        /* BUGFIX: cast to unsigned char before isalpha(); passing a
         * plain char that happens to be negative is undefined behavior. */
        if (!found && t->size > 0) {
            if (isalpha((unsigned char)t->data[0]) ||
                t->data[0] == '_' || t->data[0] == '@') {
                t->ctx = TOK_ID;
            }
        }
    }
}
|
||||
|
||||
/*
|
||||
// Modular function to register new identifiers
|
||||
void RegisterIdentifier(const char *name, TKN_CTX type) {
|
||||
// In a professional compiler, you'd insert this into a Hash Map.
|
||||
// For now, it's enough to know this is where user-defined types go.
|
||||
}
|
||||
*/
|
||||
|
||||
int main(int ac, char **av) {
|
||||
if (ac <= 1) {
|
||||
printf("no file specified");
|
||||
return (-1);
|
||||
}
|
||||
if (ac <= 1) return printf("No file specified\n"), -1;
|
||||
char* data = LoadFile(av[1]);
|
||||
list_t *tkn_lst = ListInit(NULL);
|
||||
//first pass on string, whitespace and comments
|
||||
|
||||
InitialScanner(data, tkn_lst);
|
||||
//SeparateStrings(data, tkn_lst);
|
||||
list_iter_t iter = ListGetIter(tkn_lst);
|
||||
while (iter.current) {
|
||||
printf("|%s|", ((Token_t *)iter.current->data)->data);
|
||||
iter.current = iter.current->next;
|
||||
}
|
||||
PruneWhitespaceNodes(tkn_lst);
|
||||
printf("\n___\n");
|
||||
RefineRawNodes(tkn_lst);
|
||||
ListReset(&iter, tkn_lst);
|
||||
while (iter.current) {
|
||||
printf("|%s|\n", ((Token_t *)iter.current->data)->data);
|
||||
iter.current = iter.current->next;
|
||||
}
|
||||
RefineSymbols(tkn_lst);
|
||||
MunchTokens(tkn_lst);
|
||||
IdentifyTokens(tkn_lst);
|
||||
|
||||
list_iter_t iter = ListGetIter(tkn_lst);
|
||||
while (iter.current) {
|
||||
Token_t *t = (Token_t *)iter.current->data;
|
||||
printf("[%02X] %-10s | %s\n", t->ctx,
|
||||
(t->ctx == TOK_ID ? "IDENTIFIER" : "TOKEN"), t->data);
|
||||
iter.current = iter.current->next;
|
||||
}
|
||||
//pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`"
|
||||
//give each token a context
|
||||
//and give each token a context
|
||||
//let's replace preprocessor (include, define, etc)
|
||||
//let's do recursive parsing everywhere that need it
|
||||
//compile time reflection (@comptime or @reflect)
|
||||
|
||||
23
source/type.h
Normal file
23
source/type.h
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
//builtin type: if x86_64, then since i use simd, should align them
|
||||
|
||||
//typedef struct vec2 {
|
||||
// float x;
|
||||
// float y;
|
||||
// float _padding1;
|
||||
// float _padding2;
|
||||
//} __attribute__((aligned(16)));//losing 8 byte
|
||||
|
||||
//typedef struct vec3 {
|
||||
// float x;
|
||||
// float y;
|
||||
// float z;
|
||||
// float _padding;
|
||||
//} __attribute__((aligned(16)));//losing 4 byte
|
||||
|
||||
//typedef struct vec4 {
|
||||
// float x;
|
||||
// float y;
|
||||
// float z;
|
||||
// float w;
|
||||
//} __attribute__((aligned(16)));
|
||||
Loading…
x
Reference in New Issue
Block a user