fixed issue with single char op, complex, num, and other fun things
This commit is contained in:
parent
cebb63912c
commit
378cae31a1
2
Makefile
2
Makefile
@ -5,7 +5,7 @@ SRC := $(wildcard source/*.c)
|
|||||||
OBJ := $(SRC:source/%.c=obj/%.o)
|
OBJ := $(SRC:source/%.c=obj/%.o)
|
||||||
|
|
||||||
CC := gcc
|
CC := gcc
|
||||||
CFLAG := -ggdb -Wall -Wextra -Werror -Wpedantic -I include -O0 -std=c99
|
CFLAG := -ggdb -Wall -Wextra -Werror -Wpedantic -I include -O0 -std=c99 -fsignaling-nans
|
||||||
LFLAG :=
|
LFLAG :=
|
||||||
|
|
||||||
all: $(NAME)
|
all: $(NAME)
|
||||||
|
|||||||
@ -17,12 +17,14 @@
|
|||||||
// Token classification flags. Each constant occupies its own bit so that a
// context can be tested with a bitwise AND (as CtxToString does).
typedef enum {
    TOK_NONE    = 0x001,
    TOK_RAW     = 0x002, // not yet classified
    TOK_STRING  = 0x004, // "asdfasf"; L"WideString"
    TOK_LITERAL = 0x008, // INT: 42, 0xff, 0777, 100L; FLOAT: 3.14, 1e-5, 2.0f; ENUM; char CONST 'a' '\n' L'x'
    TOK_OP      = 0x010,
    TOK_PREPROC = 0x020,
    TOK_COMMENT = 0x040,
    TOK_KEY     = 0x080,
    TOK_ID      = 0x100, // variable/function names
    TOK_NUM     = 0x200
} TKN_CTX;
|
||||||
|
|
||||||
typedef struct Token_s {
|
typedef struct Token_s {
|
||||||
@ -41,4 +43,39 @@ typedef struct {
|
|||||||
TKN_CTX ctx;
|
TKN_CTX ctx;
|
||||||
} KeywordEntry;
|
} KeywordEntry;
|
||||||
|
|
||||||
|
|
||||||
|
// Characters that are treated as single-character symbols by the scanner
// passes (looked up with strchr). Declared static and fully const because this
// definition lives in a header: an external definition here would be emitted by
// every translation unit that includes it and clash at link time.
static const char *const SYMBOLS = ";(){}[]$%&*#@!?:,.<>|-+=~`^";
|
||||||
|
|
||||||
|
// Common C operators (Order matters: put longer ones first if you add 3-char ops)
|
||||||
|
static const MultiOp MUNCH_TABLE[] = {
|
||||||
|
{"%:%:", 4},
|
||||||
|
{"<<=", 3}, {">>=", 3}, {"...", 3},
|
||||||
|
{"\?\?=", 3}, {"\?\?/", 3}, {"\?\?'", 3}, {"\?\?(", 3},
|
||||||
|
{"\?\?)", 3}, {"\?\?!", 3}, {"\?\?<", 3},
|
||||||
|
{"\?\?>", 3}, {"\?\?-", 3}, //trigraph
|
||||||
|
{"==", 2}, {"!=", 2}, {"<=", 2}, {">=", 2}, {"##", 2},
|
||||||
|
{"++", 2}, {"--", 2}, {"->", 2}, {"+=", 2}, {"%=",2},
|
||||||
|
{"-=", 2}, {"*=", 2}, {"/=", 2}, {"&&", 2}, {"||", 2},
|
||||||
|
{"^=", 2}, {"<<", 2}, {">>", 2}, {"|=", 2}, {"&=", 2},//
|
||||||
|
{"<:", 2}, {":>", 2}, {"<%", 2}, {"%>", 2}, {"%:", 2},//digraphs
|
||||||
|
{NULL, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
// This can be expanded at runtime if you use a dynamic array instead of a static one
|
||||||
|
static const KeywordEntry KEYWORD_TABLE[] = {
|
||||||
|
{"if", TOK_KEY},
|
||||||
|
{"else", TOK_KEY},
|
||||||
|
{"while", TOK_KEY},
|
||||||
|
{"return", TOK_KEY},
|
||||||
|
{"var", TOK_KEY},
|
||||||
|
{"int", TOK_KEY},
|
||||||
|
{"float", TOK_KEY},
|
||||||
|
{"void", TOK_KEY},
|
||||||
|
{"include", TOK_PREPROC},
|
||||||
|
{"define", TOK_PREPROC},
|
||||||
|
{"comptime",TOK_KEY},
|
||||||
|
{"reflect", TOK_KEY},
|
||||||
|
{NULL, TOK_NONE}
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
BIN
obj/main.o
BIN
obj/main.o
Binary file not shown.
469
source/main.c
469
source/main.c
@ -3,33 +3,6 @@
|
|||||||
|
|
||||||
#include "../include/SterlingCompiler.h"
|
#include "../include/SterlingCompiler.h"
|
||||||
|
|
||||||
const char *SYMBOLS = ";(){}[]$%&*#@!?:,.<>|-+=~`^";
|
|
||||||
|
|
||||||
// Common C operators (Order matters: put longer ones first if you add 3-char ops)
|
|
||||||
MultiOp MUNCH_TABLE[] = {
|
|
||||||
{"<<=", 3}, {">>=", 3},
|
|
||||||
{"==", 2}, {"!=", 2}, {"<=", 2}, {">=", 2},
|
|
||||||
{"++", 2}, {"--", 2}, {"->", 2}, {"+=", 2},
|
|
||||||
{"-=", 2}, {"*=", 2}, {"/=", 2}, {"&&", 2}, {"||", 2},
|
|
||||||
{"^=", 2}, {"<<", 2}, {">>", 2}, {"|=", 2}, {"&=", 2},
|
|
||||||
{NULL, 0}
|
|
||||||
};
|
|
||||||
|
|
||||||
// This can be expanded at runtime if you use a dynamic array instead of a static one
|
|
||||||
KeywordEntry KEYWORD_TABLE[] = {
|
|
||||||
{"if", TOK_KEY},
|
|
||||||
{"else", TOK_KEY},
|
|
||||||
{"while", TOK_KEY},
|
|
||||||
{"return", TOK_KEY},
|
|
||||||
{"int", TOK_KEY},
|
|
||||||
{"float", TOK_KEY},
|
|
||||||
{"void", TOK_KEY},
|
|
||||||
{"include", TOK_PREPROC},
|
|
||||||
{"define", TOK_PREPROC},
|
|
||||||
{"@comptime",TOK_KEY}, // Your custom identifier
|
|
||||||
{NULL, TOK_NONE}
|
|
||||||
};
|
|
||||||
|
|
||||||
# ifndef strndup
|
# ifndef strndup
|
||||||
char *strndup(const char *s, size_t n) {
|
char *strndup(const char *s, size_t n) {
|
||||||
char *str = calloc(n + 1, sizeof(char));
|
char *str = calloc(n + 1, sizeof(char));
|
||||||
@ -38,6 +11,21 @@ char *strndup(const char *s, size_t n) {
|
|||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
// Read the whole of `filename` into a newly allocated, NUL-terminated buffer.
//
// Returns the buffer (caller frees it with free()) or NULL when the file
// cannot be opened, measured, read, or when memory runs out. Failures are
// reported through the return value instead of assert() so they are still
// caught in builds compiled with NDEBUG.
char *LoadFile(const char *filename) {
    if (filename == NULL) return NULL;

    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        perror(filename);
        return NULL;
    }

    char *data = NULL;
    long size = -1;

    // Measure the file by seeking to its end; every step can fail
    // (e.g. on a pipe), so each result is checked.
    if (fseek(file, 0, SEEK_END) == 0) size = ftell(file);

    if (size >= 0 && fseek(file, 0, SEEK_SET) == 0) {
        data = malloc((size_t)size + 1);
        if (data != NULL) {
            size_t got = fread(data, 1, (size_t)size, file);
            if (got < (size_t)size && ferror(file)) {
                free(data);
                data = NULL;
            } else {
                // In text mode fewer bytes than `size` may be delivered
                // (newline translation), so terminate at what was really read.
                data[got] = '\0';
            }
        }
    }

    fclose(file);
    return data;
}
|
||||||
|
|
||||||
bool IsWhitespace(const char *s) {
|
bool IsWhitespace(const char *s) {
|
||||||
while (*s) {
|
while (*s) {
|
||||||
@ -47,6 +35,90 @@ bool IsWhitespace(const char *s) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// True when `s` is a non-empty string made up solely of decimal digits.
// NULL and "" yield false.
bool IsNumeric(const char *s) {
    if (s == NULL || *s == '\0') return false;

    // Walk the string as unsigned char so isdigit never sees a negative value.
    const unsigned char *p = (const unsigned char *)s;
    while (*p != '\0') {
        if (!isdigit(*p)) return false;
        p++;
    }
    return true;
}
|
||||||
|
|
||||||
|
// Quick test for "does this token look like the start of a numeric literal?".
// Only the first one or two characters are inspected:
//   - a leading decimal digit, or
//   - a leading '.' immediately followed by a digit (e.g. .5).
// `len` is the number of readable characters in `s`. NULL or empty input
// yields false (NULL is rejected for parity with IsNumeric).
bool IsComplexNumeric(const char *s, size_t len) {
    if (s == NULL || len == 0) return false;

    if (isdigit((unsigned char)s[0])) return true;

    return len > 1 && s[0] == '.' && isdigit((unsigned char)s[1]);
}
|
||||||
|
|
||||||
|
// Replace every C trigraph in `data` with the single character it stands for.
// The rewrite happens in place: the result is never longer than the input, so
// the write cursor can trail the read cursor inside the same buffer.
// Two question marks followed by an unlisted character are copied unchanged.
void ResolveTrigraphs(char *data) {
    // third[i] is the character after the two question marks,
    // plain[i] is what the whole three-character sequence becomes.
    static const char third[] = "=/'()!<>-";
    static const char plain[] = "#\\^[]|{}~";

    char *in = data;
    char *out = data;

    while (*in != '\0') {
        if (in[0] == '?' && in[1] == '?' && in[2] != '\0') {
            // in[2] is known to be non-NUL, so strchr cannot hit the terminator.
            const char *hit = strchr(third, in[2]);
            if (hit != NULL) {
                *out++ = plain[hit - third];
                in += 3;
                continue;
            }
        }
        *out++ = *in++;
    }
    *out = '\0';
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
list_iter_t iter;
|
||||||
|
list_t *tokens;
|
||||||
|
bool error;
|
||||||
|
} Parser_t;
|
||||||
|
|
||||||
|
// Initialize the parser
|
||||||
|
Parser_t ParserInit(list_t *lst) {
|
||||||
|
return (Parser_t){ .iter = ListGetIter(lst), .tokens = lst, .error = false };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look at current token
|
||||||
|
Token_t* Peek(Parser_t *p) {
|
||||||
|
if (!p->iter.current) return NULL;
|
||||||
|
return (Token_t*)p->iter.current->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move to next
|
||||||
|
void Advance(Parser_t *p) {
|
||||||
|
if (p->iter.current) p->iter.current = p->iter.current->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if current matches type (and optionally value)
|
||||||
|
bool Match(Parser_t *p, TKN_CTX ctx, const char *val) {
|
||||||
|
Token_t *t = Peek(p);
|
||||||
|
if (!t || (t->ctx != ctx)) return false;
|
||||||
|
if (val && strcmp(t->data, val) != 0) return false;
|
||||||
|
|
||||||
|
Advance(p);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required token; errors out if not found
|
||||||
|
Token_t* Expect(Parser_t *p, TKN_CTX ctx, const char *val) {
|
||||||
|
Token_t *t = Peek(p);
|
||||||
|
if (!t || (t->ctx != ctx) || (val && strcmp(t->data, val) != 0)) {
|
||||||
|
printf("Syntax Error: Expected '%s', but found '%s'\n",
|
||||||
|
val ? val : "specific type", t ? t->data : "EOF");
|
||||||
|
p->error = true;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Advance(p);
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
void ClearTokens(void*arg) {
|
void ClearTokens(void*arg) {
|
||||||
Token_t *tok = arg;
|
Token_t *tok = arg;
|
||||||
free(tok->data);
|
free(tok->data);
|
||||||
@ -72,6 +144,58 @@ void PushToken(list_t *lst, const char *start, const char *end, TKN_CTX ctx) {
|
|||||||
ListPushBack(lst, NewToken(start, end - start, ctx));
|
ListPushBack(lst, NewToken(start, end - start, ctx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Give every still-unclassified (TOK_RAW) token its final context.
// Tokens that already carry another context are left alone. Checks run in
// priority order: keyword table, numeric literal, identifier, symbol.
// A token that matches none of them, or that is empty, stays TOK_RAW.
void IdentifyTokens(list_t *lst) {
    for (node_t *curr = lst->first; curr; curr = curr->next) {
        Token_t *t = (Token_t *)curr->data;

        // Skip nodes that were already identified (like TOK_STRING or munched TOK_OP)
        if (t->ctx != TOK_RAW) continue;

        // An empty token must not reach the strchr() below: searching for
        // '\0' matches the string terminator and would label it TOK_OP.
        if (t->size == 0 || t->data[0] == '\0') continue;

        // 1. Keyword registry (highest priority)
        bool found = false;
        for (int i = 0; KEYWORD_TABLE[i].name != NULL; i++) {
            if (strcmp(t->data, KEYWORD_TABLE[i].name) == 0) {
                t->ctx = KEYWORD_TABLE[i].ctx;
                found = true;
                break;
            }
        }
        if (found) continue;

        // ctype functions require a value representable as unsigned char.
        unsigned char first = (unsigned char)t->data[0];

        // 2. Numeric literals: leading digit, or '.' followed by a digit
        if (isdigit(first) ||
            (first == '.' && t->size > 1 && isdigit((unsigned char)t->data[1]))) {
            t->ctx = TOK_NUM;
            continue;
        }

        // 3. Identifiers: leading letter, '_' or '@'
        if (isalpha(first) || first == '_' || first == '@') {
            t->ctx = TOK_ID;
            continue;
        }

        // 4. Symbols: '#' is a preprocessor trigger, everything else in
        //    SYMBOLS is an operator
        if (strchr(SYMBOLS, t->data[0])) {
            t->ctx = (first == '#') ? TOK_PREPROC : TOK_OP;
            continue;
        }
    }
}
|
||||||
|
|
||||||
|
// Placeholder for a transformation pass over the token list. The intent noted
// by the author: on seeing 'int', replace it with the sequence ': 4'.
// Nothing is modified yet.
// NOTE(review): KEYWORD_TABLE lists "int" as TOK_KEY, so this TOK_ID test may
// never be true once IdentifyTokens has run - confirm the intended context.
void ApplyTypeAliases(list_t *lst) {
    node_t *node = lst->first;

    while (node != NULL) {
        Token_t *tok = node->data;
        bool is_int_id = tok->ctx == TOK_ID && strcmp(tok->data, "int") == 0;

        if (is_int_id) {
            // Logic to transform token...
        }
        node = node->next;
    }
}
|
||||||
|
|
||||||
void ListSplitToken(list_t *lst, node_t *node, size_t index) {
|
void ListSplitToken(list_t *lst, node_t *node, size_t index) {
|
||||||
Token_t *t = (Token_t *)node->data;
|
Token_t *t = (Token_t *)node->data;
|
||||||
|
|
||||||
@ -90,37 +214,31 @@ void ListSplitToken(list_t *lst, node_t *node, size_t index) {
|
|||||||
t->size = index;
|
t->size = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *LoadFile(const char *filename) {
|
|
||||||
FILE *file = NULL;
|
|
||||||
char *data = NULL;
|
|
||||||
file = fopen(filename, "r");
|
|
||||||
assert(file);
|
|
||||||
fseek(file, 0, SEEK_END);
|
|
||||||
long size = ftell(file);
|
|
||||||
fseek(file, 0, SEEK_SET);
|
|
||||||
data = (char *)malloc(size + 1);
|
|
||||||
assert(data);
|
|
||||||
fread(data, 1, size, file);
|
|
||||||
data[size] = 0x00;
|
|
||||||
fclose(file);
|
|
||||||
return (data);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void InitialScanner(char *data, list_t *tkn_lst) {
|
void InitialScanner(char *data, list_t *tkn_lst) {
|
||||||
char *curr = data, *start = data;
|
char *curr = data, *start = data;
|
||||||
|
|
||||||
while (*curr) {
|
while (*curr) {
|
||||||
// Handle Strings
|
// 1. Handle Wide or Normal Strings/Chars
|
||||||
if (*curr == '\"' || *curr == '\'') {
|
// Check for 'L' followed immediately by a quote
|
||||||
|
bool is_wide = (*curr == 'L' && (curr[1] == '\"' || curr[1] == '\''));
|
||||||
|
if (*curr == '\"' || *curr == '\'' || is_wide) {
|
||||||
PushToken(tkn_lst, start, curr, TOK_RAW);
|
PushToken(tkn_lst, start, curr, TOK_RAW);
|
||||||
char *s_start = curr++, q = *curr;
|
|
||||||
while (*curr && *curr != q) { if (*curr == '\\') curr++; curr++; }
|
char *s_start = curr;
|
||||||
if (*curr) curr++;
|
if (is_wide) curr++; // Advance past 'L'
|
||||||
|
|
||||||
|
char q = *curr;
|
||||||
|
curr++; // Skip opening quote
|
||||||
|
while (*curr && *curr != q) {
|
||||||
|
if (*curr == '\\' && curr[1]) curr++; // Skip escaped char
|
||||||
|
curr++;
|
||||||
|
}
|
||||||
|
if (*curr) curr++; // Skip closing quote
|
||||||
|
|
||||||
PushToken(tkn_lst, s_start, curr, TOK_STRING);
|
PushToken(tkn_lst, s_start, curr, TOK_STRING);
|
||||||
start = curr;
|
start = curr;
|
||||||
}
|
}
|
||||||
// Handle Comments
|
// 2. Handle Comments (Same as before)
|
||||||
else if (*curr == '/' && (curr[1] == '/' || curr[1] == '*')) {
|
else if (*curr == '/' && (curr[1] == '/' || curr[1] == '*')) {
|
||||||
PushToken(tkn_lst, start, curr, TOK_RAW);
|
PushToken(tkn_lst, start, curr, TOK_RAW);
|
||||||
if (curr[1] == '/') { while (*curr && *curr != '\n') curr++; }
|
if (curr[1] == '/') { while (*curr && *curr != '\n') curr++; }
|
||||||
@ -139,6 +257,7 @@ void InitialScanner(char *data, list_t *tkn_lst) {
|
|||||||
void RefineSymbols(list_t *tkn_lst) {
|
void RefineSymbols(list_t *tkn_lst) {
|
||||||
for (node_t *curr = tkn_lst->first; curr; ) {
|
for (node_t *curr = tkn_lst->first; curr; ) {
|
||||||
Token_t *t = curr->data;
|
Token_t *t = curr->data;
|
||||||
|
//IsComplexNumeric(t->data, t->size) ||
|
||||||
if (t->ctx != TOK_RAW || (t->size == 1 && strchr(SYMBOLS, t->data[0]))) {
|
if (t->ctx != TOK_RAW || (t->size == 1 && strchr(SYMBOLS, t->data[0]))) {
|
||||||
curr = curr->next;
|
curr = curr->next;
|
||||||
continue;
|
continue;
|
||||||
@ -154,36 +273,128 @@ void RefineSymbols(list_t *tkn_lst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MunchTokens(list_t *lst) {
|
// Fuse the three-token pattern [digits] [.] [digits] into one TOK_NUM token,
// e.g. "3" "." "14" becomes "3.14". The first node keeps its place in the list
// and receives the combined text; the two nodes after it are unlinked and freed.
void MunchFloats(list_t *lst) {
    node_t *n = lst->first;

    while (n && n->next && n->next->next) {
        Token_t *t1 = n->data;
        Token_t *dot = n->next->data;
        Token_t *t2 = n->next->next->data;

        // ctype functions need unsigned char values; a plain (possibly
        // negative) char is undefined behaviour.
        bool is_float = isdigit((unsigned char)t1->data[0])
                     && dot->size == 1 && dot->data[0] == '.'
                     && isdigit((unsigned char)t2->data[0]);
        if (!is_float) {
            n = n->next;
            continue;
        }

        size_t new_size = t1->size + 1 + t2->size;
        char *buf = malloc(new_size + 1);
        if (buf == NULL) {
            // Out of memory: leave these tokens unmerged and move on.
            n = n->next;
            continue;
        }

        // Copy by recorded size rather than formatting with "%s": the buffer
        // was sized from the size fields, so the copy must use them too.
        memcpy(buf, t1->data, t1->size);
        buf[t1->size] = '.';
        memcpy(buf + t1->size + 1, t2->data, t2->size);
        buf[new_size] = '\0';

        free(t1->data);
        t1->data = buf;
        t1->size = new_size;
        t1->ctx = TOK_NUM; // Mark it now!

        // Remove the '.' node and the fraction node
        for (int i = 0; i < 2; i++) {
            node_t *rem = n->next;
            n->next = rem->next;
            if (lst->last == rem) lst->last = n;
            ClearTokens(rem->data);
            free(rem);
            lst->size--;
        }
        // Stay on `n`: the merged token is examined again on the next pass.
    }
}
|
||||||
|
|
||||||
|
// Fuse an exponent that the symbol splitter tore apart:
// [number ending in e/E] [+ or -] [digits], e.g. "1e" "-" "5" becomes "1e-5".
// The merged token is tagged TOK_NUM; the sign and digit nodes are freed.
//
// The first token must itself look like a decimal number (leading digit),
// otherwise an identifier that merely ends in 'e' would swallow the
// arithmetic after it ("size" "-" "1"). Tokens with a 0x/0X prefix are
// skipped because a trailing 'e' there is a hex digit, not an exponent
// marker. Both TOK_RAW and TOK_NUM are accepted so that a mantissa already
// merged by MunchFloats ("1.5e") still gets its exponent.
void MunchScientificNotation(list_t *lst) {
    node_t *n = lst->first;

    while (n && n->next && n->next->next) {
        Token_t *t1 = n->data;
        Token_t *op = n->next->data;
        Token_t *t2 = n->next->next->data;

        bool mergeable = false;
        if ((t1->ctx == TOK_RAW || t1->ctx == TOK_NUM) && t1->size > 1) {
            unsigned char first = (unsigned char)t1->data[0];
            char last = (char)tolower((unsigned char)t1->data[t1->size - 1]);
            bool is_hex = first == '0'
                       && (t1->data[1] == 'x' || t1->data[1] == 'X');

            mergeable = isdigit(first) && !is_hex && last == 'e'
                     && op->size == 1
                     && (op->data[0] == '+' || op->data[0] == '-')
                     && isdigit((unsigned char)t2->data[0]);
        }

        char *new_data = NULL;
        size_t new_size = 0;
        if (mergeable) {
            new_size = t1->size + op->size + t2->size;
            new_data = malloc(new_size + 1);
        }
        if (new_data == NULL) {
            // No match, or out of memory: leave the tokens as they are.
            n = n->next;
            continue;
        }

        // Copy by recorded size so the write can never exceed the allocation.
        memcpy(new_data, t1->data, t1->size);
        memcpy(new_data + t1->size, op->data, op->size);
        memcpy(new_data + t1->size + op->size, t2->data, t2->size);
        new_data[new_size] = '\0';

        free(t1->data);
        t1->data = new_data;
        t1->size = new_size;
        t1->ctx = TOK_NUM;

        // Remove the sign node and the exponent-digits node
        for (size_t i = 0; i < 2; i++) {
            node_t *to_remove = n->next;
            n->next = to_remove->next;
            if (lst->last == to_remove) lst->last = n;
            ClearTokens(to_remove->data);
            free(to_remove);
            lst->size--;
        }

        // The merged token now ends in a digit and cannot match again.
        n = n->next;
    }
}
|
||||||
|
|
||||||
|
// Combine runs of adjacent single-character tokens into the multi-character
// operators listed in MUNCH_TABLE ("<" "<" "=" becomes "<<="). The first node
// of a run keeps its place and receives the operator text; the rest are freed.
//
// Table order decides which spelling wins, so longer operators must appear
// before their prefixes. The merged token is tagged TOK_PREPROC only for the
// digraph spellings of '#' and '##' (those beginning with "%:"); every other
// operator, including "%=" and "%>", is TOK_OP.
//
// NOTE(review): main runs PruneWhitespaceNodes before this pass, so symbols
// that were separated by whitespace in the source may look adjacent here -
// confirm whether "a = = b" should really be munched into "==".
void MunchTokens(list_t *lst) {
    for (node_t *curr = lst->first; curr; curr = curr->next) {
        Token_t *t1 = curr->data;
        if (t1->ctx != TOK_RAW && t1->ctx != TOK_OP) continue;

        for (size_t i = 0; MUNCH_TABLE[i].op; i++) {
            char buffer[5] = {0}; // longest table entry is 4 characters
            size_t len = MUNCH_TABLE[i].len;

            // Guard the scratch buffer against a future, longer table entry.
            if (len == 0 || len >= sizeof buffer) continue;

            // 1. Gather up to `len` consecutive one-character tokens
            size_t nodes_found = 0;
            for (node_t *temp = curr; temp && nodes_found < len; temp = temp->next) {
                Token_t *tk = temp->data;
                if (tk->size != 1) break; // multi-char tokens can't be part of a new munch
                buffer[nodes_found++] = tk->data[0];
            }

            // 2. Compare against the table entry
            if (nodes_found != len || strcmp(buffer, MUNCH_TABLE[i].op) != 0) continue;

            // 3. Consolidate: allocate first so the old text survives a failure
            char *merged = strndup(MUNCH_TABLE[i].op, len);
            if (merged == NULL) break;

            free(t1->data);
            t1->data = merged;
            t1->size = len;
            t1->ctx = (strncmp(MUNCH_TABLE[i].op, "%:", 2) == 0) ? TOK_PREPROC : TOK_OP;

            // Remove the tail nodes that were absorbed
            for (size_t j = 1; j < len; j++) {
                node_t *to_remove = curr->next;
                curr->next = to_remove->next;
                if (lst->last == to_remove) lst->last = curr;
                ClearTokens(to_remove->data);
                free(to_remove);
                lst->size--;
            }
            break;
        }
        // A merged token has size > 1 and can never start another run, so the
        // scan always moves on to the next node.
    }
}
|
||||||
|
|
||||||
void RefineRawNodes(list_t *tkn_lst) {
|
void RefineRawNodes(list_t *tkn_lst) {
|
||||||
node_t *curr = tkn_lst->first;
|
node_t *curr = tkn_lst->first;
|
||||||
@ -256,64 +467,110 @@ void PruneWhitespaceNodes(list_t *lst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void IdentifyTokens(list_t *lst) {
|
|
||||||
for (node_t *curr = lst->first; curr; curr = curr->next) {
|
|
||||||
Token_t *t = (Token_t *)curr->data;
|
|
||||||
|
|
||||||
if (t->ctx != TOK_RAW) continue;
|
|
||||||
|
|
||||||
bool found = false;
|
|
||||||
// 1. Check against Keyword Registry
|
|
||||||
for (int i = 0; KEYWORD_TABLE[i].name != NULL; i++) {
|
|
||||||
if (strcmp(t->data, KEYWORD_TABLE[i].name) == 0) {
|
|
||||||
t->ctx = KEYWORD_TABLE[i].ctx;
|
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. If not a keyword, is it a valid Identifier? (e.g., my_var_1)
|
|
||||||
if (!found && t->size > 0) {
|
|
||||||
if (isalpha(t->data[0]) || t->data[0] == '_' || t->data[0] == '@') {
|
|
||||||
t->ctx = TOK_ID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// Modular function to register new identifiers
|
// Modular function to register new identifiers
|
||||||
void RegisterIdentifier(const char *name, TKN_CTX type) {
|
void RegisterIdentifier(const char *name, TKN_CTX type) {
|
||||||
// In a professional compiler, you'd insert this into a Hash Map.
|
//insert this into a Hash Map.
|
||||||
// For now, it's enough to know this is where user-defined types go.
|
//this is where user-defined types go.
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Parse the remainder of a declaration of the form
//     var <identifier> : <number> ;
// The caller has already consumed the 'var' keyword. Parsing stops at the
// first missing piece (Expect reports it and sets p->error).
void ParseVarDeclaration(Parser_t *p) {
    // name
    Token_t *name = Expect(p, TOK_ID, NULL);

    // ':' separator - only attempted if the name was accepted
    if (!p->error) Expect(p, TOK_OP, ":");

    // size in bytes - only attempted if everything so far was accepted
    Token_t *size = p->error ? NULL : Expect(p, TOK_NUM, NULL);
    if (p->error) return;

    printf("Defined variable '%s' with size %s bytes.\n", name->data, size->data);

    // terminating semicolon
    Expect(p, TOK_OP, ";");
}
|
||||||
|
|
||||||
|
// Top-level parse loop: runs until the tokens are exhausted or an error is
// flagged. A 'var' keyword starts a variable declaration; any other token is
// reported as unknown and skipped.
void Parse(Parser_t *p) {
    for (Token_t *tok = Peek(p); tok != NULL && !p->error; tok = Peek(p)) {
        bool starts_var = tok->ctx == TOK_KEY && strcmp(tok->data, "var") == 0;

        if (!starts_var) {
            printf("Unknown token: %s\n", tok->data);
            Advance(p);
            continue;
        }

        Advance(p); // Consume 'var'
        ParseVarDeclaration(p);
    }
}
|
||||||
|
|
||||||
|
// Human-readable label for a token context. The flags are tested in the order
// listed below and the first one that is set decides the label, so a value
// with several bits set reports the earliest entry. Returns "UNKNOWN" when no
// known bit is set.
const char* CtxToString(TKN_CTX ctx) {
    static const struct {
        TKN_CTX     bit;
        const char *label;
    } names[] = {
        { TOK_KEY,     "KEYWORD"    },
        { TOK_ID,      "IDENTIFIER" },
        { TOK_NUM,     "NUMBER"     },
        { TOK_OP,      "OPERATOR"   },
        { TOK_STRING,  "STRING"     },
        { TOK_PREPROC, "PREPROCESS" },
        { TOK_COMMENT, "COMMENT"    },
        { TOK_RAW,     "RAW"        },
        { TOK_LITERAL, "LITERAL"    },
        { TOK_NONE,    "NONE"       },
    };

    for (size_t i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (ctx & names[i].bit) return names[i].label;
    }
    return "UNKNOWN";
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`"
|
||||||
|
and give each token a context
|
||||||
|
let's replace preprocessor (include, define, etc)
|
||||||
|
let's do recursive parsing everywhere that needs it
|
||||||
|
compile time reflection (@comptime or @reflect)
|
||||||
|
metaprogramming logic annotation if i do it lastly** may not be
|
||||||
|
*/
|
||||||
int main(int ac, char **av) {
|
int main(int ac, char **av) {
|
||||||
if (ac <= 1) return printf("No file specified\n"), -1;
|
if (ac <= 1) return printf("No file specified\n"), -1;
|
||||||
char* data = LoadFile(av[1]);
|
|
||||||
list_t *tkn_lst = ListInit(NULL);
|
|
||||||
|
|
||||||
|
char* data = LoadFile(av[1]);
|
||||||
|
assert(data);
|
||||||
|
ResolveTrigraphs(data);
|
||||||
|
|
||||||
|
list_t *tkn_lst = ListInit(NULL);
|
||||||
|
assert(tkn_lst);
|
||||||
InitialScanner(data, tkn_lst);
|
InitialScanner(data, tkn_lst);
|
||||||
PruneWhitespaceNodes(tkn_lst);
|
PruneWhitespaceNodes(tkn_lst);
|
||||||
RefineRawNodes(tkn_lst);
|
RefineRawNodes(tkn_lst);
|
||||||
RefineSymbols(tkn_lst);
|
RefineSymbols(tkn_lst);
|
||||||
|
MunchFloats(tkn_lst);
|
||||||
|
MunchScientificNotation(tkn_lst);
|
||||||
MunchTokens(tkn_lst);
|
MunchTokens(tkn_lst);
|
||||||
IdentifyTokens(tkn_lst);
|
IdentifyTokens(tkn_lst);
|
||||||
|
|
||||||
list_iter_t iter = ListGetIter(tkn_lst);
|
list_iter_t iter = ListGetIter(tkn_lst);
|
||||||
|
printf("\n--- TOKEN STREAM ---\n");
|
||||||
|
printf("%-6s | %-12s | %s\n", "HEX", "CONTEXT", "VALUE");
|
||||||
|
printf("-------|--------------|----------\n");
|
||||||
while (iter.current) {
|
while (iter.current) {
|
||||||
Token_t *t = (Token_t *)iter.current->data;
|
Token_t *t = (Token_t *)iter.current->data;
|
||||||
printf("[%02X] %-10s | %s\n", t->ctx,
|
|
||||||
(t->ctx == TOK_ID ? "IDENTIFIER" : "TOKEN"), t->data);
|
// Use CtxToString for the middle column
|
||||||
|
printf("[0x%04X] | %-12s | %s\n",
|
||||||
|
t->ctx,
|
||||||
|
CtxToString(t->ctx),
|
||||||
|
t->data);
|
||||||
|
|
||||||
iter.current = iter.current->next;
|
iter.current = iter.current->next;
|
||||||
}
|
}
|
||||||
//pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`"
|
printf("--------------------\n");
|
||||||
//and give each token a context
|
|
||||||
//let's replace preprocessor (include, define, etc)
|
//Parser_t p = ParserInit(tkn_lst);
|
||||||
//let's do recursive parsing everywhere that need it
|
//Parse(&p);
|
||||||
//compile time reflection (@comptime or @reflect)
|
|
||||||
//metaprogramming logic annotation if i do it lastly** may not be
|
|
||||||
ListFree(tkn_lst, ClearTokens);
|
ListFree(tkn_lst, ClearTokens);
|
||||||
free(data);
|
free(data);
|
||||||
return(0);
|
return(0);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user