Working up to the current status. Used some AI to help me comment and refine some logic; it still needs more work on my part and is not yet as good as I want.
This commit is contained in:
parent
97d0e0f955
commit
fab64a5ae0
8
README.md
Normal file
8
README.md
Normal file
@ -0,0 +1,8 @@
|
||||
# Sterling
|
||||
|
||||
`I am delusional!!! Don't take me seriously.`
|
||||
|
||||
ptr : addr : 64bit
|
||||
arr : addr + size : 128bit
|
||||
|
||||
|
||||
@ -1,29 +1,53 @@
|
||||
#ifndef STERLING_COMPILER_H
|
||||
# define STERLING_COMPILER_H
|
||||
|
||||
typedef enum {
|
||||
TOK_NONE,
|
||||
TOK_STRING,
|
||||
TOK_TOKEN,
|
||||
TOK_PREPROCESSOR,
|
||||
} TKN_CTX;
|
||||
|
||||
typedef struct Token_s {
|
||||
int size;
|
||||
char *data;
|
||||
TKN_CTX ctx;
|
||||
} Token_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
Token_t *token;
|
||||
} TokenList;
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
//simd
|
||||
# ifdef __x86_64__
|
||||
# include <x86intrin.h>
|
||||
# endif
|
||||
|
||||
/*
 * Category attached to each token.
 * Enumerator order (and therefore the numeric values 0..3) is unchanged.
 */
typedef enum {
    TOK_NONE,           /* first enumerator (value 0); a calloc'ed token starts here */
    TOK_STRING,         /* quoted literal, stored with its quote characters */
    TOK_RAW,            /* code that still needs to be refined */
    TOK_PREPROCESSOR,   /* NOTE(review): not produced by any scanner pass yet */
} TKN_CTX;
|
||||
|
||||
typedef struct Token_s {
|
||||
int size;
|
||||
TKN_CTX ctx;
|
||||
char *data;
|
||||
} Token_t;
|
||||
|
||||
//builtin type: if x86_64, then since i use simd, should align them
|
||||
|
||||
/*
 * 2-component float vector, padded and aligned to 16 bytes.
 * The two padding floats cost 8 bytes per value.
 * The typedef now carries a name, so both `struct vec2` and `vec2` are usable
 * (the original `typedef` declared no name at all).
 */
typedef struct vec2 {
    float   x;
    float   y;
    float   _padding1;
    float   _padding2;
} __attribute__((aligned(16))) vec2;
|
||||
|
||||
/*
 * 3-component float vector, padded and aligned to 16 bytes.
 * The single padding float costs 4 bytes per value.
 * The typedef now carries a name, so both `struct vec3` and `vec3` are usable
 * (the original `typedef` declared no name at all).
 */
typedef struct vec3 {
    float   x;
    float   y;
    float   z;
    float   _padding;
} __attribute__((aligned(16))) vec3;
|
||||
|
||||
/*
 * 4-component float vector aligned to 16 bytes; no padding is needed.
 * The typedef now carries a name, so both `struct vec4` and `vec4` are usable
 * (the original `typedef` declared no name at all).
 */
typedef struct vec4 {
    float   x;
    float   y;
    float   z;
    float   w;
} __attribute__((aligned(16))) vec4;
|
||||
|
||||
#endif
|
||||
|
||||
150
source/list.h
Normal file
150
source/list.h
Normal file
@ -0,0 +1,150 @@
|
||||
#ifndef LIST_H
|
||||
# define LIST_H
|
||||
|
||||
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
/* One link of the singly linked list. */
typedef struct node_s {
    void            *data;  /* caller-owned payload; the list never inspects it */
    struct node_s   *next;  /* following node, NULL on the last node */
} node_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
node_t *first;
|
||||
node_t *last;
|
||||
} list_t;
|
||||
|
||||
typedef struct {
|
||||
node_t *current;
|
||||
} list_iter_t;
|
||||
|
||||
|
||||
list_t *ListInit(list_t *lst);
|
||||
void ListInsert(list_t *lst, size_t idx, void *data);
|
||||
void ListPushBack(list_t *lst, void *data);
|
||||
list_iter_t ListGetIter(const list_t *lst);
|
||||
void *ListAdvance(list_iter_t *it);
|
||||
void *ListPeek(const list_iter_t *it);
|
||||
void *ListPeekNext(const list_iter_t *it);
|
||||
bool ListAtEnd(const list_iter_t *it);
|
||||
void ListReset(list_iter_t *it, const list_t *lst);
|
||||
void ListFree(list_t *lst, void (*free_func)(void*));
|
||||
void *ListPeekK(const list_iter_t *it, size_t k);
|
||||
void ListRestore(list_iter_t *it, const list_iter_t checkpoint);
|
||||
bool ListMatch(list_iter_t *it, bool (*predicate)(void*, void*), void *target);
|
||||
|
||||
# ifdef LIST_IMPLEMENTATION
|
||||
|
||||
/*
 * Put a list header into the empty state.
 *
 * lst: header to reset, or NULL to have one allocated with malloc (the caller
 *      then owns it and must free() it after ListFree()).
 * Returns the initialised header, or NULL when the allocation fails.
 * Nodes already linked to a non-NULL lst are NOT released here.
 */
list_t *ListInit(list_t *lst) {
    if (!lst) {
        lst = malloc(sizeof *lst);
        if (!lst) {
            return (NULL);
        }
    }
    lst->first = NULL;
    lst->last = NULL;
    lst->size = 0;
    return (lst);
}
|
||||
|
||||
/*
 * Insert data so that it becomes the node at position idx.
 *
 * idx == 0 prepends; an idx at or beyond the current length appends.
 * The payload pointer is stored as-is (no copy).
 * When lst is NULL or the node cannot be allocated the list is left unchanged.
 */
void ListInsert(list_t *lst, size_t idx, void *data) {
    if (!lst) {
        return;
    }

    node_t *node = malloc(sizeof *node);
    if (!node) {
        return;
    }
    node->data = data;
    node->next = NULL;

    if (!lst->first) {
        /* empty list: the new node is both head and tail */
        lst->first = lst->last = node;
    } else if (idx == 0) {
        node->next = lst->first;
        lst->first = node;
    } else {
        /* walk to the node that will precede the new one, stopping at the tail */
        node_t *prev = lst->first;
        for (size_t i = 1; i < idx && prev->next; i++) {
            prev = prev->next;
        }
        node->next = prev->next;
        prev->next = node;
        if (!node->next) {
            lst->last = node;
        }
    }
    lst->size++;
}
|
||||
|
||||
/*
 * Append data as the new tail node.
 *
 * The payload pointer is stored as-is (no copy).
 * When lst is NULL or the node cannot be allocated the list is left unchanged.
 */
void ListPushBack(list_t *lst, void *data) {
    if (!lst) {
        return;
    }

    node_t *node = malloc(sizeof *node);
    if (!node) {
        return;
    }
    node->data = data;
    node->next = NULL;

    if (lst->first) {
        lst->last->next = node;
    } else {
        lst->first = node;
    }
    lst->last = node;
    lst->size++;
}
|
||||
|
||||
list_iter_t ListGetIter(const list_t *lst) {
|
||||
return ((list_iter_t){.current = lst->first});
|
||||
}
|
||||
|
||||
/*
 * Return the payload under the cursor and move the cursor one node forward.
 * Returns NULL (and does not move) when it is NULL or already past the end.
 */
void *ListAdvance(list_iter_t *it) {
    if (!it || !it->current) {
        return (NULL);
    }

    node_t *visited = it->current;
    it->current = visited->next;
    return (visited->data);
}
|
||||
|
||||
void *ListPeek(const list_iter_t *it) {
|
||||
return (it->current ? it->current->data : NULL);
|
||||
}
|
||||
|
||||
void *ListPeekNext(const list_iter_t *it) {
|
||||
return (it->current ? (it->current->next ? it->current->next->data : NULL) : NULL);
|
||||
}
|
||||
|
||||
bool ListAtEnd(const list_iter_t *it) {
|
||||
return (it->current == NULL);
|
||||
}
|
||||
|
||||
/*
 * Move the cursor back to the first node of lst.
 * A NULL it is ignored; a NULL lst leaves the cursor at the end.
 */
void ListReset(list_iter_t *it, const list_t *lst) {
    if (!it) {
        return;
    }
    it->current = lst ? lst->first : NULL;
}
|
||||
|
||||
/*
 * Release every node of lst and leave the list empty.
 *
 * free_func: called on each payload when non-NULL; pass NULL to keep payloads.
 * The header lst itself is not freed, so a header obtained from
 * ListInit(NULL) still has to be free()'d by the caller.
 */
void ListFree(list_t *lst, void (*free_func)(void*)) {
    if (!lst) {
        return;
    }

    node_t *node = lst->first;
    while (node) {
        node_t *following = node->next; /* saved before the node is released */

        if (free_func) {
            free_func(node->data);
        }
        free(node);
        node = following;
    }
    lst->first = NULL;
    lst->last = NULL;
    lst->size = 0;
}
|
||||
|
||||
void *ListPeekK(const list_iter_t *it, size_t k) {
|
||||
node_t *current = it->current;
|
||||
for (size_t i = 0; i < k && current; i++) {
|
||||
current = current->next;
|
||||
}
|
||||
return (current ? current->data : NULL);
|
||||
}
|
||||
|
||||
/*
 * Rewind (or fast-forward) it to the position saved in checkpoint.
 * A checkpoint is a plain copy of a cursor. A NULL it is ignored.
 */
void ListRestore(list_iter_t *it, const list_iter_t checkpoint) {
    if (!it) {
        return;
    }
    it->current = checkpoint.current;
}
|
||||
|
||||
/*
 * Consume the current node if it satisfies predicate.
 *
 * predicate is called as predicate(current payload, target). When it returns
 * true the cursor advances by one node and ListMatch returns true; otherwise
 * the cursor stays put and false is returned.
 * A NULL it, a NULL predicate, or a cursor at the end all return false.
 */
bool ListMatch(list_iter_t *it, bool (*predicate)(void*, void*), void *target) {
    if (!it || !predicate || !it->current) {
        return (false);
    }
    if (!predicate(it->current->data, target)) {
        return (false);
    }
    it->current = it->current->next;
    return (true);
}
|
||||
|
||||
# endif
|
||||
|
||||
#endif
|
||||
316
source/main.c
316
source/main.c
@ -1,93 +1,269 @@
|
||||
#include <SterlingCompiler.h>
|
||||
#define LIST_IMPLEMENTATION
|
||||
#include "list.h"
|
||||
|
||||
#include "../include/SterlingCompiler.h"
|
||||
|
||||
/*
 * Read a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * filename: path of the file to read (opened in text mode, "r").
 * Returns the buffer; the caller releases it with free().
 *
 * Any failure (NULL name, open/seek/read error, out of memory) prints a
 * message on stderr and terminates the program with EXIT_FAILURE, so the
 * function never returns NULL. This keeps the fail-fast behaviour of the
 * previous assert()-based version but also works when NDEBUG is defined.
 */
char *LoadFile(const char *filename) {
    if (!filename) {
        fputs("LoadFile: no file name given\n", stderr);
        exit(EXIT_FAILURE);
    }

    FILE *file = fopen(filename, "r");
    if (!file) {
        perror(filename);
        exit(EXIT_FAILURE);
    }

    /* The size of the file is used as an upper bound for the buffer. */
    long size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        size = ftell(file);
    }
    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
        perror(filename);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    char *data = malloc((size_t)size + 1);
    if (!data) {
        perror("LoadFile");
        fclose(file);
        exit(EXIT_FAILURE);
    }

    /* A text-mode stream may deliver fewer bytes than ftell reported, so the
     * terminator goes after what was actually read, not at `size`. */
    size_t count = fread(data, 1, (size_t)size, file);
    if (ferror(file)) {
        perror(filename);
        free(data);
        fclose(file);
        exit(EXIT_FAILURE);
    }
    data[count] = '\0';

    fclose(file);
    return (data);
}
|
||||
|
||||
TokenList SeparateString(char *data) {
|
||||
char *span = NULL;
|
||||
char *tok = NULL;
|
||||
char *tmp = NULL;
|
||||
bool toggle = false;
|
||||
Token_t buffer[256];
|
||||
tok = strtok_r(data, "\"\'", &span);
|
||||
int i = 0;
|
||||
buffer[i].ctx = TOK_NONE;
|
||||
while (tok && i < (sizeof(buffer) / sizeof(Token_t))) {
|
||||
size_t size = (span - tok);
|
||||
tmp = calloc(size + 1, sizeof(char));
|
||||
memcpy_s(tmp, size, tok, size);
|
||||
tmp[size] = 0x00;
|
||||
buffer[i].data = tmp;
|
||||
buffer[i].size = size;
|
||||
if (toggle) {
|
||||
buffer[i].ctx = TOK_STRING;
|
||||
} else {
|
||||
buffer[i].ctx = TOK_NONE;
|
||||
}
|
||||
tok = strtok_r(NULL, "\"\'", &span);
|
||||
i++;
|
||||
toggle ^= toggle;
|
||||
# ifndef strndup
/*
 * Fallback strndup for C libraries that do not ship one.
 *
 * Copies at most n characters of s, stopping early at the terminator, into a
 * new NUL-terminated heap string that the caller must free().
 * Returns NULL when s is NULL or the allocation fails.
 */
char *strndup(const char *s, size_t n) {
    if (!s) {
        return (NULL);
    }

    /* Bounded length: never look past the terminator of a short source. */
    size_t len = 0;
    while (len < n && s[len] != '\0') {
        len++;
    }

    char *str = malloc(len + 1);
    if (!str) {
        return (NULL);
    }
    memcpy(str, s, len);
    str[len] = '\0';
    return (str);
}
# endif
|
||||
|
||||
}
|
||||
TokenList token_list;
|
||||
token_list.token = calloc(i, sizeof(Token_t));
|
||||
token_list.size = i;
|
||||
memcpy(token_list.token, buffer, i);
|
||||
for (int k = 0; k < i; k++) {
|
||||
token_list.token[k] = buffer[i];
|
||||
}
|
||||
return (token_list);
|
||||
/*
 * True when s contains nothing but characters for which isspace() holds.
 * The empty string, and a NULL pointer, count as whitespace-only.
 */
bool IsWhitespace(const char *s) {
    if (!s) {
        return true;
    }
    for (; *s != '\0'; s++) {
        /* cast keeps negative char values out of isspace() */
        if (!isspace((unsigned char)*s)) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
void tmpFunction() {
|
||||
char* data = NULL;
|
||||
char* span = NULL;
|
||||
char* tmp = NULL;
|
||||
char* delim = " \t\n";
|
||||
char* special = ";#";
|
||||
size_t size = 0;
|
||||
char* tok = strtok_r(data, delim, &span);
|
||||
while (tok) {
|
||||
size = (span - tok);
|
||||
tmp = malloc(size + 1);
|
||||
memcpy_s(tmp, size, tok, size);
|
||||
tmp[size] = 0x00;
|
||||
printf("|%s|\n", tmp);
|
||||
free(tmp);
|
||||
tok = strtok_r(NULL, " \t\n", &span);
|
||||
}
|
||||
void ClearTokens(void*arg) {
|
||||
Token_t *tok = arg;
|
||||
free(tok->data);
|
||||
free(tok);
|
||||
}
|
||||
|
||||
// Helper to create and link a new token
|
||||
void PushToken(char *start, char *end, TKN_CTX ctx, list_t *lst) {
|
||||
size_t len = end - start;
|
||||
Token_t *t = calloc(1, sizeof(Token_t));
|
||||
t->data = malloc(len + 1);
|
||||
memcpy(t->data, start, len);
|
||||
((char*)t->data)[len] = '\0';
|
||||
t->size = len;
|
||||
t->ctx = ctx;
|
||||
ListPushBack(lst, t);
|
||||
}
|
||||
|
||||
// Helper to push code that still needs to be refined
|
||||
void PushRaw(char *start, char *end, list_t *lst) {
|
||||
if (end <= start) return;
|
||||
PushToken(start, end, TOK_RAW, lst);
|
||||
}
|
||||
|
||||
/*
 * Split data into alternating TOK_RAW and TOK_STRING tokens appended to
 * tkn_lst.
 *
 * A string starts at a single or double quote and runs to the matching
 * unescaped quote (or to the end of the input when it is never closed); the
 * quote characters are kept in the token. Everything between strings is
 * pushed as TOK_RAW. Comments are not recognised here.
 * data is only read. NULL arguments are ignored.
 */
void SeparateStrings(char *data, list_t *tkn_lst) {
    if (!data || !tkn_lst) {
        return;
    }

    char *curr = data;
    char *start = data;     /* first character not yet emitted */

    while (*curr != '\0') {
        if (*curr != '\"' && *curr != '\'') {
            curr++;
            continue;
        }

        /* 1. Emit the raw code that precedes the string. */
        PushRaw(start, curr, tkn_lst);

        /* 2. Find the end of the string. */
        char quote_type = *curr;
        char *str_start = curr++;   /* keep the opening quote in the token */
        while (*curr != '\0' && *curr != quote_type) {
            /* Skip the escaped character, but never step over the
             * terminator when the input ends with a lone backslash. */
            if (*curr == '\\' && curr[1] != '\0') {
                curr++;
            }
            curr++;
        }
        if (*curr == quote_type) {
            curr++;                 /* include the closing quote */
        }

        /* 3. Emit the string itself. */
        PushToken(str_start, curr, TOK_STRING, tkn_lst);
        start = curr;
    }

    /* 4. Emit whatever raw code follows the last string. */
    PushRaw(start, curr, tkn_lst);
}
|
||||
|
||||
/*
 * First scanner pass: cut data into TOK_STRING and TOK_RAW tokens appended to
 * tkn_lst, dropping comments.
 *
 * - Quoted text ('...' or "...", backslash escapes honoured) becomes one
 *   TOK_STRING token including its quotes; an unterminated string runs to the
 *   end of the input.
 * - Line comments are skipped up to and including their newline; block
 *   comments are skipped through the closing delimiter (or to the end of the
 *   input when unterminated). No token is emitted for them.
 * - Everything else is emitted as TOK_RAW chunks.
 * data is only read. NULL arguments are ignored.
 */
void InitialScanner(char *data, list_t *tkn_lst) {
    if (!data || !tkn_lst) {
        return;
    }

    char *curr = data;
    char *start = data;     /* first character not yet emitted */

    while (*curr != '\0') {
        if (*curr == '\"' || *curr == '\'') {
            /* 1. Strings */
            PushRaw(start, curr, tkn_lst);

            char quote = *curr;
            char *str_start = curr++;
            while (*curr != '\0' && *curr != quote) {
                if (*curr == '\\' && curr[1] != '\0') {
                    curr++;         /* skip the escaped character */
                }
                curr++;
            }
            if (*curr != '\0') {
                curr++;             /* include the closing quote */
            }
            PushToken(str_start, curr, TOK_STRING, tkn_lst);
            start = curr;
        } else if (curr[0] == '/' && curr[1] == '/') {
            /* 2a. Line comment */
            PushRaw(start, curr, tkn_lst);

            while (*curr != '\0' && *curr != '\n') {
                curr++;
            }
            /* Consume the newline only if there is one: a comment on the
             * last line ends at the terminator, which must not be skipped. */
            if (*curr == '\n') {
                curr++;
            }
            start = curr;
        } else if (curr[0] == '/' && curr[1] == '*') {
            /* 2b. Block comment */
            PushRaw(start, curr, tkn_lst);

            curr += 2;
            while (*curr != '\0' && !(curr[0] == '*' && curr[1] == '/')) {
                curr++;
            }
            if (*curr != '\0') {
                curr += 2;          /* move past the closing delimiter */
            }
            start = curr;
        } else {
            curr++;
        }
    }

    PushRaw(start, curr, tkn_lst);  /* tail after the last string/comment */
}
|
||||
|
||||
/* Copy the n characters starting at s into a new NUL-terminated heap string. */
static char *RefineDupRange(const char *s, size_t n) {
    char *copy = malloc(n + 1);

    if (!copy) {
        return NULL;
    }
    memcpy(copy, s, n);
    copy[n] = '\0';
    return copy;
}

/*
 * Split every TOK_RAW node of tkn_lst on spaces, tabs, carriage returns and
 * newlines, in place.
 *
 * The first word replaces the text of the existing node; each further word
 * gets a new TOK_RAW node linked right after it, so token order is kept.
 * Nodes of any other category are left alone, as are TOK_RAW nodes that
 * contain no word at all. The list's `last` pointer and `size` are updated.
 * If memory runs out while splitting a node, the words not yet linked for
 * that node are dropped.
 */
void RefineRawNodes(list_t *tkn_lst) {
    static const char delims[] = " \t\r\n";

    if (!tkn_lst) {
        return;
    }

    for (node_t *curr = tkn_lst->first; curr; curr = curr->next) {
        Token_t *t = curr->data;

        if (!t || t->ctx != TOK_RAW || !t->data) {
            continue;
        }

        /* Locate the first word; nothing to do when there is none. */
        const char *word = t->data + strspn(t->data, delims);
        size_t len = strcspn(word, delims);
        if (len == 0) {
            continue;
        }

        /* The old text keeps being scanned below, so it is only swapped for
         * this copy once the whole chunk has been split. */
        char *first = RefineDupRange(word, len);
        if (!first) {
            continue;
        }
        size_t first_len = len;

        node_t *last_inserted = curr;
        for (;;) {
            word += len;
            word += strspn(word, delims);
            len = strcspn(word, delims);
            if (len == 0) {
                break;
            }

            Token_t *new_t = calloc(1, sizeof *new_t);
            node_t *new_node = calloc(1, sizeof *new_node);
            if (new_t) {
                new_t->data = RefineDupRange(word, len);
            }
            if (!new_t || !new_node || !new_t->data) {
                free(new_t);
                free(new_node);
                break;
            }
            new_t->size = (int)len;
            new_t->ctx = TOK_RAW;
            new_node->data = new_t;

            /* Link right after the previously handled word. */
            new_node->next = last_inserted->next;
            last_inserted->next = new_node;
            if (tkn_lst->last == last_inserted) {
                tkn_lst->last = new_node;
            }
            last_inserted = new_node;
            tkn_lst->size++;
        }

        free(t->data);
        t->data = first;
        t->size = (int)first_len;

        curr = last_inserted;   /* resume after the nodes just added */
    }
}
|
||||
|
||||
/*
 * Remove every TOK_RAW node whose text is whitespace only (or missing).
 *
 * Removed nodes are unlinked, their token is released with ClearTokens() and
 * the list's `first`, `last` and `size` are kept consistent. Nodes of other
 * categories are never removed. A NULL lst is ignored.
 */
void PruneWhitespaceNodes(list_t *lst) {
    if (!lst) {
        return;
    }

    node_t *prev = NULL;    /* last node that was kept */
    node_t *curr = lst->first;

    while (curr) {
        node_t *following = curr->next; /* saved before curr may be freed */
        Token_t *t = curr->data;

        if (t && t->ctx == TOK_RAW && (!t->data || IsWhitespace(t->data))) {
            if (prev) {
                prev->next = following;
            } else {
                lst->first = following;
            }
            if (lst->last == curr) {
                lst->last = prev;
            }
            ClearTokens(t);
            free(curr);
            lst->size--;
        } else {
            prev = curr;
        }
        curr = following;
    }
}
|
||||
|
||||
|
||||
int main(int ac, char **av) {
|
||||
if (ac <= 1) {
|
||||
printf("no file specified");
|
||||
return (-1);
|
||||
}
|
||||
char* data = LoadFile(av[1]);
|
||||
TokenList tkn_lst;
|
||||
//first pass on string
|
||||
tkn_lst = SeparateString(data);
|
||||
//second pass on ; and \n
|
||||
//third pass on \t and space
|
||||
//give each token a context
|
||||
//let's replace preprocessor
|
||||
//let's do recursive parsing everywhere that need it
|
||||
//compile time reflection
|
||||
//metaprogramming logic annotation if i do it lastly
|
||||
for (int i = 0; i < tkn_lst.size; i++) {
|
||||
free(tkn_lst.token[i].data);
|
||||
list_t *tkn_lst = ListInit(NULL);
|
||||
//first pass on string, whitespace and comments
|
||||
InitialScanner(data, tkn_lst);
|
||||
//SeparateStrings(data, tkn_lst);
|
||||
list_iter_t iter = ListGetIter(tkn_lst);
|
||||
while (iter.current) {
|
||||
printf("|%s|", ((Token_t *)iter.current->data)->data);
|
||||
iter.current = iter.current->next;
|
||||
}
|
||||
PruneWhitespaceNodes(tkn_lst);
|
||||
printf("\n___\n");
|
||||
RefineRawNodes(tkn_lst);
|
||||
ListReset(&iter, tkn_lst);
|
||||
while (iter.current) {
|
||||
printf("|%s|\n", ((Token_t *)iter.current->data)->data);
|
||||
iter.current = iter.current->next;
|
||||
}
|
||||
//pass on ";(){}[]$%&*$#@!?:,.<>|_-+=~`"
|
||||
//give each token a context
|
||||
//let's replace preprocessor (include, define, etc)
|
||||
//let's do recursive parsing everywhere that need it
|
||||
//compile time reflection (@comptime or @reflect)
|
||||
//metaprogramming logic annotation if i do it lastly** may not be
|
||||
ListFree(tkn_lst, ClearTokens);
|
||||
free(data);
|
||||
return(0);
|
||||
}
|
||||
//test
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user