114 lines
3.2 KiB
C

// Every kind of token the lexer can hand out.
// Values are assigned implicitly starting at 0, so the order of the
// enumerators below fixes their numeric values.
enum TokenType {
    // end of input, names and literals
    TOK_EOF,
    TOK_IDENTIFIER,
    TOK_INTEGER,
    TOK_STRING,
    TOK_CHAR,

    // presumably reserved words; the classification itself is done by
    // lookup_keyword(), whose table is not part of this listing
    TOK_FN,
    TOK_RETURN,
    TOK_STRUCT,
    TOK_TYPEDEF,

    TOK_TYPE_U8,
    TOK_TYPE_U32,   // ... add all types

    TOK_FN_STATIC,
    TOK_FN_EXPORT,  // ... all fn variants

    // punctuation and operators
    TOK_LBRACE,     // {
    TOK_RBRACE,     // }
    TOK_LPAREN,     // (
    TOK_RPAREN,     // )
    TOK_COMMA,      // ,
    TOK_SEMICOLON,  // ;
    TOK_STAR,       // *
    TOK_EQUAL,      // =
    TOK_PLUS,       // +
    TOK_MINUS,      // -
    TOK_SLASH,      // /
    TOK_BANG,       // !
    TOK_ARROW,      // ->
    // ...
};
// A single token produced by the lexer.
// NOTE(review): some lexemes are string literals ("{", "->", "") while others
// come from slice(); who owns or frees the slice() results is not visible
// here - confirm before freeing or modifying a lexeme.
struct Token {
    TokenType type;   // category of the token
    char *lexeme;     // token text; for strings this is the text between the quotes
    int line;         // value of the lexer's line counter when the token was built
};
// Scanning state: a cursor over a '\0'-terminated source buffer.
struct Lexer {
    char *src;   // text being scanned; the scanning code treats '\0' as end of input
    int pos;     // index into src of the next unread character
    int line;    // line counter, bumped by advance() each time it consumes '\n'
};
// Returns the character under the cursor without consuming it.
// Neither l->pos nor l->line is modified.
char peek(Lexer* l) {
    const char *cursor = l->src + l->pos;
    return *cursor;
}
// Consumes and returns the character under the cursor.
//
// Consuming a '\n' increments l->line. The terminating '\0' is returned but
// never stepped over: the cursor stays on it, so repeated calls at end of
// input keep returning '\0' instead of walking off the end of the buffer.
char advance(Lexer* l) {
    char c = l->src[l->pos];
    if (c == '\0') {
        return c;  // stay on the terminator; src[pos + 1] is not ours to read
    }
    if (c == '\n') {
        l->line++;
    }
    l->pos++;
    return c;
}
// Consumes the character under the cursor only when it equals `expected`.
// Returns true if a character was consumed, false if the cursor was left
// where it was. The line counter is never touched here.
bool match(Lexer* l, char expected) {
    bool hit = (l->src[l->pos] == expected);
    if (hit) {
        l->pos += 1;
    }
    return hit;
}
// Moves the cursor past any run of whitespace, `//` line comments and
// `/* ... */` block comments, leaving it on the first character that can
// start a token (or on the terminating '\0').
//
// An unterminated block comment is reported through error(). If error()
// returns, scanning stops with the cursor on the terminator rather than
// continuing to advance beyond the end of the buffer.
void skip_whitespace_and_comments(Lexer* l) {
    for (;;) {
        char c = peek(l);

        if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
            advance(l);  // advance() keeps l->line in sync for '\n'
            continue;
        }
        if (c != '/') {
            return;
        }

        // src[pos] is '/', so src[pos + 1] is at worst the terminator.
        char next = l->src[l->pos + 1];
        if (next == '/') {
            // Line comment: stop on the newline; the whitespace branch eats it.
            while (peek(l) != '\n' && peek(l) != '\0') {
                advance(l);
            }
        } else if (next == '*') {
            advance(l);  // skip '/'
            advance(l);  // skip '*'
            while (!(peek(l) == '*' && l->src[l->pos + 1] == '/')) {
                if (peek(l) == '\0') {
                    error("unterminated comment");
                    return;  // nothing left to scan; do not step past '\0'
                }
                advance(l);
            }
            advance(l);  // skip '*'
            advance(l);  // skip '/'
        } else {
            return;  // a lone '/' starts a token
        }
    }
}
// Scans the remainder of an identifier and classifies it.
//
// l      lexer; next_token() calls this after consuming the first character
// start  index in l->src of the identifier's first character
//
// Consumes every following letter, digit or '_' and returns a token whose
// lexeme is slice(l->src + start, length) and whose type is whatever
// lookup_keyword() reports for that text.
Token identifier_or_keyword(Lexer* l, int start) {
    for (;;) {
        // <ctype.h> classifiers need a value representable as unsigned char;
        // a plain char can be negative for bytes >= 0x80, which is undefined.
        unsigned char c = (unsigned char)peek(l);
        if (!isalnum(c) && c != '_') {
            break;
        }
        advance(l);
    }
    char* text = slice(l->src + start, l->pos - start);
    // check keyword map
    TokenType type = lookup_keyword(text); // use hash or strcmp
    return Token { type, text, l->line };
}
// Scans the remainder of a decimal integer literal.
//
// l      lexer; next_token() calls this after consuming the first digit
// start  index in l->src of the literal's first digit
//
// Consumes every following digit and returns a TOK_INTEGER token whose lexeme
// is slice(l->src + start, length). The text is not converted to a number here.
Token number(Lexer* l, int start) {
    // Cast before isdigit(): passing a negative plain char to a <ctype.h>
    // classifier is undefined behaviour.
    while (isdigit((unsigned char)peek(l))) {
        advance(l);
    }
    char* text = slice(l->src + start, l->pos - start);
    return Token { TOK_INTEGER, text, l->line };
}
// Scans a double-quoted string literal.
//
// Expects the cursor on the opening '"'. Returns a TOK_STRING token whose
// lexeme is the raw text between the quotes: escape sequences are kept as
// written, not decoded. A backslash makes the following character part of
// the string, so `\"` does not terminate it.
//
// The token's line is l->line after the literal has been consumed, so for a
// literal spanning several lines it is the line of the closing quote.
//
// A literal that runs into end of input is reported through error(); the
// cursor is then left on the terminating '\0'.
Token string(Lexer* l) {
    advance(l); // skip opening quote
    int start = l->pos;
    while (peek(l) != '"' && peek(l) != '\0') {
        // Skip the escaped character together with its backslash, but never
        // step over the terminator when the backslash is the last character.
        if (peek(l) == '\\' && l->src[l->pos + 1] != '\0') {
            advance(l);
        }
        advance(l);
    }
    char* text = slice(l->src + start, l->pos - start);
    if (peek(l) == '"') {
        advance(l); // closing quote
    } else {
        error("unterminated string");
    }
    return Token { TOK_STRING, text, l->line };
}
// Returns the next token from the input.
//
// Leading whitespace and comments are skipped first. At end of input a
// TOK_EOF token is returned and the cursor is left on the terminating '\0',
// so calling again keeps returning TOK_EOF.
//
// A character that cannot start any token is reported through error(). If
// error() returns, that character has already been consumed and scanning
// resumes with the next one, so the function always returns a token.
Token next_token(Lexer* l) {
    for (;;) {
        skip_whitespace_and_comments(l);
        int start = l->pos;
        char c = l->src[start];

        // Do not consume the terminator: reading past it is out of bounds.
        if (c == '\0') return Token { TOK_EOF, "", l->line };

        // string() skips the opening quote itself, so hand over with the
        // cursor still on it; consuming it here would drop the first
        // character of the literal.
        if (c == '"') return string(l);

        advance(l);
        switch (c) {
        case '{': return Token { TOK_LBRACE, "{", l->line };
        case '}': return Token { TOK_RBRACE, "}", l->line };
        case '(': return Token { TOK_LPAREN, "(", l->line };
        case ')': return Token { TOK_RPAREN, ")", l->line };
        case ',': return Token { TOK_COMMA, ",", l->line };
        case ';': return Token { TOK_SEMICOLON, ";", l->line };
        case '*': return Token { TOK_STAR, "*", l->line };
        case '=': return Token { TOK_EQUAL, "=", l->line };
        case '+': return Token { TOK_PLUS, "+", l->line };
        case '-':
            if (match(l, '>')) return Token { TOK_ARROW, "->", l->line };
            return Token { TOK_MINUS, "-", l->line };
        case '/': return Token { TOK_SLASH, "/", l->line };
        case '!': return Token { TOK_BANG, "!", l->line };
        default:
            // Cast before the <ctype.h> calls: a negative plain char is
            // undefined behaviour there.
            if (isalpha((unsigned char)c) || c == '_') return identifier_or_keyword(l, start);
            if (isdigit((unsigned char)c)) return number(l, start);
            error("unexpected character");
            break;  // error() returned: drop the character and rescan
        }
    }
}