enum TokenType { TOK_EOF, TOK_IDENTIFIER, TOK_INTEGER, TOK_STRING, TOK_CHAR, TOK_FN, TOK_RETURN, TOK_STRUCT, TOK_TYPEDEF, TOK_TYPE_U8, TOK_TYPE_U32, // ... add all types TOK_FN_STATIC, TOK_FN_EXPORT, // ... all fn variants TOK_LBRACE, TOK_RBRACE, TOK_LPAREN, TOK_RPAREN, TOK_COMMA, TOK_SEMICOLON, TOK_STAR, TOK_EQUAL, TOK_PLUS, TOK_MINUS, TOK_SLASH, TOK_BANG, TOK_ARROW, // ... }; struct Token { TokenType type; char* lexeme; int line; }; struct Lexer { char* src; int pos; int line; }; char peek(Lexer* l) { return l->src[l->pos]; } char advance(Lexer* l) { if (l->src[l->pos] == '\n') l->line++; return l->src[l->pos++]; } bool match(Lexer* l, char expected) { if (l->src[l->pos] != expected) return false; l->pos++; return true; } void skip_whitespace_and_comments(Lexer* l) { while (true) { char c = peek(l); if (c == ' ' || c == '\t' || c == '\r' || c == '\n') advance(l); else if (c == '/' && l->src[l->pos + 1] == '/') { while (peek(l) != '\n' && peek(l) != '\0') advance(l); } else if (c == '/' && l->src[l->pos + 1] == '*') { advance(l); advance(l); // skip /* while (!(peek(l) == '*' && l->src[l->pos + 1] == '/')) { if (peek(l) == '\0') error("unterminated comment"); advance(l); } advance(l); advance(l); // skip */ } else break; } } Token identifier_or_keyword(Lexer* l, int start) { while (isalnum(peek(l)) || peek(l) == '_') advance(l); char* text = slice(l->src + start, l->pos - start); // check keyword map TokenType type = lookup_keyword(text); // use hash or strcmp return Token { type, text, l->line }; } Token number(Lexer* l, int start) { while (isdigit(peek(l))) advance(l); char* text = slice(l->src + start, l->pos - start); return Token { TOK_INTEGER, text, l->line }; } Token string(Lexer* l) { advance(l); // skip opening quote int start = l->pos; while (peek(l) != '"' && peek(l) != '\0') { if (peek(l) == '\\') advance(l); // escape advance(l); } char* text = slice(l->src + start, l->pos - start); if (peek(l) != '"') error("unterminated string"); advance(l); // closing quote return Token { TOK_STRING, text, l->line }; } Token next_token(Lexer* l) { skip_whitespace_and_comments(l); int start = l->pos; char c = advance(l); switch (c) { case '\0': return Token { TOK_EOF, "", l->line }; case '{': return Token { TOK_LBRACE, "{", l->line }; case '}': return Token { TOK_RBRACE, "}", l->line }; case '(': return Token { TOK_LPAREN, "(", l->line }; case ')': return Token { TOK_RPAREN, ")", l->line }; case ',': return Token { TOK_COMMA, ",", l->line }; case ';': return Token { TOK_SEMICOLON, ";", l->line }; case '*': return Token { TOK_STAR, "*", l->line }; case '=': return Token { TOK_EQUAL, "=", l->line }; case '+': return Token { TOK_PLUS, "+", l->line }; case '-': if (match(l, '>')) return Token { TOK_ARROW, "->", l->line }; else return Token { TOK_MINUS, "-", l->line }; case '/': return Token { TOK_SLASH, "/", l->line }; case '!': return Token { TOK_BANG, "!", l->line }; case '"': return string(l); default: if (isalpha(c) || c == '_') return identifier_or_keyword(l, start); if (isdigit(c)) return number(l, start); error("unexpected character"); } }