deleted the c compiler

This commit is contained in:
2026-02-05 01:07:59 +00:00
parent a1099249e9
commit 8d130a870c
14 changed files with 0 additions and 4441 deletions
-756
View File
@@ -1,756 +0,0 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::LazyLock;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use chrono::{DateTime, Local};
use crate::registers::{Location, RegisterAllocator};
use crate::{block, cmd, comment, dsa};
use crate::parser::{
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, UnaryOperator, Variable,
};
pub struct CodeGenerator {
ast: Program,
imports: HashMap<String, String>,
globals: Vec<String>,
functions: Vec<String>,
symbols: Vec<String>,
allocator: RegisterAllocator,
}
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
HashMap::from([
// ("print", "print::print"),
// ("println", "print::println"),
// ("printnum", "print::print_num"),
// ("print_space", "print::print_whitespace"),
// ("print_newline", "print::print_newline"),
// ("print_char", "print::print_byte"),
// ("print_word", "print::print_word"),
// ("print_hex", "print::print_hex_word"),
])
});
fn import(name: &str, path: &str) -> String {
format!("include {name}: \"{}\"", path)
}
impl CodeGenerator {
const RET: &'static str = "\tjmp _ret";
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: Vec::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
}
}
pub fn include(&mut self, name: &str, path: &str) {
self.imports.insert(name.to_string(), path.to_string());
}
fn is_global(&self, name: &str) -> bool {
// Check if this variable is in the globals list
self.globals
.iter()
.any(|g| g.contains(&format!("dw {}:", name)))
}
pub fn generate(&mut self) -> Result<String, CompilerError> {
// always include the print library for debugging!
self.include("print", "./lib/io/print.dsa");
for block in self.ast.clone().declarations {
match block {
Declaration::Variable {
var: Variable { name, .. },
..
} => self.symbols.push(name),
Declaration::Function { name, .. } => self.symbols.push(name),
Declaration::Dependency(Dependency { name, .. }) => {
self.symbols.push(name)
}
}
}
for block in self.ast.clone().declarations {
self.generate_block(block.clone())?;
}
self.generate_layout()
}
fn generate_layout(&mut self) -> Result<String, CompilerError> {
let datetime: DateTime<Local> = SystemTime::now().into();
Ok(dsa![
"",
comment!("GENERATED BY DSC COMPILER"),
comment!(format!(
"Generated at {}",
datetime.format("%Y-%m-%d %H:%M:%S")
)),
"",
// imports
comment!("Imports"),
self.imports
.iter()
.map(|(k, v)| import(k, v))
.collect::<Vec<String>>()
.join("\n"),
"",
// reserved memory
comment!("Globals & Reserved Memory"),
self.globals.join("\n"),
"",
// entry point
comment!("Entry Point"),
"dw stack: 0x10000",
"db message: \"Process Exited with code:\"",
block! [ "_init"
dsa![ldw stack, bpr],
dsa![mov bpr, spr],
dsa![push zero],
dsa![call main],
dsa![call print::print_newline],
dsa![lwi message, rg0],
dsa![push rg0],
dsa![call print::print],
dsa![pop zero],
dsa![call print::print_hex_word],
dsa![pop zero],
dsa![hlt]
],
"",
comment!("Return"),
block! [ "_ret"
dsa![mov bpr, spr],
dsa![pop bpr],
dsa![return]
],
comment!("Compiled Code Starts..."),
// block! [ "main"
// dsa![push bpr],
// dsa![mov spr, bpr],
// dsa![lwi 67, rg1],
// dsa![stw rg1, spr, 8],
// dsa![mov bpr, spr],
// dsa![pop bpr],
// dsa![return]
// ],
self.functions.join("\n"),
])
}
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
self.globals.push(format!(
"dw {}: {}",
name,
init.unwrap_or(ConstExpr::Number(0))
))
}
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
match block {
Declaration::Variable { var, init, .. } => {
self.generate_global(&var.name, init)
}
Declaration::Function {
name,
return_type,
params,
body,
} => {
let func = self.generate_function(&name, &params, &body).join("\n");
self.functions.push(format!("{func}\n"));
}
Declaration::Dependency(Dependency { name, path }) => {
self.imports.insert(name, path);
}
};
Ok(())
}
// Example: Generate code for a function
fn generate_function(
&mut self,
name: &str,
params: &[Variable],
body: &[Statement],
) -> Vec<String> {
let mut code = Vec::new();
// Reset allocator for new function
self.allocator.reset();
// Function prologue
code.push(format!("{}:", name));
code.push("\tpush bpr".to_string());
code.push("\tmov spr, bpr".to_string());
code.push(String::new());
// Allocate parameters to registers or stack locations
for (i, param) in params.iter().enumerate() {
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
// Track that this parameter is at a stack location
let (reg, load_code) = self.allocator.alloc_var(&param.name).unwrap();
code.extend(load_code);
code.push(format!("\tldw bpr, {}, {}", reg, offset));
}
// Generate code for function body
for stmt in body {
let stmt_code = self.generate_statement(stmt).unwrap();
code.extend(stmt_code);
}
// automatically return at function end
if let Some(x) = code.last()
&& x == Self::RET
{
} else {
code.push(Self::RET.to_string());
}
code
}
// Example: Generate code for a statement
fn generate_statement(
&mut self,
stmt: &Statement,
) -> Result<Vec<String>, CompilerError> {
let mut code = Vec::new();
match stmt {
Statement::Declaration { var, value } => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(expr, true)?;
code.extend(expr_code);
// Store result in variable
let store_code = self.allocator.store_var(&var.name, &result_reg);
code.extend(store_code);
// Free temporary register
self.allocator.free_temp(&result_reg);
} else {
// Just declaring variable without initialization
self.allocator.alloc_var(&var.name)?;
}
}
Statement::Break => unimplemented!(),
Statement::Continue => unimplemented!(),
Statement::PtrWrite { ptr, value } => {
let (result_reg, expr_code) = self.generate_expression(value, true)?;
code.extend(expr_code);
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
code.extend(ptr_code);
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
self.allocator.free_temp(&result_reg);
self.allocator.free_temp(&ptr_reg);
}
Statement::Assign { varname, value } => {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(value, true)?;
code.extend(expr_code);
// Check if this is a global variable
if self.is_global(varname) {
// Store to global label
code.push(format!("\tstw {}, {}", result_reg, varname));
} else {
// Store result in local variable
let store_code = self.allocator.store_var(varname, &result_reg);
code.extend(store_code);
}
// Free temporary register
self.allocator.free_temp(&result_reg);
}
Statement::Return(expr) => {
if let Some(e) = expr {
let (result_reg, expr_code) = self.generate_expression(e, true)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret"));
self.allocator.free_temp(&result_reg);
}
}
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
code.extend(cond_code);
// Compare with zero
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
// Generate unique labels
let then_label = format!("_then_{}", self.get_unique_label());
let else_label = format!("_else_{}", self.get_unique_label());
let end_label = format!("_end_{}", self.get_unique_label());
// Jump to else if condition is false (equal to zero)
code.push(format!("\tjeq {}", else_label));
// Then block
code.push(format!("{}:", then_label));
for s in then_stmt {
code.extend(self.generate_statement(s)?);
}
if then_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("\tjmp {}", end_label));
// Else block
code.push(format!("{}:", else_label));
for s in else_stmt {
code.extend(self.generate_statement(s)?);
}
if else_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("{}:", end_label));
}
Statement::While { condition, body } => {
let loop_start = format!("_while_start_{}", self.get_unique_label());
let loop_end = format!("_while_end_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
code.extend(cond_code);
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
code.push(format!("\tjeq {}", loop_end));
// Loop body
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
code.push(format!("{}:", loop_end));
}
Statement::Loop(body) => {
let loop_start = format!("_loop_start_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) = self.generate_expression(expr, false)?;
code.extend(expr_code);
self.allocator.free_temp(&result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.extend(self.generate_statement(s)?);
}
}
}
Ok(code)
}
// Example: Generate code for an expression
// Returns (register containing result, assembly code)
fn generate_expression(
&mut self,
expr: &Expression,
use_result: bool,
) -> Result<(String, Vec<String>), CompilerError> {
let mut code = Vec::new();
// optimisation to prevent generating dead code!
if expr.is_pure() && !use_result {
return Ok((String::new(), code));
}
match expr {
Expression::StringLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// write string into memory
let uuid = self.get_unique_label();
code.push(format!("\tdb str_{uuid}: \"{value}\""));
// Load pointer to string
code.push(format!("\tlwi str_{uuid}, {reg}"));
Ok((reg, code))
}
Expression::CharLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// Load immediate value
code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg));
Ok((reg, code))
}
Expression::Number(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// Load immediate value
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
if *value > 0xFFFF || *value < 0 {
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
}
Ok((reg, code))
}
Expression::Variable { name, .. } => {
if self.is_global(&name.name) {
// Allocate a temporary register for the global
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// Load from global label
code.push(format!("\tldw {}, {}", name.name, reg));
Ok((reg, code))
} else {
// Local variable - use existing allocator logic
let (reg, load_code) = self.allocator.load_var(&name.name)?;
code.extend(load_code);
Ok((reg, code))
}
}
Expression::Binary { op, left, right } => {
// Evaluate left operand
let (left_reg, left_code) = self.generate_expression(left, true)?;
code.extend(left_code);
// Evaluate right operand
let (right_reg, right_code) = self.generate_expression(right, true)?;
code.extend(right_code);
// Allocate result register
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.extend(result_alloc);
// Generate operation
match op {
BinaryOperator::Add => {
code.push(format!(
"\tadd {}, {}, {}",
left_reg, right_reg, result_reg
));
}
BinaryOperator::Sub => {
code.push(format!(
"\tsub {}, {}, {}",
left_reg, right_reg, result_reg
));
}
BinaryOperator::Mul => {
self.include("maths", "./lib/maths/core.dsa");
// Call multiply function
code.push(format!("\tpush {}", right_reg));
code.push(format!("\tpush {}", left_reg));
code.push("\tcall maths::multiply".to_string());
code.push(format!("\tpop {}", result_reg));
code.push("\tpop zero".to_string());
}
// Comparison operators - return 1 (true) or 0 (false)
BinaryOperator::Eq => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Ne => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Lt => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Le => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Gt => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Ge => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
_ => unimplemented!(),
}
// Free operand registers (allocator will protect variables)
self.allocator.free_temp(&left_reg);
self.allocator.free_temp(&right_reg);
Ok((result_reg, code))
}
Expression::Call { name, args } => {
// first evaluate all the args we're going to need
let mut arg_regs = Vec::new();
for arg in args.iter().rev() {
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
code.extend(arg_code);
arg_regs.push(arg_reg);
}
// Save caller-saved registers and track which ones we saved
// old method, inefficient.
// let saved_regs = self.allocator.get_caller_saved_registers();
// for reg in &saved_regs {
// code.push(format!("\tpush {}", reg));
// }
// Save caller-saved registers and track which ones we saved
let saved_regs = self.allocator.get_caller_saved_registers();
for reg in &saved_regs {
// spill variables to stack
code.extend(self.allocator.spill_register(reg).unwrap());
}
// Evaluate and push arguments in reverse order
for (i, arg_reg) in arg_regs.iter().enumerate() {
code.push(format!(
"\tpush {} // push arg {}",
arg_reg,
args.len() - 1 - i
));
}
// if GLOBAL_METHODS.contains_key(name.name.as_str()) {
// code.push(format!("\tcall {}",
// GLOBAL_METHODS[name.name.as_str()])); } else
if self.symbols.contains(&name.name) {
// Call local function
code.push(format!("\tcall {}", name));
} else if let Some(ns) = name.namespace.clone()
&& self.imports.contains_key(&ns)
{
code.push(format!("\tcall {}", name));
} else {
return Err(CompilerError::Undefined(name.clone()));
}
let result_reg: String;
if use_result {
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
result_reg = temp_result_reg;
code.extend(result_alloc);
code.push(format!("\tpop {}", result_reg));
// Clean up arguments
if args.len() > 1 {
for _ in 0..(args.len() - 1) {
code.push("\tpop zero".to_string());
}
}
} else {
result_reg = "zero".to_string();
// Clean up arguments
if args.len() > 0 {
for _ in 0..(args.len()) {
code.push("\tpop zero".to_string());
}
}
}
// Restore caller-saved registers in reverse order (LIFO)
// for reg in saved_regs.iter().rev() {
// code.push(format!("\tpop {}", reg));
// }
// Free argument registers
for reg in arg_regs {
self.allocator.free_temp(&reg);
}
Ok((result_reg, code))
}
Expression::Unary { op, operand } => {
let (operand_reg, operand_code) =
self.generate_expression(operand, true)?;
code.extend(operand_code);
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.extend(result_alloc);
match op {
UnaryOperator::Minus => {
// Negate: result = 0 - operand
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
}
UnaryOperator::Plus => {
// Just move
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
}
UnaryOperator::Dereference => {
code.push(format!("\tldw {}, {}", operand_reg, result_reg));
}
UnaryOperator::Reference => {
code.extend(self.allocator.spill_register(&operand_reg)?);
code.push(format!(
"\tsubi bpr {} {}",
-(4 + self.allocator.get_stack_offset()),
result_reg
))
}
}
self.allocator.free_temp(&operand_reg);
Ok((result_reg, code))
}
Expression::Empty => Ok(("zero".to_string(), code)),
}
}
// Helper for generating unique labels
fn get_unique_label(&mut self) -> String {
// You'd implement a counter here
static COUNTER: AtomicU32 = AtomicU32::new(0);
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
(val + 1).to_string()
}
}
/// Build a single string from any number of arguments.
/// Each argument must implement `Display` or be convertible to a string.
#[macro_export]
macro_rules! dsa {
($($arg:expr),* $(,)?) => {{
// Start with an empty String well grow it as we go.
use std::fmt::Write;
let mut s = ::std::string::String::new();
$(
// `write!` is cheaper than `format!` for each element
// because it reuses the same buffer.
write!(s, "{}\n", $arg).expect("write to String failed");
)*
s
}};
}
// ──────────────────────── dsa! ────────────────────────
// A tiny helper that just turns its tokenstream into a string.
// The trailing comma is kept its part of the syntax you want.
#[macro_export]
macro_rules! cmd {
($($tokens:tt)*) => {{
// Well just stringify the tokens and return a String.
format!("{}", concat!(stringify!($tokens), "\n"))
}};
}
// ──────────────────────── block! ────────────────────────
// Usage:
//
// let asm = block![ "name"
// dsa![mov rg0, rg1],
// dsa![add rg1, rg1]
// ];
//
// `asm` is a `&'static str` containing:
//
// name:
// mov rg0, rg1
// add rg1, rg1
//
#[macro_export]
macro_rules! block {
// The first token must be a string literal thats the label.
($label:literal $(dsa![$($ins:tt)*]),* ) => {{
// Build a single string at compile time.
const CODE: &str = concat!(
$label, ":\n",
// Each `dsa!` call yields a string like `"mov rg0, rg1"`.
// We add a newline after each one to get the desired layout.
$(concat!("\t", stringify!($($ins)*), "\n")),*
);
CODE
}};
}
#[macro_export]
macro_rules! comment {
($text:expr) => {{ format!("// {}", $text) }};
}
-627
View File
@@ -1,627 +0,0 @@
use std::iter::Peekable;
use std::str::Chars;
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
// Keywords
Fn,
Let,
If,
Else,
Loop,
While,
Break,
Return,
Continue,
Include,
Static,
Const,
// Identifiers and literals
Identifier(Name),
String(String),
Integer(u64),
Char(char),
// Symbols
LeftParen, // (
RightParen, // )
LeftBrace, // {
RightBrace, // }
Semicolon, // ;
Colon, // :
Comma, // ,
// Operators
Plus, // +
Minus, // -
Star, // *
Amphersand, // &
Slash, // /
Assign, // =
EqualEqual, // ==
Bang, // !
BangEqual, // !=
Less, // <
LessEqual, // <=
Greater, // >
GreaterEqual, // >=
RightArrow, // ->
// Special
Eof,
}
#[derive(Debug, PartialEq, Clone)]
pub struct Name {
pub name: String,
pub namespace: Option<String>,
}
use std::fmt;
impl fmt::Display for Name {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(ref ns) = self.namespace {
write!(f, "{}::{}", ns, self.name)
} else {
write!(f, "{}", self.name)
}
}
}
impl Token {
pub fn tt(&self) -> &str {
match self {
Token::Const => "Const",
Token::Static => "Static",
Token::Include => "Include",
Token::Fn => "Fn",
Token::If => "If",
Token::Let => "Let",
Token::Else => "Else",
Token::Loop => "Loop",
Token::While => "While",
Token::Break => "Break",
Token::Return => "Return",
Token::Continue => "Continue",
Token::Identifier(_) => "Identifier",
Token::String(_) => "String",
Token::Integer(_) => "UnsignedInt",
Token::Char(_) => "Char",
Token::LeftParen => "LeftParen",
Token::RightParen => "RightParen",
Token::LeftBrace => "LeftBrace",
Token::RightBrace => "RightBrace",
Token::Semicolon => "Semicolon",
Token::Colon => "Colon",
Token::Comma => "Comma",
Token::RightArrow => "RightArrow",
Token::Plus => "Plus",
Token::Minus => "Minus",
Token::Star => "Star",
Token::Amphersand => "Amphersand",
Token::Slash => "Slash",
Token::Assign => "Assign",
Token::EqualEqual => "EqualEqual",
Token::Bang => "Bang",
Token::BangEqual => "BangEqual",
Token::Less => "Less",
Token::LessEqual => "LessEqual",
Token::Greater => "Greater",
Token::GreaterEqual => "GreaterEqual",
Token::Eof => "Eof",
}
}
}
#[derive(Debug)]
pub struct Lexer<'a> {
chars: Peekable<Chars<'a>>,
current: Option<char>,
line: usize,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
let mut chars = input.chars().peekable();
let current = chars.next();
Lexer {
chars,
current,
line: 1,
}
}
fn advance(&mut self) -> Option<char> {
self.current = self.chars.next();
self.current
}
fn peek(&mut self) -> Option<&char> {
self.chars.peek()
}
fn skip_whitespace(&mut self) {
while let Some(c) = self.current {
if !c.is_whitespace() {
break;
}
if c == '\n' {
self.line += 1;
}
self.advance();
}
}
fn skip_line_comment(&mut self) {
// Skip the two slashes
self.advance(); // first /
self.advance(); // second /
// Skip until newline or EOF
while let Some(c) = self.current {
if c == '\n' {
self.line += 1;
self.advance();
break;
}
self.advance();
}
}
fn skip_block_comment(&mut self) -> Result<(), String> {
// Skip the /*
self.advance(); // /
self.advance(); // *
let start_line = self.line;
// Look for */
while let Some(c) = self.current {
if c == '\n' {
self.line += 1;
}
if c == '*' {
if let Some(&next) = self.peek() {
if next == '/' {
self.advance(); // *
self.advance(); // /
return Ok(());
}
}
}
self.advance();
}
Err(format!(
"Unterminated block comment starting at line {}",
start_line
))
}
fn skip_whitespace_and_comments(&mut self) {
loop {
self.skip_whitespace();
// Check for comments
if let Some('/') = self.current {
if let Some(&next) = self.peek() {
match next {
'/' => {
self.skip_line_comment();
continue;
}
'*' => {
if let Err(e) = self.skip_block_comment() {
eprintln!("Lexer error: {}", e);
}
continue;
}
_ => break,
}
}
}
break;
}
}
fn read_identifier(&mut self) -> String {
let mut ident = String::new();
// Include the current character if it's valid
if let Some(c) = self.current {
if c.is_alphabetic() || c == '_' {
ident.push(c);
}
}
// Read remaining characters
while let Some(&c) = self.peek() {
if c.is_alphanumeric() || c == '_' {
self.advance();
ident.push(c);
} else {
break;
}
}
ident
}
fn keyword_or_identifier(&mut self) -> Token {
let first_ident = self.read_identifier();
// Check if it's a keyword first (keywords can't have namespaces)
let keyword = match first_ident.as_str() {
"fn" => Some(Token::Fn),
"if" => Some(Token::If),
"else" => Some(Token::Else),
"while" => Some(Token::While),
"loop" => Some(Token::Loop),
"break" => Some(Token::Break),
"return" => Some(Token::Return),
"continue" => Some(Token::Continue),
"include" => Some(Token::Include),
"let" => Some(Token::Let),
"const" => Some(Token::Const),
"static" => Some(Token::Static),
_ => None,
};
if let Some(kw) = keyword {
return kw;
}
// Not a keyword - check for namespace separator (::)
// We need to peek TWO characters ahead without consuming anything
if let Some(&':') = self.peek() {
// We see one colon, but we need to check if there's another one after it
// We can't peek two ahead directly, so we need a different approach
// Save the current position by using a temporary peekable iterator
// Actually, we can't do that easily. Instead, let's just check:
// If we see ':', temporarily advance and check the next char
// Create a temporary check
let mut temp_chars = self.chars.clone();
let first_peek = temp_chars.next(); // This is the ':' we already saw
let second_peek = temp_chars.peek();
if let Some(&':') = second_peek {
// It's :: - consume both colons
self.advance(); // consume first :
self.advance(); // consume second :
// Read the second identifier (the actual name)
let second_ident = self.read_identifier();
// Return namespaced identifier
return Token::Identifier(Name {
namespace: Some(first_ident),
name: second_ident,
});
}
// else: It's a single colon (type annotation) - DON'T consume it
// Just fall through and return the identifier
}
// No namespace separator - just a regular identifier
Token::Identifier(Name {
namespace: None,
name: first_ident,
})
}
fn read_number(&mut self) -> Result<u64, String> {
let current = self.current.unwrap();
// Check for hex (0x) or binary (0b) prefix
if current == '0' {
if let Some(&next_char) = self.peek() {
match next_char {
'x' | 'X' => {
self.advance(); // consume '0'
self.advance(); // consume 'x'
return self.read_hex_number();
}
'b' | 'B' => {
self.advance(); // consume '0'
self.advance(); // consume 'b'
return self.read_binary_number();
}
_ => {}
}
}
}
// Read decimal number
self.read_decimal_number()
}
fn read_decimal_number(&mut self) -> Result<u64, String> {
let mut num_str = String::new();
if let Some(c) = self.current {
num_str.push(c);
}
while let Some(&c) = self.peek() {
if c.is_ascii_digit() {
self.advance();
num_str.push(c);
} else {
break;
}
}
num_str
.parse::<u64>()
.map_err(|_| format!("Invalid decimal number: {}", num_str))
}
fn read_hex_number(&mut self) -> Result<u64, String> {
let mut num_str = String::new();
// Read current character if it's a hex digit
if let Some(c) = self.current {
if c.is_ascii_hexdigit() {
num_str.push(c);
}
}
while let Some(&c) = self.peek() {
if c.is_ascii_hexdigit() {
self.advance();
num_str.push(c);
} else {
break;
}
}
if num_str.is_empty() {
return Err("Invalid hexadecimal number: no digits after 0x".to_string());
}
u64::from_str_radix(&num_str, 16)
.map_err(|_| format!("Invalid hexadecimal number: {}", num_str))
}
fn read_binary_number(&mut self) -> Result<u64, String> {
let mut num_str = String::new();
// Read current character if it's a binary digit
if let Some(c) = self.current {
if c == '0' || c == '1' {
num_str.push(c);
}
}
while let Some(&c) = self.peek() {
if c == '0' || c == '1' {
self.advance();
num_str.push(c);
} else {
break;
}
}
if num_str.is_empty() {
return Err("Invalid binary number: no digits after 0b".to_string());
}
u64::from_str_radix(&num_str, 2)
.map_err(|_| format!("Invalid binary number: {}", num_str))
}
fn read_string(&mut self) -> Result<String, String> {
self.advance(); // Skip the opening quote
let mut s = String::new();
while let Some(c) = self.current {
if c == '"' {
return Ok(s);
}
// Handle escape sequences
if c == '\\' {
self.advance();
if let Some(escaped) = self.current {
let escaped_char = match escaped {
'n' => '\n',
't' => '\t',
'r' => '\r',
'\\' => '\\',
'"' => '"',
_ => escaped, // For now, just use the character as-is
};
s.push(escaped_char);
} else {
return Err("Unexpected end of string after escape".to_string());
}
} else {
s.push(c);
}
self.advance();
}
Err("Unterminated string literal".to_string())
}
fn match_next(&mut self, expected: char) -> bool {
match self.peek() {
Some(&c) if c == expected => {
self.advance();
true
}
_ => false,
}
}
fn scan_single_char_token(&mut self, c: char) -> Option<Token> {
match c {
'(' => Some(Token::LeftParen),
')' => Some(Token::RightParen),
'{' => Some(Token::LeftBrace),
'}' => Some(Token::RightBrace),
';' => Some(Token::Semicolon),
',' => Some(Token::Comma),
'&' => Some(Token::Amphersand),
'+' => Some(Token::Plus),
'*' => Some(Token::Star),
_ => None,
}
}
fn scan_operator(&mut self, c: char) -> Option<Token> {
match c {
'-' => Some(if self.match_next('>') {
Token::RightArrow
} else {
Token::Minus
}),
'!' => Some(if self.match_next('=') {
Token::BangEqual
} else {
Token::Bang
}),
'=' => Some(if self.match_next('=') {
Token::EqualEqual
} else {
Token::Assign
}),
'<' => Some(if self.match_next('=') {
Token::LessEqual
} else {
Token::Less
}),
'>' => Some(if self.match_next('=') {
Token::GreaterEqual
} else {
Token::Greater
}),
':' => {
// Single colon (for type annotations)
// Note: :: is handled in keyword_or_identifier for namespaces
Some(Token::Colon)
}
'/' => {
// Check if it's a comment or division
if let Some(&next) = self.peek() {
if next == '/' || next == '*' {
// It's a comment, don't consume it here
// Let skip_whitespace_and_comments handle it
None
} else {
Some(Token::Slash)
}
} else {
Some(Token::Slash)
}
}
_ => None,
}
}
pub fn next_token(&mut self) -> Token {
self.skip_whitespace_and_comments();
let Some(c) = self.current else {
return Token::Eof;
};
// Try single-character tokens first
if let Some(token) = self.scan_single_char_token(c) {
self.advance();
return token;
}
// Try operators (may be multi-character)
if let Some(token) = self.scan_operator(c) {
self.advance();
return token;
}
// String literals
if c == '"' {
let token = match self.read_string() {
Ok(s) => Token::String(s),
Err(e) => {
eprintln!("Lexer error on line {}: {}", self.line, e);
// Skip to next quote or end
while let Some(ch) = self.current {
if ch == '"' || ch == '\n' {
break;
}
self.advance();
}
Token::String(String::new())
}
};
self.advance();
return token;
}
// Identifiers and keywords (including namespaced identifiers)
if c.is_alphabetic() || c == '_' {
let token = self.keyword_or_identifier();
self.advance();
return token;
}
// Numbers (decimal, hex, binary)
if c.is_ascii_digit() {
let token = match self.read_number() {
Ok(num) => Token::Integer(num),
Err(e) => {
eprintln!("Lexer error on line {}: {}", self.line, e);
// Skip invalid number
while let Some(&ch) = self.peek() {
if !ch.is_alphanumeric() {
break;
}
self.advance();
}
Token::Integer(0)
}
};
self.advance();
return token;
}
// Unknown character - skip it
eprintln!(
"Lexer warning on line {}: Skipping unknown character '{}'",
self.line, c
);
self.advance();
self.next_token()
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
match self.next_token() {
Token::Eof => None,
token => Some(token),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic() {
// Placeholder test
assert!(true);
}
}