From 328741eb51bd19d18ec73b5a50a6e2cc129cd2c7 Mon Sep 17 00:00:00 2001 From: zxq5 Date: Sun, 8 Feb 2026 20:03:31 +0000 Subject: [PATCH] updated compiler with support for more operators. (only the unary operators from this are implemented for now) --- compiler/src/backend/dsa/codegen.rs | 55 +- compiler/src/backend/dsa/registers.rs | 4 +- compiler/src/frontend/dsc/lexer.rs | 834 +++++++++++++++++--------- compiler/src/frontend/dsc/parser.rs | 179 ++++-- compiler/src/model.rs | 154 ++++- 5 files changed, 868 insertions(+), 358 deletions(-) diff --git a/compiler/src/backend/dsa/codegen.rs b/compiler/src/backend/dsa/codegen.rs index 3072ade..64fa0a2 100644 --- a/compiler/src/backend/dsa/codegen.rs +++ b/compiler/src/backend/dsa/codegen.rs @@ -510,7 +510,7 @@ impl CodeGenerator { code.push("\tpop zero".to_string()); } // Comparison operators - return 1 (true) or 0 (false) - BinaryOperator::Eq => { + BinaryOperator::Equal => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); code.push(format!("\tlli 1, {}", result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); @@ -518,7 +518,7 @@ impl CodeGenerator { code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } - BinaryOperator::Ne => { + BinaryOperator::NotEqual => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); code.push(format!("\tlli 1, {}", result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); @@ -526,7 +526,7 @@ impl CodeGenerator { code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } - BinaryOperator::Lt => { + BinaryOperator::LessThan => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); code.push(format!("\tlli 1, {}", result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); @@ -534,7 +534,7 @@ impl CodeGenerator { code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } - BinaryOperator::Le => { + BinaryOperator::LessOrEqual 
=> { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); code.push(format!("\tlli 1, {}", result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); @@ -542,7 +542,7 @@ impl CodeGenerator { code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } - BinaryOperator::Gt => { + BinaryOperator::GreaterThan => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); code.push(format!("\tlli 1, {}", result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); @@ -550,7 +550,7 @@ impl CodeGenerator { code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } - BinaryOperator::Ge => { + BinaryOperator::GreaterOrEqual => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); code.push(format!("\tlli 1, {}", result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); @@ -581,13 +581,6 @@ impl CodeGenerator { arg_regs.push(arg_reg); } - // Save caller-saved registers and track which ones we saved - // old method, inefficient. 
- // let saved_regs = self.allocator.get_caller_saved_registers(); - // for reg in &saved_regs { - // code.push(format!("\tpush {}", reg)); - // } - // Save caller-saved registers and track which ones we saved let saved_regs = self.allocator.get_caller_saved_registers(); for reg in &saved_regs { @@ -604,9 +597,6 @@ impl CodeGenerator { )); } - // if GLOBAL_METHODS.contains_key(name.name.as_str()) { - // code.push(format!("\tcall {}", - // GLOBAL_METHODS[name.name.as_str()])); } else if self.symbols.contains(&name.name) { // Call local function code.push(format!("\tcall {}", name)); @@ -644,11 +634,6 @@ impl CodeGenerator { } } - // Restore caller-saved registers in reverse order (LIFO) - // for reg in saved_regs.iter().rev() { - // code.push(format!("\tpop {}", reg)); - // } - // Free argument registers for reg in arg_regs { self.allocator.free_temp(reg); @@ -677,7 +662,9 @@ impl CodeGenerator { UnaryOperator::Dereference => { code.push(format!("\tldw {}, {}", operand_reg, result_reg)); } - UnaryOperator::Reference => { + UnaryOperator::AddressOf => { + // ensure the referenced variable is on the stack and return its + // address. let (offset, alloc_code) = self.allocator.free_register(&operand_reg)?; code.extend(alloc_code); @@ -687,6 +674,28 @@ impl CodeGenerator { result_reg )); } + UnaryOperator::SizeOf => { + if let Ok(id) = operand.type_id() { + let size = id.size(); + code.push(format!("\tmov {}, {}", size, result_reg)); + } + } + UnaryOperator::CastAs => {} /* this should be removed once the */ + // semantic analyser can handle it! 
+ UnaryOperator::Increment => { + // prefix increment + code.push(format!("\tmov {}, {}", operand_reg, result_reg)); + code.push(format!("\taddi {}, {}, 1", operand_reg, operand_reg)); + } + UnaryOperator::Decrement => { + // prefix decrement + code.push(format!("\tmov {}, {}", operand_reg, result_reg)); + code.push(format!("\tsubi {}, {}, 1", operand_reg, operand_reg)); + } + UnaryOperator::BitwiseNot => { + code.push(format!("\tnot {}, {}", operand_reg, result_reg)); + } + UnaryOperator::LogicalNot => unimplemented!(), } self.allocator.free_temp(operand_reg); @@ -694,6 +703,8 @@ impl CodeGenerator { } Expression::Empty => Ok((Register::Null, code)), + + _ => unimplemented!(), } } diff --git a/compiler/src/backend/dsa/registers.rs b/compiler/src/backend/dsa/registers.rs index abf5df3..90da055 100644 --- a/compiler/src/backend/dsa/registers.rs +++ b/compiler/src/backend/dsa/registers.rs @@ -132,7 +132,7 @@ impl RegisterAllocator { } // This is a true temporary - safe to free - if reg != Register::Zero { + if !matches!(reg, Register::Zero | Register::Null) { self.in_use.insert(reg, false); } } @@ -141,7 +141,7 @@ impl RegisterAllocator { // Check if this variable is in a register if let Some(location) = self.variable_locations.get(var).cloned() { if let Some(reg) = location.register - && reg != Register::Zero + && !matches!(reg, Register::Zero | Register::Null) { self.register_contents.remove(&reg); self.in_use.insert(reg, false); diff --git a/compiler/src/frontend/dsc/lexer.rs b/compiler/src/frontend/dsc/lexer.rs index c41a62b..5805f6a 100644 --- a/compiler/src/frontend/dsc/lexer.rs +++ b/compiler/src/frontend/dsc/lexer.rs @@ -16,6 +16,8 @@ pub enum Token { Include, Static, Const, + As, + SizeOf, // Identifiers and literals Identifier(Name), @@ -23,38 +25,68 @@ pub enum Token { Integer(u64), Char(char), - // Symbols - LeftParen, // ( - RightParen, // ) - LeftBrace, // { - RightBrace, // } - Semicolon, // ; - Colon, // : - Comma, // , + // Delimiters + LeftParen, // ( 
+ RightParen, // ) + LeftBrace, // { + RightBrace, // } + LeftBracket, // [ + RightBracket, // ] + Semicolon, // ; + Colon, // : + Comma, // , + Dot, // . + RightArrow, // -> - // Operators - Plus, // + - Minus, // - - Star, // * - Amphersand, // & - Slash, // / - Assign, // = + // Arithmetic operators + Plus, // + + Minus, // - + Star, // * + Slash, // / + Percent, // % + PlusPlus, // ++ + MinusMinus, // -- + + // Bitwise operators + Ampersand, // & + Pipe, // | + Caret, // ^ + Tilde, // ~ + LeftShift, // << + RightShift, // >> + + // Logical operators + Bang, // ! + LogicalAnd, // && + LogicalOr, // || + + // Comparison operators EqualEqual, // == - Bang, // ! BangEqual, // != Less, // < LessEqual, // <= Greater, // > GreaterEqual, // >= - RightArrow, // -> + + // Assignment operators + Assign, // = + PlusEqual, // += + MinusEqual, // -= + StarEqual, // *= + SlashEqual, // /= + PercentEqual, // %= + AndEqual, // &= + OrEqual, // |= + XorEqual, // ^= + ShlEqual, // <<= + ShrEqual, // >>= // Special Eof, } -use std::fmt; - use crate::model::Name; +use std::fmt; impl fmt::Display for Name { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -81,6 +113,7 @@ impl Token { Token::Break => "Break", Token::Return => "Return", Token::Continue => "Continue", + Token::As => "As", Token::Identifier(_) => "Identifier", Token::String(_) => "String", Token::Integer(_) => "UnsignedInt", @@ -89,29 +122,52 @@ impl Token { Token::RightParen => "RightParen", Token::LeftBrace => "LeftBrace", Token::RightBrace => "RightBrace", + Token::LeftBracket => "LeftBracket", + Token::RightBracket => "RightBracket", Token::Semicolon => "Semicolon", Token::Colon => "Colon", Token::Comma => "Comma", + Token::Dot => "Dot", Token::RightArrow => "RightArrow", Token::Plus => "Plus", Token::Minus => "Minus", Token::Star => "Star", - Token::Amphersand => "Amphersand", Token::Slash => "Slash", - Token::Assign => "Assign", - Token::EqualEqual => "EqualEqual", + Token::Percent => "Percent", + 
Token::PlusPlus => "PlusPlus", + Token::MinusMinus => "MinusMinus", + Token::Ampersand => "Ampersand", + Token::Pipe => "Pipe", + Token::Caret => "Caret", + Token::Tilde => "Tilde", + Token::LeftShift => "LeftShift", + Token::RightShift => "RightShift", Token::Bang => "Bang", + Token::LogicalAnd => "LogicalAnd", + Token::LogicalOr => "LogicalOr", + Token::EqualEqual => "EqualEqual", Token::BangEqual => "BangEqual", Token::Less => "Less", Token::LessEqual => "LessEqual", Token::Greater => "Greater", Token::GreaterEqual => "GreaterEqual", + Token::Assign => "Assign", + Token::PlusEqual => "PlusEqual", + Token::MinusEqual => "MinusEqual", + Token::StarEqual => "StarEqual", + Token::SlashEqual => "SlashEqual", + Token::PercentEqual => "PercentEqual", + Token::AndEqual => "AndEqual", + Token::OrEqual => "OrEqual", + Token::XorEqual => "XorEqual", + Token::ShlEqual => "ShlEqual", + Token::ShrEqual => "ShrEqual", + Token::SizeOf => "SizeOf", Token::Eof => "Eof", } } } -#[derive(Debug)] pub struct Lexer<'a> { chars: Peekable<Chars<'a>>, current: Option<char>, @@ -130,31 +186,59 @@ impl<'a> Lexer<'a> { } } + // ======================================================================== + // Character Navigation + // ======================================================================== + + /// Advance to the next character and return it fn advance(&mut self) -> Option<char> { self.current = self.chars.next(); self.current } - fn peek(&mut self) -> Option<&char> { - self.chars.peek() + /// Peek at the next character without consuming it + fn peek(&mut self) -> Option<char> { + self.chars.peek().copied() } + /// Peek two characters ahead + fn peek_second(&mut self) -> Option<char> { + let mut temp = self.chars.clone(); + temp.next(); // Skip the first peek + temp.next() + } + + /// Check if the next character matches expected, and consume it if so + fn match_next(&mut self, expected: char) -> bool { + if self.peek() == Some(expected) { + self.advance(); + true + } else { + false + } + } + + // 
======================================================================== + // Whitespace and Comments + // ======================================================================== + fn skip_whitespace(&mut self) { while let Some(c) = self.current { - if !c.is_whitespace() { + if c.is_whitespace() { + if c == '\n' { + self.line += 1; + } + self.advance(); + } else { break; } - if c == '\n' { - self.line += 1; - } - self.advance(); } } fn skip_line_comment(&mut self) { - // Skip the two slashes - self.advance(); // first / - self.advance(); // second / + // We're at the first '/', advance past '//' + self.advance(); // consume first '/' + self.advance(); // consume second '/' // Skip until newline or EOF while let Some(c) = self.current { @@ -168,26 +252,22 @@ impl<'a> Lexer<'a> { } fn skip_block_comment(&mut self) -> Result<(), String> { - // Skip the /* - self.advance(); // / - self.advance(); // * - let start_line = self.line; - // Look for */ + // We're at '/', advance past '/*' + self.advance(); // consume '/' + self.advance(); // consume '*' + + // Look for closing '*/' while let Some(c) = self.current { if c == '\n' { self.line += 1; } - if c == '*' { - if let Some(&next) = self.peek() { - if next == '/' { - self.advance(); // * - self.advance(); // / - return Ok(()); - } - } + if c == '*' && self.peek() == Some('/') { + self.advance(); // consume '*' + self.advance(); // consume '/' + return Ok(()); } self.advance(); @@ -204,21 +284,19 @@ impl<'a> Lexer<'a> { self.skip_whitespace(); // Check for comments - if let Some('/') = self.current { - if let Some(&next) = self.peek() { - match next { - '/' => { - self.skip_line_comment(); - continue; - } - '*' => { - if let Err(e) = self.skip_block_comment() { - eprintln!("Lexer error: {}", e); - } - continue; - } - _ => break, + if self.current == Some('/') { + match self.peek() { + Some('/') => { + self.skip_line_comment(); + continue; } + Some('*') => { + if let Err(e) = self.skip_block_comment() { + self.error(&e); 
+ } + continue; + } + _ => break, } } @@ -226,18 +304,20 @@ impl<'a> Lexer<'a> { } } + // ======================================================================== + // Identifiers and Keywords + // ======================================================================== + fn read_identifier(&mut self) -> String { let mut ident = String::new(); - // Include the current character if it's valid + // Include the current character (already validated as alphabetic or '_') if let Some(c) = self.current { - if c.is_alphabetic() || c == '_' { - ident.push(c); - } + ident.push(c); } - // Read remaining characters - while let Some(&c) = self.peek() { + // Read remaining alphanumeric or underscore characters + while let Some(c) = self.peek() { if c.is_alphanumeric() || c == '_' { self.advance(); ident.push(c); @@ -249,89 +329,93 @@ impl<'a> Lexer<'a> { ident } - fn keyword_or_identifier(&mut self) -> Token { - let first_ident = self.read_identifier(); + fn scan_identifier_or_keyword(&mut self) -> Token { + let first_part = self.read_identifier(); - // Check if it's a keyword first (keywords can't have namespaces) - let keyword = match first_ident.as_str() { + // Check if it's a keyword (keywords cannot have namespaces) + if let Some(keyword) = self.match_keyword(&first_part) { + return keyword; + } + + // Check for namespace separator '::' + if self.peek() == Some(':') && self.peek_second() == Some(':') { + // Consume '::' + self.advance(); // consume first ':' + self.advance(); // consume second ':' + self.advance(); // move to the first character of the next identifier + + // Read the second part (the actual name) + let second_part = self.read_identifier(); + + return Token::Identifier(Name { + namespace: Some(first_part), + name: second_part, + }); + } + + // Plain identifier without namespace + Token::Identifier(Name { + namespace: None, + name: first_part, + }) + } + + fn match_keyword(&self, word: &str) -> Option { + match word { "fn" => Some(Token::Fn), + "let" => 
Some(Token::Let), "if" => Some(Token::If), "else" => Some(Token::Else), - "while" => Some(Token::While), "loop" => Some(Token::Loop), + "while" => Some(Token::While), "break" => Some(Token::Break), "return" => Some(Token::Return), "continue" => Some(Token::Continue), "include" => Some(Token::Include), - "let" => Some(Token::Let), "const" => Some(Token::Const), "static" => Some(Token::Static), + "as" => Some(Token::As), + "sizeof" => Some(Token::SizeOf), _ => None, - }; - - if let Some(kw) = keyword { - return kw; } + } - // Not a keyword - check for namespace separator (::) - // We need to peek TWO characters ahead without consuming anything - if let Some(&':') = self.peek() { - // We see one colon, but we need to check if there's another one after it - // We can't peek two ahead directly, so we need a different approach + // ======================================================================== + // Numbers + // ======================================================================== - // Save the current position by using a temporary peekable iterator - // Actually, we can't do that easily. 
Instead, let's just check: - // If we see ':', temporarily advance and check the next char - - // Create a temporary check - let mut temp_chars = self.chars.clone(); - let _ = temp_chars.next(); // This is the ':' we already saw - let second_peek = temp_chars.peek(); - - if let Some(&':') = second_peek { - // It's :: - consume both colons - self.advance(); // consume first : - self.advance(); // consume second : - - // Read the second identifier (the actual name) - let second_ident = self.read_identifier(); - - // Return namespaced identifier - return Token::Identifier(Name { - namespace: Some(first_ident), - name: second_ident, - }); + fn scan_number(&mut self) -> Token { + match self.read_number() { + Ok(num) => Token::Integer(num), + Err(e) => { + self.error(&e); + // Skip the invalid number + while let Some(c) = self.peek() { + if !c.is_alphanumeric() && c != '_' { + break; + } + self.advance(); + } + Token::Integer(0) } - // else: It's a single colon (type annotation) - DON'T consume it - // Just fall through and return the identifier } - - // No namespace separator - just a regular identifier - Token::Identifier(Name { - namespace: None, - name: first_ident, - }) } fn read_number(&mut self) -> Result { - let current = self.current.unwrap(); - // Check for hex (0x) or binary (0b) prefix - if current == '0' { - if let Some(&next_char) = self.peek() { - match next_char { - 'x' | 'X' => { - self.advance(); // consume '0' - self.advance(); // consume 'x' - return self.read_hex_number(); - } - 'b' | 'B' => { - self.advance(); // consume '0' - self.advance(); // consume 'b' - return self.read_binary_number(); - } - _ => {} + if self.current == Some('0') { + match self.peek() { + Some('x') | Some('X') => { + self.advance(); // consume '0' + self.advance(); // consume 'x' + return self.read_hex_number(); } + Some('b') | Some('B') => { + self.advance(); // consume '0' + self.advance(); // consume 'b' + return self.read_binary_number(); + } + _ => {} } } @@ -346,10 
+430,13 @@ impl<'a> Lexer<'a> { num_str.push(c); } - while let Some(&c) = self.peek() { + while let Some(c) = self.peek() { if c.is_ascii_digit() { self.advance(); num_str.push(c); + } else if c == '_' { + // Allow underscores as separators (like Rust) + self.advance(); } else { break; } @@ -363,17 +450,19 @@ impl<'a> Lexer<'a> { fn read_hex_number(&mut self) -> Result { let mut num_str = String::new(); - // Read current character if it's a hex digit + // Read the first hex digit (current character) if let Some(c) = self.current { if c.is_ascii_hexdigit() { num_str.push(c); } } - while let Some(&c) = self.peek() { + while let Some(c) = self.peek() { if c.is_ascii_hexdigit() { self.advance(); num_str.push(c); + } else if c == '_' { + self.advance(); // Allow underscores as separators } else { break; } @@ -390,17 +479,19 @@ impl<'a> Lexer<'a> { fn read_binary_number(&mut self) -> Result { let mut num_str = String::new(); - // Read current character if it's a binary digit + // Read the first binary digit (current character) if let Some(c) = self.current { if c == '0' || c == '1' { num_str.push(c); } } - while let Some(&c) = self.peek() { + while let Some(c) = self.peek() { if c == '0' || c == '1' { self.advance(); num_str.push(c); + } else if c == '_' { + self.advance(); // Allow underscores as separators } else { break; } @@ -414,6 +505,27 @@ impl<'a> Lexer<'a> { .map_err(|_| format!("Invalid binary number: {}", num_str)) } + // ======================================================================== + // String and Character Literals + // ======================================================================== + + fn scan_string(&mut self) -> Token { + match self.read_string() { + Ok(s) => Token::String(s), + Err(e) => { + self.error(&e); + // Skip to the end of the string or newline + while let Some(c) = self.current { + if c == '"' || c == '\n' { + break; + } + self.advance(); + } + Token::String(String::new()) + } + } + } + fn read_string(&mut self) -> Result { 
self.advance(); // Skip the opening quote let mut s = String::new(); @@ -423,6 +535,10 @@ impl<'a> Lexer<'a> { return Ok(s); } + if c == '\n' { + return Err("Unterminated string literal (newline)".to_string()); + } + // Handle escape sequences if c == '\\' { self.advance(); @@ -433,7 +549,14 @@ impl<'a> Lexer<'a> { 'r' => '\r', '\\' => '\\', '"' => '"', - _ => escaped, // For now, just use the character as-is + '\'' => '\'', + '0' => '\0', + _ => { + return Err(format!( + "Invalid escape sequence: \\{}", + escaped + )); + } }; s.push(escaped_char); } else { @@ -449,81 +572,213 @@ impl<'a> Lexer<'a> { Err("Unterminated string literal".to_string()) } - fn match_next(&mut self, expected: char) -> bool { - match self.peek() { - Some(&c) if c == expected => { - self.advance(); - true - } - _ => false, - } - } - - fn scan_single_char_token(&mut self, c: char) -> Option { - match c { - '(' => Some(Token::LeftParen), - ')' => Some(Token::RightParen), - '{' => Some(Token::LeftBrace), - '}' => Some(Token::RightBrace), - ';' => Some(Token::Semicolon), - ',' => Some(Token::Comma), - '&' => Some(Token::Amphersand), - '+' => Some(Token::Plus), - '*' => Some(Token::Star), - _ => None, - } - } - - fn scan_operator(&mut self, c: char) -> Option { - match c { - '-' => Some(if self.match_next('>') { - Token::RightArrow - } else { - Token::Minus - }), - '!' 
=> Some(if self.match_next('=') { - Token::BangEqual - } else { - Token::Bang - }), - '=' => Some(if self.match_next('=') { - Token::EqualEqual - } else { - Token::Assign - }), - '<' => Some(if self.match_next('=') { - Token::LessEqual - } else { - Token::Less - }), - '>' => Some(if self.match_next('=') { - Token::GreaterEqual - } else { - Token::Greater - }), - ':' => { - // Single colon (for type annotations) - // Note: :: is handled in keyword_or_identifier for namespaces - Some(Token::Colon) - } - '/' => { - // Check if it's a comment or division - if let Some(&next) = self.peek() { - if next == '/' || next == '*' { - // It's a comment, don't consume it here - // Let skip_whitespace_and_comments handle it - None - } else { - Some(Token::Slash) + fn scan_char(&mut self) -> Token { + match self.read_char() { + Ok(ch) => Token::Char(ch), + Err(e) => { + self.error(&e); + // Skip to the end of the char literal + while let Some(c) = self.current { + if c == '\'' || c == '\n' { + break; + } + self.advance(); + } + Token::Char('\0') + } + } + } + + fn read_char(&mut self) -> Result { + self.advance(); // Skip opening quote + + let ch = match self.current { + Some('\\') => { + // Handle escape sequences + self.advance(); + match self.current { + Some('n') => '\n', + Some('t') => '\t', + Some('r') => '\r', + Some('\\') => '\\', + Some('\'') => '\'', + Some('"') => '"', + Some('0') => '\0', + Some(c) => return Err(format!("Invalid escape sequence: \\{}", c)), + None => { + return Err( + "Unexpected end after escape in char literal".to_string() + ); } - } else { - Some(Token::Slash) } } - _ => None, + Some('\'') => return Err("Empty character literal".to_string()), + Some('\n') => return Err("Unterminated character literal".to_string()), + Some(c) => c, + None => return Err("Unterminated character literal".to_string()), + }; + + self.advance(); // Move to closing quote + + if self.current != Some('\'') { + return Err( + "Character literal must contain exactly one 
character".to_string() + ); + } + + Ok(ch) + } + + // ======================================================================== + // Operators and Punctuation + // ======================================================================== + + fn scan_operator(&mut self, c: char) -> Token { + match c { + // Single-character tokens that can't be extended + '(' => Token::LeftParen, + ')' => Token::RightParen, + '{' => Token::LeftBrace, + '}' => Token::RightBrace, + '[' => Token::LeftBracket, + ']' => Token::RightBracket, + ';' => Token::Semicolon, + ',' => Token::Comma, + '.' => Token::Dot, + '~' => Token::Tilde, + ':' => Token::Colon, // '::' is handled in identifier scanning + + // Operators that may have compound forms + '+' => { + if self.match_next('+') { + Token::PlusPlus + } else if self.match_next('=') { + Token::PlusEqual + } else { + Token::Plus + } + } + + '-' => { + if self.match_next('-') { + Token::MinusMinus + } else if self.match_next('>') { + Token::RightArrow + } else if self.match_next('=') { + Token::MinusEqual + } else { + Token::Minus + } + } + + '*' => { + if self.match_next('=') { + Token::StarEqual + } else { + Token::Star + } + } + + '/' => { + // Comments are handled in skip_whitespace_and_comments + if self.match_next('=') { + Token::SlashEqual + } else { + Token::Slash + } + } + + '%' => { + if self.match_next('=') { + Token::PercentEqual + } else { + Token::Percent + } + } + + '&' => { + if self.match_next('&') { + Token::LogicalAnd + } else if self.match_next('=') { + Token::AndEqual + } else { + Token::Ampersand + } + } + + '|' => { + if self.match_next('|') { + Token::LogicalOr + } else if self.match_next('=') { + Token::OrEqual + } else { + Token::Pipe + } + } + + '^' => { + if self.match_next('=') { + Token::XorEqual + } else { + Token::Caret + } + } + + '!' 
=> { + if self.match_next('=') { + Token::BangEqual + } else { + Token::Bang + } + } + + '=' => { + if self.match_next('=') { + Token::EqualEqual + } else { + Token::Assign + } + } + + '<' => { + if self.match_next('<') { + if self.match_next('=') { + Token::ShlEqual + } else { + Token::LeftShift + } + } else if self.match_next('=') { + Token::LessEqual + } else { + Token::Less + } + } + + '>' => { + if self.match_next('>') { + if self.match_next('=') { + Token::ShrEqual + } else { + Token::RightShift + } + } else if self.match_next('=') { + Token::GreaterEqual + } else { + Token::Greater + } + } + + _ => { + self.error(&format!("Unexpected character: '{}'", c)); + Token::Eof // This shouldn't happen + } } } + // ======================================================================== + // Main Token Scanning + // ======================================================================== + pub fn next_token(&mut self) -> Token { self.skip_whitespace_and_comments(); @@ -531,90 +786,40 @@ impl<'a> Lexer<'a> { return Token::Eof; }; - // Try single-character tokens first - if let Some(token) = self.scan_single_char_token(c) { - self.advance(); - return token; - } + let token = match c { + // Identifiers and keywords + 'a'..='z' | 'A'..='Z' | '_' => self.scan_identifier_or_keyword(), - // Try operators (may be multi-character) - if let Some(token) = self.scan_operator(c) { - self.advance(); - return token; - } + // Numbers + '0'..='9' => self.scan_number(), - // Char literals - if c == '\'' { - let mut value = ' '; - self.advance(); - if let Some(ch) = self.current { - value = ch; - self.advance(); - } - if self.current == Some('\'') { - self.advance(); - return Token::Char(value); - } - eprintln!("Lexer error on line {}: Invalid char literal", self.line); - } + // String literals + '"' => self.scan_string(), - // String literals - if c == '"' { - let token = match self.read_string() { - Ok(s) => Token::String(s), - Err(e) => { - eprintln!("Lexer error on line {}: {}", 
self.line, e); - // Skip to next quote or end - while let Some(ch) = self.current { - if ch == '"' || ch == '\n' { - break; - } - self.advance(); - } - Token::String(String::new()) - } - }; - self.advance(); - return token; - } + // Character literals + '\'' => self.scan_char(), - // Identifiers and keywords (including namespaced identifiers) - if c.is_alphabetic() || c == '_' { - let token = self.keyword_or_identifier(); - self.advance(); - return token; - } + // Operators and punctuation + _ => self.scan_operator(c), + }; - // Numbers (decimal, hex, binary) - if c.is_ascii_digit() { - let token = match self.read_number() { - Ok(num) => Token::Integer(num), - Err(e) => { - eprintln!("Lexer error on line {}: {}", self.line, e); - // Skip invalid number - while let Some(&ch) = self.peek() { - if !ch.is_alphanumeric() { - break; - } - self.advance(); - } - Token::Integer(0) - } - }; - self.advance(); - return token; - } - - // Unknown character - skip it - eprintln!( - "Lexer warning on line {}: Skipping unknown character '{}'", - self.line, c - ); self.advance(); - self.next_token() + token + } + + // ======================================================================== + // Error Handling + // ======================================================================== + + fn error(&self, message: &str) { + eprintln!("Lexer error on line {}: {}", self.line, message); } } +// ======================================================================== +// Iterator Implementation +// ======================================================================== + impl<'a> Iterator for Lexer<'a> { type Item = Token; @@ -625,3 +830,78 @@ impl<'a> Iterator for Lexer<'a> { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_operators() { + let input = "+ ++ += - -- -= * *= / /= % %= & &= && | |= || ^ ^= ! 
!= = == < <= << <<= > >= >> >>="; + let mut lexer = Lexer::new(input); + + let expected = vec![ + Token::Plus, + Token::PlusPlus, + Token::PlusEqual, + Token::Minus, + Token::MinusMinus, + Token::MinusEqual, + Token::Star, + Token::StarEqual, + Token::Slash, + Token::SlashEqual, + Token::Percent, + Token::PercentEqual, + Token::Ampersand, + Token::AndEqual, + Token::LogicalAnd, + Token::Pipe, + Token::OrEqual, + Token::LogicalOr, + Token::Caret, + Token::XorEqual, + Token::Bang, + Token::BangEqual, + Token::Assign, + Token::EqualEqual, + Token::Less, + Token::LessEqual, + Token::LeftShift, + Token::ShlEqual, + Token::Greater, + Token::GreaterEqual, + Token::RightShift, + Token::ShrEqual, + ]; + + for expected_token in expected { + assert_eq!(lexer.next_token(), expected_token); + } + } + + #[test] + fn test_numbers() { + let input = "42 0x2A 0b101010 123_456"; + let mut lexer = Lexer::new(input); + + assert_eq!(lexer.next_token(), Token::Integer(42)); + assert_eq!(lexer.next_token(), Token::Integer(42)); + assert_eq!(lexer.next_token(), Token::Integer(42)); + assert_eq!(lexer.next_token(), Token::Integer(123456)); + } + + #[test] + fn test_namespaced_identifier() { + let input = "print::println std::io::read"; + let mut lexer = Lexer::new(input); + + let first = lexer.next_token(); + if let Token::Identifier(name) = first { + assert_eq!(name.namespace, Some("print".to_string())); + assert_eq!(name.name, "println"); + } else { + panic!("Expected namespaced identifier"); + } + } +} diff --git a/compiler/src/frontend/dsc/parser.rs b/compiler/src/frontend/dsc/parser.rs index 2fa533b..a922a8e 100644 --- a/compiler/src/frontend/dsc/parser.rs +++ b/compiler/src/frontend/dsc/parser.rs @@ -353,12 +353,12 @@ impl Parser { let mut expr = self.parse_additive()?; while let Some(op) = match self.peek_next()? 
{ - Token::EqualEqual => Some(BinaryOperator::Eq), - Token::BangEqual => Some(BinaryOperator::Ne), - Token::Less => Some(BinaryOperator::Lt), - Token::Greater => Some(BinaryOperator::Gt), - Token::LessEqual => Some(BinaryOperator::Le), - Token::GreaterEqual => Some(BinaryOperator::Ge), + Token::EqualEqual => Some(BinaryOperator::Equal), + Token::BangEqual => Some(BinaryOperator::NotEqual), + Token::Less => Some(BinaryOperator::LessThan), + Token::Greater => Some(BinaryOperator::GreaterThan), + Token::LessEqual => Some(BinaryOperator::LessOrEqual), + Token::GreaterEqual => Some(BinaryOperator::GreaterOrEqual), _ => None, } { self.next()?; @@ -412,11 +412,27 @@ impl Parser { fn parse_unary(&mut self) -> ParseResult { let op = match self.peek_next()? { + // prefix inc/dec + Token::PlusPlus => UnaryOperator::Increment, + Token::MinusMinus => UnaryOperator::Decrement, + + // arithmetic Token::Plus => UnaryOperator::Plus, Token::Minus => UnaryOperator::Minus, + + // pointer Token::Star => UnaryOperator::Dereference, - Token::Amphersand => UnaryOperator::Reference, - _ => return ParseResult::Accept(self.parse_primary()?), + Token::Ampersand => UnaryOperator::AddressOf, + + // boolean + Token::Bang => UnaryOperator::LogicalNot, + Token::Tilde => UnaryOperator::BitwiseNot, + + Token::SizeOf => UnaryOperator::SizeOf, + _ => { + let expr = self.parse_primary()?; + return self.parse_postfix(expr); + } }; self.next()?; @@ -428,6 +444,99 @@ impl Parser { }) } + fn parse_postfix( + &mut self, + mut expr: Expression, + ) -> ParseResult { + loop { + match self.peek_next()? 
{ + // Type cast: expr as Type + Token::As => { + self.next()?; // consume 'as' + let target_type = self.parse_type()?; + expr = Expression::TypeCast { + expr: Box::new(expr), + target_type, + type_id: None, + }; + } + + // Postfix increment/decrement + Token::PlusPlus => { + self.next()?; + expr = Expression::UnaryPostfix { + op: UnaryOperator::Increment, + operand: Box::new(expr), + type_id: None, + }; + } + Token::MinusMinus => { + self.next()?; + expr = Expression::UnaryPostfix { + op: UnaryOperator::Decrement, + operand: Box::new(expr), + type_id: None, + }; + } + + // Array indexing: expr[index] + Token::LeftBracket => { + self.next()?; // consume '[' + let index = Box::new(self.parse_expression()?); + + let _ = expect_tt!(self.next()?, RightBracket)?; + + expr = Expression::IndexAccess { + expr: Box::new(expr), + index, + type_id: None, + }; + } + + // Function call: expr(args...) + Token::LeftParen => { + self.next()?; // consume '(' + let mut args = Vec::new(); + + if !matches!(self.peek_next()?, Token::RightParen) { + loop { + args.push(self.parse_expression()?); + if !matches!(self.peek_next()?, Token::Comma) { + break; + } + self.next()?; // consume comma + } + } + + let _ = expect_tt!(self.next()?, RightParen)?; + + if let Expression::Variable { name, .. } = expr { + expr = Expression::Call { + func: Call { name, args }, + type_id: None, + }; + } + } + + // Member access: expr.member (if you support structs) + Token::Dot => { + self.next()?; + let field_name = expect_value!(self.next()?, Identifier)?; + expr = Expression::MemberAccess { + expr: Box::new(expr), + field_name, + type_id: None, + }; + } + + // No more postfix operations + _ => break, + } + } + + ParseResult::Accept(expr) + } + fn parse_primary(&mut self) -> ParseResult { match self.peek_next()? 
{ Token::Integer(value) => { @@ -441,39 +550,37 @@ impl Parser { self.next()?; ParseResult::Accept(Expression::StringLiteral(value)) } - Token::Identifier(_) => { - let name = expect_value!(self.next()?, Identifier)?; + Token::Char(value) => { + self.next()?; + ParseResult::Accept(Expression::CharLiteral(value)) + } - if matches!(self.peek_next()?, Token::LeftParen) { - // Function call - self.next()?; - let mut args = Vec::new(); + Token::Identifier(name) => { + self.next()?; + ParseResult::Accept(Expression::Variable { + name, + expr_type: None, + }) + } + Token::LeftBracket => { + self.next()?; // consume '[' + let mut elements = Vec::new(); - if !matches!(self.peek_next()?, Token::RightParen) { - args.push(self.parse_expression()?); - - while matches!(self.peek_next()?, Token::Comma) { - self.next()?; - args.push(self.parse_expression()?); + if !matches!(self.peek_next()?, Token::RightBracket) { + loop { + elements.push(self.parse_expression()?); + if !matches!(self.peek_next()?, Token::Comma) { + break; } + self.next()?; // consume comma } - - let _ = expect_tt!(self.next()?, RightParen)?; - - ParseResult::Accept(Expression::Call { - func: Call { - name: name.clone(), - args, - }, - - type_id: None, - }) - } else { - ParseResult::Accept(Expression::Variable { - name, - expr_type: None, - }) } + + expect_tt!(self.next()?, RightBracket)?; + ParseResult::Accept(Expression::ArrayLiteral { + elements, + type_id: None, + }) } Token::LeftParen => { self.next()?; diff --git a/compiler/src/model.rs b/compiler/src/model.rs index df1269f..a1abd87 100644 --- a/compiler/src/model.rs +++ b/compiler/src/model.rs @@ -65,6 +65,26 @@ pub enum TypeId { Struct { name: Name, fields: Vec }, } +impl TypeId { + pub fn size(&self) -> usize { + match self { + Self::U8 => 1, + Self::U16 => 2, + Self::U32 => 4, + Self::I8 => 1, + Self::I16 => 2, + Self::I32 => 4, + Self::Bool => 1, + Self::Char => 1, + Self::Void => 0, + Self::Ptr(t) => t.size(), + Self::Ref(t) => t.size(), + 
Self::Array(t, size) => t.size() * size, + Self::Struct { fields, .. } => fields.iter().map(|t| t.size()).sum(), + } + } +} + impl fmt::Display for TypeId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -169,10 +189,38 @@ pub enum Expression { // Post-Semantic Analysis type_id: Option, }, + UnaryPostfix { + op: UnaryOperator, + operand: Box, + + // Post-Semantic Analysis + type_id: Option, + }, Variable { name: Name, expr_type: Option, }, + TypeCast { + expr: Box, + target_type: TypeId, + + // Post-Semantic Analysis + type_id: Option, + }, + IndexAccess { + expr: Box, + index: Box, + + // Post-Semantic Analysis + type_id: Option, + }, + MemberAccess { + expr: Box, + field_name: Name, + + // Post-Semantic Analysis + type_id: Option, + }, Call { func: Call, @@ -187,6 +235,10 @@ pub enum Expression { }, StringLiteral(String), CharLiteral(char), + ArrayLiteral { + elements: Vec, + type_id: Option, + }, } #[derive(Debug, Clone)] @@ -204,8 +256,17 @@ impl Expression { Expression::Call { .. } => false, Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(), Expression::Unary { operand, .. } => operand.is_pure(), + Expression::UnaryPostfix { operand, .. } => operand.is_pure(), Expression::Empty => true, Expression::Variable { .. } => true, + Expression::TypeCast { expr, .. } => expr.is_pure(), + Expression::IndexAccess { expr, index, .. } => { + expr.is_pure() && index.is_pure() + } + Expression::MemberAccess { expr, .. } => expr.is_pure(), + Expression::ArrayLiteral { elements, type_id } => { + elements.iter().all(|element| element.is_pure()) + } } } @@ -225,10 +286,24 @@ impl Expression { Expression::Unary { type_id, .. } => { type_id.clone().ok_or(CompilerError::UnknownType) } + Expression::UnaryPostfix { type_id, .. } => { + type_id.clone().ok_or(CompilerError::UnknownType) + } Expression::Empty => Ok(TypeId::Void), Expression::Variable { expr_type, .. 
} => { expr_type.clone().ok_or(CompilerError::UnknownType) } + Expression::TypeCast { type_id, .. } => { + type_id.clone().ok_or(CompilerError::UnknownType) + } + Expression::IndexAccess { expr, .. } => expr.type_id(), + Expression::MemberAccess { expr, .. } => expr.type_id(), + Expression::ArrayLiteral { elements, .. } => { + let element_type = elements + .first() + .map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void)); + Ok(TypeId::Array(Box::new(element_type), elements.len())) + } } } } @@ -236,31 +311,56 @@ impl Expression { #[allow(unused)] #[derive(Debug, Clone, PartialEq)] pub enum BinaryOperator { + // arithmetic Add, Sub, Mul, Div, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, + Mod, + + // comparison + Equal, + NotEqual, + LessThan, + GreaterThan, + LessOrEqual, + GreaterOrEqual, + + // bitwise + BitwiseAnd, + BitwiseOr, + BitwiseXor, + + // logical + LogicalAnd, + LogicalOr, + + // shift + LeftShift, + RightShift, } impl fmt::Display for BinaryOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - BinaryOperator::Add => write!(f, "+"), - BinaryOperator::Sub => write!(f, "-"), - BinaryOperator::Mul => write!(f, "*"), - BinaryOperator::Div => write!(f, "/"), - BinaryOperator::Eq => write!(f, "=="), - BinaryOperator::Ne => write!(f, "!="), - BinaryOperator::Lt => write!(f, "<"), - BinaryOperator::Gt => write!(f, ">"), - BinaryOperator::Le => write!(f, "<="), - BinaryOperator::Ge => write!(f, ">="), + Self::Add => write!(f, "+"), + Self::Sub => write!(f, "-"), + Self::Mul => write!(f, "*"), + Self::Div => write!(f, "/"), + Self::Mod => write!(f, "%"), + Self::Equal => write!(f, "=="), + Self::NotEqual => write!(f, "!="), + Self::LessThan => write!(f, "<"), + Self::GreaterThan => write!(f, ">"), + Self::LessOrEqual => write!(f, "<="), + Self::GreaterOrEqual => write!(f, ">="), + Self::BitwiseAnd => write!(f, "&"), + Self::BitwiseOr => write!(f, "|"), + Self::BitwiseXor => write!(f, "^"), + Self::LogicalAnd => write!(f, "&&"), + 
Self::LogicalOr => write!(f, "||"), + Self::LeftShift => write!(f, "<<"), + Self::RightShift => write!(f, ">>"), } } } @@ -269,17 +369,29 @@ impl fmt::Display for BinaryOperator { pub enum UnaryOperator { Plus, Minus, - Reference, + AddressOf, Dereference, + CastAs, + BitwiseNot, + LogicalNot, + Increment, + Decrement, + SizeOf, } impl fmt::Display for UnaryOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - UnaryOperator::Plus => write!(f, "+"), - UnaryOperator::Minus => write!(f, "-"), - UnaryOperator::Dereference => write!(f, "*"), - UnaryOperator::Reference => write!(f, "&"), + Self::Increment => write!(f, "++"), + Self::Decrement => write!(f, "--"), + Self::Plus => write!(f, "+"), + Self::Minus => write!(f, "-"), + Self::Dereference => write!(f, "*"), + Self::AddressOf => write!(f, "&"), + Self::CastAs => write!(f, "as"), + Self::BitwiseNot => write!(f, "~"), + Self::LogicalNot => write!(f, "!"), + Self::SizeOf => write!(f, "sizeof"), } } }