From 931af90789a0f2900ac443250da6aed92e21f548 Mon Sep 17 00:00:00 2001 From: zxq5 Date: Tue, 10 Feb 2026 10:03:48 +0000 Subject: [PATCH] - renamed assembler_runner to just assembler - implemented type parsing including custom types and generics (useless for now as we do no semantic analysis) - implemented struct literal parsing - implemented struct definition parsing (no generics yet) - implemented tuple parsing - registers are now allocated starting from zero - updated to-dos --- assembler/Cargo.toml | 2 +- compiler/src/backend/dsa/codegen.rs | 11 ++ compiler/src/backend/dsa/registers.rs | 41 +++--- compiler/src/frontend/dsc/lexer.rs | 3 + compiler/src/frontend/dsc/mod.rs | 2 +- compiler/src/frontend/dsc/parser.rs | 204 +++++++++++++++++++++----- compiler/src/model.rs | 98 +++++++++++-- docs/todo.md | 26 +++- 8 files changed, 316 insertions(+), 71 deletions(-) diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index 676d37d..0b577da 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -5,7 +5,7 @@ edition.workspace = true authors.workspace = true [[bin]] -name = "assembler_runner" +name = "assembler" path = "src/main.rs" [lib] diff --git a/compiler/src/backend/dsa/codegen.rs b/compiler/src/backend/dsa/codegen.rs index 7ba886c..4ca3c8e 100644 --- a/compiler/src/backend/dsa/codegen.rs +++ b/compiler/src/backend/dsa/codegen.rs @@ -65,6 +65,10 @@ impl CodeGenerator { Declaration::Dependency(Dependency { name, .. }) => { self.symbols.push(name) } + Declaration::Struct { .. } => {} /* we can't do any code generation for + * a struct yet. we may need to later + * once these become class-like + * objects with implementations */ } } @@ -164,6 +168,8 @@ impl CodeGenerator { Declaration::Dependency(Dependency { name, path }) => { self.imports.insert(name, path); } + Declaration::Struct { .. } => {} /* can't do any codegen for these yet, + * they're just types. 
*/ }; Ok(()) @@ -536,6 +542,11 @@ impl CodeGenerator { } Expression::ArrayLiteral { elements, type_id } => todo!(), + Expression::StructLiteral { + name, + fields, + type_id, + } => todo!(), Expression::Variable { name, .. } => { if self.is_global(&name.name) { diff --git a/compiler/src/backend/dsa/registers.rs b/compiler/src/backend/dsa/registers.rs index 90da055..2436b8c 100644 --- a/compiler/src/backend/dsa/registers.rs +++ b/compiler/src/backend/dsa/registers.rs @@ -18,7 +18,7 @@ pub struct RegisterAllocator { stack_offset: i32, /// Track which registers are currently in use - in_use: HashMap, + in_use: Vec<(Register, bool)>, } #[derive(Debug, Clone)] @@ -85,7 +85,7 @@ impl RegisterAllocator { // println!("finding! {:#?}", self.in_use); if let Some(reg) = self.find_free_register() { - self.in_use.insert(reg, true); + self.in_use[reg as usize].1 = true; return Ok((reg, Vec::new())); } @@ -133,7 +133,7 @@ impl RegisterAllocator { // This is a true temporary - safe to free if !matches!(reg, Register::Zero | Register::Null) { - self.in_use.insert(reg, false); + self.in_use[reg as usize].1 = false; } } @@ -144,7 +144,7 @@ impl RegisterAllocator { && !matches!(reg, Register::Zero | Register::Null) { self.register_contents.remove(&reg); - self.in_use.insert(reg, false); + self.in_use[reg as usize].1 = false; } self.variable_locations.remove(var); @@ -252,7 +252,7 @@ impl RegisterAllocator { .insert(var_name.to_string(), Location::register(*source_reg)); self.register_contents .insert(*source_reg, var_name.to_string()); - self.in_use.insert(*source_reg, true); + self.in_use[*source_reg as usize].1 = true; return Vec::new(); } @@ -264,7 +264,7 @@ impl RegisterAllocator { .insert(var_name.to_string(), Location::register(free_reg)); self.register_contents .insert(free_reg.clone(), var_name.to_string()); - self.in_use.insert(free_reg, true); + self.in_use[free_reg as usize].1 = true; return vec![format!("\tmov {}, {}", source_reg, free_reg)]; } @@ -459,26 +459,31 @@ impl
RegisterAllocator { pub fn get_caller_saved_registers(&self) -> Vec { self.register_contents .iter() - .filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false)) + .filter(|(reg, _)| { + self.in_use + .get(**reg as usize) + .unwrap_or(&(Register::Null, false)) + .1 + }) .map(|(reg, _)| reg.clone()) .collect() } /// Save caller-saved registers before a function call /// Returns assembly code to save them - pub fn _save_caller_saved(&mut self) -> Vec { - let mut code = Vec::new(); + // pub fn _save_caller_saved(&mut self) -> Vec { + // let mut code = Vec::new(); - // For simplicity, save all currently used registers - // In a more sophisticated compiler, you'd only save registers that are live - for (reg, _) in self.register_contents.clone() { - if *self.in_use.get(&reg).unwrap_or(&false) { - code.push(format!("\tpush {}", reg)); - } - } + // // For simplicity, save all currently used registers + // // In a more sophisticated compiler, you'd only save registers that are live + // for (reg, _) in self.register_contents.clone() { + // if *self.in_use.get(reg as usize).unwrap_or(&false) { + // code.push(format!("\tpush {}", reg)); + // } + // } - code - } + // code + // } /// Restore caller-saved registers after a function call /// Returns assembly code to restore them diff --git a/compiler/src/frontend/dsc/lexer.rs b/compiler/src/frontend/dsc/lexer.rs index 5805f6a..a5369ec 100644 --- a/compiler/src/frontend/dsc/lexer.rs +++ b/compiler/src/frontend/dsc/lexer.rs @@ -18,6 +18,7 @@ pub enum Token { Const, As, SizeOf, + Struct, // Identifiers and literals Identifier(Name), @@ -104,6 +105,7 @@ impl Token { Token::Const => "Const", Token::Static => "Static", Token::Include => "Include", + Token::Struct => "Struct", Token::Fn => "Fn", Token::If => "If", Token::Let => "Let", @@ -376,6 +378,7 @@ impl<'a> Lexer<'a> { "static" => Some(Token::Static), "as" => Some(Token::As), "sizeof" => Some(Token::SizeOf), + "struct" => Some(Token::Struct), _ => None, } } diff --git
a/compiler/src/frontend/dsc/mod.rs b/compiler/src/frontend/dsc/mod.rs index 5718e6f..7778fa8 100644 --- a/compiler/src/frontend/dsc/mod.rs +++ b/compiler/src/frontend/dsc/mod.rs @@ -13,7 +13,7 @@ pub fn generate_ast(input: &str) -> Result { let lexer = lexer::Lexer::new(&input); let tokens = lexer.collect::>(); - // println!("{tokens:?}"); + println!("{tokens:#?}"); log(&format!("Parsing {} Tokens...", tokens.len())); diff --git a/compiler/src/frontend/dsc/parser.rs b/compiler/src/frontend/dsc/parser.rs index 78cf2a9..009b521 100644 --- a/compiler/src/frontend/dsc/parser.rs +++ b/compiler/src/frontend/dsc/parser.rs @@ -39,6 +39,10 @@ impl Parser { return self.parse_func(); } + if expect_tt!(self.peek_next()?, Struct).accepted() { + return self.parse_struct(); + } + if expect_tt!(self.peek_next()?, Include).accepted() { // expect include keyword let _ = self.next(); @@ -99,6 +103,28 @@ impl Parser { ParseResult::Reject(CompilerError::UnexpectedEndOfInput) } + fn parse_struct(&mut self) -> ParseResult { + let _ = expect_tt!(self.next()?, Struct)?; + let name = expect_value!(self.next()?, Identifier)?; + + let _ = expect_tt!(self.next()?, LeftBrace)?; + + let mut fields = Vec::new(); + while expect_tt!(self.peek_next()?, Identifier).accepted() { + let arg = self.parse_var_decl()?; + fields.push(arg); + + if expect_tt!(self.peek_next()?, Comma).accepted() { + self.next()?; + } else { + break; + } + } + + let _ = expect_tt!(self.next()?, RightBrace)?; + return ParseResult::Accept(Declaration::Struct { name, fields }); + } + fn parse_func(&mut self) -> ParseResult { // expect function keyword let _ = expect_tt!(self.next()?, Fn); @@ -318,18 +344,28 @@ impl Parser { }); } - // handle assignment without "let" - let name = expect_value!(self.peek_next()?, Identifier); - if name.accepted() { - let varname = name?; - if expect_tt!(self.peek(1)?, LeftParen).accepted() { - let expr = self.parse_expression()?; // a function call expr - let _ = expect_tt!(self.next()?, 
Semicolon)?; - return ParseResult::Accept(Statement::Expression { expr }); - } - + // handle an in-place function call + if let ParseResult::Accept(name) = expect_value!(self.peek_next()?, Identifier) + && let ParseResult::Accept(operator) = expect_tt!( + self.peek(1)?, + Assign, + PlusEqual, + MinusEqual, + StarEqual, + SlashEqual, + PercentEqual, + AndEqual, + OrEqual, + XorEqual, + ShlEqual, + ShrEqual + ) + { + // consume name token self.next()?; - let operator = match self.peek_next()? { + + // pattern match to find operator + let operator = match operator { Token::Assign => AssignmentOperator::Assign, Token::PlusEqual => AssignmentOperator::AddAssign, Token::MinusEqual => AssignmentOperator::SubAssign, @@ -348,6 +384,7 @@ impl Parser { } }; + // consume operator token self.next()?; let value = self.parse_expression()?; @@ -355,15 +392,17 @@ impl Parser { let _ = expect_tt!(self.next()?, Semicolon); return ParseResult::Accept(Statement::Assign { - varname: varname.name, + varname: name.name, operator, value, }); } - ParseResult::Reject(CompilerError::UnexpectedToken( - self.peek_next()?.tt().to_string(), - )) + // parse an expression and a semicolon + let expr = self.parse_expression()?; + let _ = expect_tt!(self.next()?, Semicolon)?; + + return ParseResult::Accept(Statement::Expression { expr }); } fn parse_expression(&mut self) -> ParseResult { @@ -374,7 +413,7 @@ impl Parser { let left = self.parse_logical_and()?; let op = match self.peek_next()? { - Token::Ampersand => BinaryOperator::LogicalOr, + Token::LogicalOr => BinaryOperator::LogicalOr, _ => return ParseResult::Accept(left), }; @@ -391,7 +430,7 @@ impl Parser { let left = self.parse_bitwise_or()?; let op = match self.peek_next()? { - Token::Ampersand => BinaryOperator::LogicalAnd, + Token::LogicalAnd => BinaryOperator::LogicalAnd, _ => return ParseResult::Accept(left), }; @@ -408,7 +447,7 @@ impl Parser { let left = self.parse_bitwise_xor()?; let op = match self.peek_next()? 
{ - Token::Ampersand => BinaryOperator::BitwiseOr, + Token::Pipe => BinaryOperator::BitwiseOr, _ => return ParseResult::Accept(left), }; @@ -425,7 +464,7 @@ impl Parser { let left = self.parse_bitwise_and()?; let op = match self.peek_next()? { - Token::Ampersand => BinaryOperator::BitwiseXor, + Token::Caret => BinaryOperator::BitwiseXor, _ => return ParseResult::Accept(left), }; @@ -678,9 +717,39 @@ impl Parser { Token::Identifier(name) => { self.next()?; - ParseResult::Accept(Expression::Variable { + + // if the next token isn't the beginning of a struct literal this is just + // an identifier. + if !expect_tt!(self.peek_next()?, LeftBrace).accepted() { + return ParseResult::Accept(Expression::Variable { + name, + expr_type: None, + }); + } + + let _ = self.next()?; + + let mut fields = Vec::new(); + while !expect_tt!(self.peek_next()?, RightBrace).accepted() { + let name = expect_value!(self.next()?, Identifier)?; + let _ = expect_tt!(self.next()?, Colon)?; + let expr = self.parse_expression()?; + + fields.push((name, expr)); + + if expect_tt!(self.peek_next()?, Comma).accepted() { + self.next()?; + } else { + break; + } + } + + let _ = expect_tt!(self.next()?, RightBrace)?; + + ParseResult::Accept(Expression::StructLiteral { name, - expr_type: None, + fields, + type_id: None, }) } Token::LeftBracket => { @@ -729,21 +798,88 @@ impl Parser { } fn parse_type(&mut self) -> ParseResult { - // get the type name incl namespace - let typename = expect_value!(self.next()?, Identifier)?; + println!("yes {:?}", self.peek_next()?); - match typename.name.as_str() { - "u32" => ParseResult::Accept(TypeId::U32), - "u16" => ParseResult::Accept(TypeId::U16), - "u8" => ParseResult::Accept(TypeId::U8), - "i32" => ParseResult::Accept(TypeId::I32), - "i16" => ParseResult::Accept(TypeId::I16), - "i8" => ParseResult::Accept(TypeId::I8), - "void" => ParseResult::Accept(TypeId::Void), - "char" => ParseResult::Accept(TypeId::Char), - "str" => 
ParseResult::Accept(TypeId::Ptr(Box::new(TypeId::Char))), - _ => todo!("Implement parsing for other types!!"), + // parse primitive or named type + if expect_tt!(self.peek_next()?, Identifier).accepted() { + return self.parse_type_identifier(); } + + // parse array type + if expect_tt!(self.peek_next()?, LeftBracket).accepted() { + let _ = self.next()?; + + let internal_type = self.parse_type()?; + let _ = expect_tt!(self.next()?, Semicolon)?; + + let size = expect_value!(self.next()?, Integer)?; + + let _ = expect_tt!(self.next()?, RightBracket)?; + + return ParseResult::Accept(TypeId::Array { + r#type: Box::new(internal_type), + size: size as usize, + }); + } + + // parse tuple type + if expect_tt!(self.peek_next()?, LeftParen).accepted() { + let _ = self.next()?; + + let mut types = Vec::new(); + while !expect_tt!(self.peek_next()?, RightParen).accepted() { + types.push(self.parse_type()?); + if !expect_tt!(self.peek_next()?, Comma).accepted() { + break; + } + let _ = self.next()?; + } + let _ = expect_tt!(self.next()?, RightParen)?; + + return ParseResult::Accept(TypeId::Tuple(types)); + } + + ParseResult::Reject(CompilerError::Generic(format!( + "Parsing type but no valid type was detected: {:?}", + self.peek_next()? 
+ ))) + } + + fn parse_type_identifier(&mut self) -> ParseResult { + // get the type name incl namespace + let name = expect_value!(self.next()?, Identifier)?; + + let type_id = match name.name.as_str() { + "u32" => TypeId::U32, + "u16" => TypeId::U16, + "u8" => TypeId::U8, + "i32" => TypeId::I32, + "i16" => TypeId::I16, + "i8" => TypeId::I8, + "void" => TypeId::Void, + "char" => TypeId::Char, + "str" => TypeId::Ptr(Box::new(TypeId::Char)), + _ => { + let mut generics = Vec::new(); + if expect_tt!(self.peek_next()?, Less).accepted() { + let _ = self.next()?; + + // loop until we find the closing '>' + while !expect_tt!(self.peek_next()?, Greater).accepted() { + generics.push(self.parse_type()?); + if !expect_tt!(self.peek_next()?, Comma).accepted() { + break; + } + let _ = self.next()?; + } + let _ = expect_tt!(self.next()?, Greater)?; + } + + TypeId::UnknownCustom { name, generics } + } + }; + + ParseResult::Accept(type_id) } fn next(&mut self) -> ParseResult { diff --git a/compiler/src/model.rs b/compiler/src/model.rs index 1ad5cc4..83969b7 100644 --- a/compiler/src/model.rs +++ b/compiler/src/model.rs @@ -40,6 +40,10 @@ pub enum Declaration { is_const: bool, }, Dependency(Dependency), + Struct { + name: Name, + fields: Vec, + }, } #[derive(Debug, Clone)] @@ -62,8 +66,20 @@ pub enum TypeId { Void, Ptr(Box), Ref(Box), - Array(Box, usize), - Struct { name: Name, fields: Vec }, + Tuple(Vec), + Array { + r#type: Box, + size: usize, + }, + UnknownCustom { + name: Name, + generics: Vec, + }, + Struct { + name: Name, + fields: Vec, + generics: Vec, + }, } impl TypeId { @@ -80,7 +96,10 @@ impl TypeId { Self::Void => 0, Self::Ptr(t) => t.size(), Self::Ref(t) => t.size(), - Self::Array(t, size) => t.size() * size, + Self::Tuple(types) => types.iter().map(|t| t.size()).sum(), + Self::Array { r#type, size } => r#type.size() * size, + Self::UnknownCustom { .. } => 1, /* TODO: calculate type size during */ + // semantic analysis Self::Struct { fields, .. 
} => fields.iter().map(|t| t.size()).sum(), } } @@ -100,14 +119,47 @@ impl fmt::Display for TypeId { Self::Void => write!(f, "void"), Self::Ptr(t) => write!(f, "*{}", t), Self::Ref(t) => write!(f, "&{}", t), - Self::Array(t, len) => write!(f, "[{}; {}]", t, len), - Self::Struct { name, fields } => { - write!(f, "struct {} {{", name)?; - for (i, field) in fields.iter().enumerate() { - write!(f, "{}: {}", i, field)?; - } - write!(f, "}}") + Self::Tuple(elems) => write!( + f, + "({})", + elems + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", ") + ), + Self::Array { r#type, size } => write!(f, "[{}; {}]", r#type, size), + Self::UnknownCustom { name, generics } => { + write!( + f, + "{}<{}>", + name, + generics + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", ") + ) } + Self::Struct { + name, + fields, + generics, + } => write!( + f, + "struct<{}> {} {{{}}}", + generics + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", "), + name, + fields + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(", ") + ), } } } @@ -241,6 +293,11 @@ pub enum Expression { elements: Vec, type_id: Option, }, + StructLiteral { + name: Name, + fields: Vec<(Name, Expression)>, + type_id: Option, + }, } #[derive(Debug, Clone)] @@ -266,9 +323,12 @@ impl Expression { expr.is_pure() && index.is_pure() } Expression::MemberAccess { expr, .. } => expr.is_pure(), - Expression::ArrayLiteral { elements, type_id } => { + Expression::ArrayLiteral { elements, .. } => { elements.iter().all(|element| element.is_pure()) } + Expression::StructLiteral { fields, .. } => { + fields.iter().all(|(_, expr)| expr.is_pure()) + } } } @@ -304,7 +364,21 @@ impl Expression { let element_type = elements .first() .map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void)); - Ok(TypeId::Array(Box::new(element_type), elements.len())) + Ok(TypeId::Array { + r#type: Box::new(element_type), + size: elements.len(), + }) + } + Expression::StructLiteral { name, fields, .. 
} => { + let fields = fields + .iter() + .map(|(_, expr)| expr.type_id()) + .collect::, _>>()?; + Ok(TypeId::Struct { + name: name.clone(), + fields, + generics: Vec::new(), + }) } } } diff --git a/docs/todo.md b/docs/todo.md index 4ecd16e..4d565eb 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -1,10 +1,26 @@ +# General TODOs -# Compiler optimisations! +# Bugfixes +- [x] [EASY] Investigate logical and operator not compiling - either a lexer or parser issue. + - **note**: this was a parser issue. +# Missing features +- [x] [MEDIUM] Get shift operations working correctly. +- [ ] [MEDIUM] proper prefix/postfix inc/dec implementation. slightly more complex as we need to check for a variable and modify it in place +- [ ] [EASY] Add multiply and divide operations to code generation + - **note**: very easy to do but our division algorithm is hopelessly slow so not worth doing for now. + +# Performance Improvements +- [ ] [MEDIUM] implement a proper div/mod library that's not slow af. - [ ] [HARD] Immediate operations for values that support it (up to +/- u16::max for addi and subi respectively) - this requires significant complexity in code generation as we need to traverse down the tree when we come across these operations to prevent additional register allocations. -- [ ] [EASY] Add multiply and divide operations to code generation -- [ ] [MEDIUM] proper prefix/postfix inc/dec implementation. slightly more complex as we need to check for a variable and modify it in place -- [ ] [EASY] Investigate logical and operator not compiling - either a lexer or parser issue. -- [x] [MEDIUM] Get shift operations working correctly. +# Compiler optimisations + +# Codegen improvements +- [ ] [MEDIUM / time consuming] Add scoping to code generation +- [ ] [MEDIUM / time consuming] Rewrite entire codegen to improve code quality and make the code more readable.
+ - [ ] type-safe instruction builder + - [ ] Instruction & Register enums + - [ ] Instruction builder helper fns eg `fn add(left: &Register, right: &Register, dest: &Register) -> Instruction` + - [ ] Instruction Block types.