- renamed assembler_runner to just assembler

- implemented type parsing including custom types and generics (useless
  for now as we do no semantic analysis)
- implemented struct literal parsing
- implemented struct definition parsing (no generics yet)
- implemented tuple parsing
- registers are now allocated starting from zero
- updated to-dos
This commit is contained in:
2026-02-10 10:03:48 +00:00
parent 509b3465f1
commit 931af90789
8 changed files with 316 additions and 71 deletions
+1 -1
View File
@@ -5,7 +5,7 @@ edition.workspace = true
authors.workspace = true authors.workspace = true
[[bin]] [[bin]]
name = "assembler_runner" name = "assembler"
path = "src/main.rs" path = "src/main.rs"
[lib] [lib]
+11
View File
@@ -65,6 +65,10 @@ impl CodeGenerator {
Declaration::Dependency(Dependency { name, .. }) => { Declaration::Dependency(Dependency { name, .. }) => {
self.symbols.push(name) self.symbols.push(name)
} }
Declaration::Struct { .. } => {} /* we can't do any code generation for
* a struct yet. we may need to later
* once these become class-like
* objects with implementations */
} }
} }
@@ -164,6 +168,8 @@ impl CodeGenerator {
Declaration::Dependency(Dependency { name, path }) => { Declaration::Dependency(Dependency { name, path }) => {
self.imports.insert(name, path); self.imports.insert(name, path);
} }
Declaration::Struct { .. } => {} /* can't do any codegen for these yet,
* they're just types. */
}; };
Ok(()) Ok(())
@@ -536,6 +542,11 @@ impl CodeGenerator {
} }
Expression::ArrayLiteral { elements, type_id } => todo!(), Expression::ArrayLiteral { elements, type_id } => todo!(),
Expression::StructLiteral {
name,
fields,
type_id,
} => todo!(),
Expression::Variable { name, .. } => { Expression::Variable { name, .. } => {
if self.is_global(&name.name) { if self.is_global(&name.name) {
+23 -18
View File
@@ -18,7 +18,7 @@ pub struct RegisterAllocator {
stack_offset: i32, stack_offset: i32,
/// Track which registers are currently in use /// Track which registers are currently in use
in_use: HashMap<Register, bool>, in_use: Vec<(Register, bool)>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -85,7 +85,7 @@ impl RegisterAllocator {
// println!("finding! {:#?}", self.in_use); // println!("finding! {:#?}", self.in_use);
if let Some(reg) = self.find_free_register() { if let Some(reg) = self.find_free_register() {
self.in_use.insert(reg, true); self.in_use[reg as usize].1 = true;
return Ok((reg, Vec::new())); return Ok((reg, Vec::new()));
} }
@@ -133,7 +133,7 @@ impl RegisterAllocator {
// This is a true temporary - safe to free // This is a true temporary - safe to free
if !matches!(reg, Register::Zero | Register::Null) { if !matches!(reg, Register::Zero | Register::Null) {
self.in_use.insert(reg, false); self.in_use[reg as usize].1 = false;
} }
} }
@@ -144,7 +144,7 @@ impl RegisterAllocator {
&& !matches!(reg, Register::Zero | Register::Null) && !matches!(reg, Register::Zero | Register::Null)
{ {
self.register_contents.remove(&reg); self.register_contents.remove(&reg);
self.in_use.insert(reg, false); self.in_use[reg as usize].1 = false;
} }
self.variable_locations.remove(var); self.variable_locations.remove(var);
@@ -252,7 +252,7 @@ impl RegisterAllocator {
.insert(var_name.to_string(), Location::register(*source_reg)); .insert(var_name.to_string(), Location::register(*source_reg));
self.register_contents self.register_contents
.insert(*source_reg, var_name.to_string()); .insert(*source_reg, var_name.to_string());
self.in_use.insert(*source_reg, true); self.in_use[*source_reg as usize].1 = true;
return Vec::new(); return Vec::new();
} }
@@ -264,7 +264,7 @@ impl RegisterAllocator {
.insert(var_name.to_string(), Location::register(free_reg)); .insert(var_name.to_string(), Location::register(free_reg));
self.register_contents self.register_contents
.insert(free_reg.clone(), var_name.to_string()); .insert(free_reg.clone(), var_name.to_string());
self.in_use.insert(free_reg, true); self.in_use[free_reg as usize].1 = true;
return vec![format!("\tmov {}, {}", source_reg, free_reg)]; return vec![format!("\tmov {}, {}", source_reg, free_reg)];
} }
@@ -459,26 +459,31 @@ impl RegisterAllocator {
pub fn get_caller_saved_registers(&self) -> Vec<Register> { pub fn get_caller_saved_registers(&self) -> Vec<Register> {
self.register_contents self.register_contents
.iter() .iter()
.filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false)) .filter(|(reg, _)| {
self.in_use
.get(**reg as usize)
.unwrap_or(&(Register::Null, false))
.1
})
.map(|(reg, _)| reg.clone()) .map(|(reg, _)| reg.clone())
.collect() .collect()
} }
/// Save caller-saved registers before a function call /// Save caller-saved registers before a function call
/// Returns assembly code to save them /// Returns assembly code to save them
pub fn _save_caller_saved(&mut self) -> Vec<String> { // pub fn _save_caller_saved(&mut self) -> Vec<String> {
let mut code = Vec::new(); // let mut code = Vec::new();
// For simplicity, save all currently used registers // // For simplicity, save all currently used registers
// In a more sophisticated compiler, you'd only save registers that are live // // In a more sophisticated compiler, you'd only save registers that are live
for (reg, _) in self.register_contents.clone() { // for (reg, _) in self.register_contents.clone() {
if *self.in_use.get(&reg).unwrap_or(&false) { // if *self.in_use.get(reg as usize).unwrap_or(&false) {
code.push(format!("\tpush {}", reg)); // code.push(format!("\tpush {}", reg));
} // }
} // }
code // code
} // }
/// Restore caller-saved registers after a function call /// Restore caller-saved registers after a function call
/// Returns assembly code to restore them /// Returns assembly code to restore them
+3
View File
@@ -18,6 +18,7 @@ pub enum Token {
Const, Const,
As, As,
SizeOf, SizeOf,
Struct,
// Identifiers and literals // Identifiers and literals
Identifier(Name), Identifier(Name),
@@ -104,6 +105,7 @@ impl Token {
Token::Const => "Const", Token::Const => "Const",
Token::Static => "Static", Token::Static => "Static",
Token::Include => "Include", Token::Include => "Include",
Token::Struct => "Struct",
Token::Fn => "Fn", Token::Fn => "Fn",
Token::If => "If", Token::If => "If",
Token::Let => "Let", Token::Let => "Let",
@@ -376,6 +378,7 @@ impl<'a> Lexer<'a> {
"static" => Some(Token::Static), "static" => Some(Token::Static),
"as" => Some(Token::As), "as" => Some(Token::As),
"sizeof" => Some(Token::SizeOf), "sizeof" => Some(Token::SizeOf),
"struct" => Some(Token::Struct),
_ => None, _ => None,
} }
} }
+1 -1
View File
@@ -13,7 +13,7 @@ pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
let lexer = lexer::Lexer::new(&input); let lexer = lexer::Lexer::new(&input);
let tokens = lexer.collect::<Vec<_>>(); let tokens = lexer.collect::<Vec<_>>();
// println!("{tokens:?}"); println!("{tokens:#?}");
log(&format!("Parsing {} Tokens...", tokens.len())); log(&format!("Parsing {} Tokens...", tokens.len()));
+170 -34
View File
@@ -39,6 +39,10 @@ impl Parser {
return self.parse_func(); return self.parse_func();
} }
if expect_tt!(self.peek_next()?, Struct).accepted() {
return self.parse_struct();
}
if expect_tt!(self.peek_next()?, Include).accepted() { if expect_tt!(self.peek_next()?, Include).accepted() {
// expect include keyword // expect include keyword
let _ = self.next(); let _ = self.next();
@@ -99,6 +103,28 @@ impl Parser {
ParseResult::Reject(CompilerError::UnexpectedEndOfInput) ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
} }
/// Parses a struct definition into a `Declaration::Struct`.
///
/// Token sequence consumed, in order:
/// 1. the `Struct` keyword token,
/// 2. an `Identifier` giving the struct's name,
/// 3. a `LeftBrace`,
/// 4. zero or more fields, each read by `parse_var_decl` and separated by
///    `Comma` tokens,
/// 5. a `RightBrace`.
///
/// A comma after the last field is tolerated: once a comma has been
/// consumed, the loop simply stops if the next token is not an identifier.
///
/// Any token mismatch, or a failure inside `parse_var_decl`, is propagated
/// to the caller through `?`.
fn parse_struct(&mut self) -> ParseResult<Declaration, CompilerError> {
    // header: `struct <name> {`
    let _ = expect_tt!(self.next()?, Struct)?;
    let name = expect_value!(self.next()?, Identifier)?;
    let _ = expect_tt!(self.next()?, LeftBrace)?;

    let mut fields = Vec::new();
    loop {
        // a field must begin with an identifier; anything else ends the list
        if !expect_tt!(self.peek_next()?, Identifier).accepted() {
            break;
        }
        fields.push(self.parse_var_decl()?);

        // no separator after this field means the list is finished
        if !expect_tt!(self.peek_next()?, Comma).accepted() {
            break;
        }
        // consume the comma and look for another field
        self.next()?;
    }

    // closing `}`
    let _ = expect_tt!(self.next()?, RightBrace)?;

    ParseResult::Accept(Declaration::Struct { name, fields })
}
fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> { fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> {
// expect function keyword // expect function keyword
let _ = expect_tt!(self.next()?, Fn); let _ = expect_tt!(self.next()?, Fn);
@@ -318,18 +344,28 @@ impl Parser {
}); });
} }
// handle assignment without "let" // handle an in-place function call
let name = expect_value!(self.peek_next()?, Identifier); if let ParseResult::Accept(name) = expect_value!(self.peek_next()?, Identifier)
if name.accepted() { && let ParseResult::Accept(operator) = expect_tt!(
let varname = name?; self.peek(1)?,
if expect_tt!(self.peek(1)?, LeftParen).accepted() { Assign,
let expr = self.parse_expression()?; // a function call expr PlusEqual,
let _ = expect_tt!(self.next()?, Semicolon)?; MinusEqual,
return ParseResult::Accept(Statement::Expression { expr }); StarEqual,
} SlashEqual,
PercentEqual,
AndEqual,
OrEqual,
XorEqual,
ShlEqual,
ShrEqual
)
{
// consume name token
self.next()?; self.next()?;
let operator = match self.peek_next()? {
// pattern match to find operator
let operator = match operator {
Token::Assign => AssignmentOperator::Assign, Token::Assign => AssignmentOperator::Assign,
Token::PlusEqual => AssignmentOperator::AddAssign, Token::PlusEqual => AssignmentOperator::AddAssign,
Token::MinusEqual => AssignmentOperator::SubAssign, Token::MinusEqual => AssignmentOperator::SubAssign,
@@ -348,6 +384,7 @@ impl Parser {
} }
}; };
// consume operator token
self.next()?; self.next()?;
let value = self.parse_expression()?; let value = self.parse_expression()?;
@@ -355,15 +392,17 @@ impl Parser {
let _ = expect_tt!(self.next()?, Semicolon); let _ = expect_tt!(self.next()?, Semicolon);
return ParseResult::Accept(Statement::Assign { return ParseResult::Accept(Statement::Assign {
varname: varname.name, varname: name.name,
operator, operator,
value, value,
}); });
} }
ParseResult::Reject(CompilerError::UnexpectedToken( // parse an expression and a semicolon
self.peek_next()?.tt().to_string(), let expr = self.parse_expression()?;
)) let _ = expect_tt!(self.next()?, Semicolon)?;
return ParseResult::Accept(Statement::Expression { expr });
} }
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> { fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
@@ -374,7 +413,7 @@ impl Parser {
let left = self.parse_logical_and()?; let left = self.parse_logical_and()?;
let op = match self.peek_next()? { let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::LogicalOr, Token::LogicalOr => BinaryOperator::LogicalOr,
_ => return ParseResult::Accept(left), _ => return ParseResult::Accept(left),
}; };
@@ -391,7 +430,7 @@ impl Parser {
let left = self.parse_bitwise_or()?; let left = self.parse_bitwise_or()?;
let op = match self.peek_next()? { let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::LogicalAnd, Token::LogicalAnd => BinaryOperator::LogicalAnd,
_ => return ParseResult::Accept(left), _ => return ParseResult::Accept(left),
}; };
@@ -408,7 +447,7 @@ impl Parser {
let left = self.parse_bitwise_xor()?; let left = self.parse_bitwise_xor()?;
let op = match self.peek_next()? { let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::BitwiseOr, Token::Pipe => BinaryOperator::BitwiseOr,
_ => return ParseResult::Accept(left), _ => return ParseResult::Accept(left),
}; };
@@ -425,7 +464,7 @@ impl Parser {
let left = self.parse_bitwise_and()?; let left = self.parse_bitwise_and()?;
let op = match self.peek_next()? { let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::BitwiseXor, Token::Caret => BinaryOperator::BitwiseXor,
_ => return ParseResult::Accept(left), _ => return ParseResult::Accept(left),
}; };
@@ -678,9 +717,39 @@ impl Parser {
Token::Identifier(name) => { Token::Identifier(name) => {
self.next()?; self.next()?;
ParseResult::Accept(Expression::Variable {
// if the next token isn't the beginning of a struct literal this is just
// an identifier.
if !expect_tt!(self.peek_next()?, LeftBrace).accepted() {
return ParseResult::Accept(Expression::Variable {
name,
expr_type: None,
});
}
let _ = self.next()?;
let mut fields = Vec::new();
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
let name = expect_value!(self.next()?, Identifier)?;
let _ = expect_tt!(self.next()?, Colon)?;
let expr = self.parse_expression()?;
fields.push((name, expr));
if expect_tt!(self.peek_next()?, Comma).accepted() {
self.next()?;
} else {
break;
}
}
let _ = expect_tt!(self.next()?, RightBrace)?;
ParseResult::Accept(Expression::StructLiteral {
name, name,
expr_type: None, fields,
type_id: None,
}) })
} }
Token::LeftBracket => { Token::LeftBracket => {
@@ -729,21 +798,88 @@ impl Parser {
} }
fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> { fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> {
// get the type name incl namespace println!("yes {:?}", self.peek_next()?);
let typename = expect_value!(self.next()?, Identifier)?;
match typename.name.as_str() { // parse primitive or named type
"u32" => ParseResult::Accept(TypeId::U32), if expect_tt!(self.peek_next()?, Identifier).accepted() {
"u16" => ParseResult::Accept(TypeId::U16), return self.parse_type_identifier();
"u8" => ParseResult::Accept(TypeId::U8),
"i32" => ParseResult::Accept(TypeId::I32),
"i16" => ParseResult::Accept(TypeId::I16),
"i8" => ParseResult::Accept(TypeId::I8),
"void" => ParseResult::Accept(TypeId::Void),
"char" => ParseResult::Accept(TypeId::Char),
"str" => ParseResult::Accept(TypeId::Ptr(Box::new(TypeId::Char))),
_ => todo!("Implement parsing for other types!!"),
} }
// parse array type
if expect_tt!(self.peek_next()?, LeftBracket).accepted() {
let _ = self.next()?;
let internal_type = self.parse_type()?;
let _ = expect_tt!(self.next()?, Semicolon)?;
let size = expect_value!(self.next()?, Integer)?;
let _ = expect_tt!(self.next()?, RightBracket)?;
return ParseResult::Accept(TypeId::Array {
r#type: Box::new(internal_type),
size: size as usize,
});
}
// parse tuple type
if expect_tt!(self.peek_next()?, LeftParen).accepted() {
let _ = self.next()?;
let mut types = Vec::new();
while !expect_tt!(self.peek_next()?, RightParen).accepted() {
types.push(self.parse_type()?);
if !expect_tt!(self.peek_next()?, Comma).accepted() {
break;
}
let _ = self.next()?;
}
let _ = expect_tt!(self.next()?, RightParen)?;
return ParseResult::Accept(TypeId::Tuple(types));
}
ParseResult::Reject(CompilerError::Generic(format!(
"Parsing type but no valid type was detected: {:?}",
self.peek_next()?
)))
}
fn parse_type_identifier(&mut self) -> ParseResult<TypeId, CompilerError> {
// get the type name incl namespace
let name = expect_value!(self.next()?, Identifier)?;
let type_id = match name.name.as_str() {
"u32" => TypeId::U32,
"u16" => TypeId::U16,
"u8" => TypeId::U8,
"i32" => TypeId::I32,
"i16" => TypeId::I16,
"i8" => TypeId::I8,
"void" => TypeId::Void,
"char" => TypeId::Char,
"str" => TypeId::Ptr(Box::new(TypeId::Char)),
_ => {
let mut generics = Vec::new();
if expect_tt!(self.peek_next()?, Less).accepted() {
let _ = self.next()?;
// loop until we find the closing '>'
while !expect_tt!(self.peek_next()?, Greater).accepted() {
generics.push(self.parse_type()?);
if !expect_tt!(self.peek_next()?, Comma).accepted() {
break;
}
let _ = self.next()?;
}
let _ = expect_tt!(self.next()?, Greater)?;
}
TypeId::UnknownCustom { name, generics }
}
};
ParseResult::Accept(type_id)
} }
fn next(&mut self) -> ParseResult<Token, CompilerError> { fn next(&mut self) -> ParseResult<Token, CompilerError> {
+86 -12
View File
@@ -40,6 +40,10 @@ pub enum Declaration {
is_const: bool, is_const: bool,
}, },
Dependency(Dependency), Dependency(Dependency),
Struct {
name: Name,
fields: Vec<Variable>,
},
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -62,8 +66,20 @@ pub enum TypeId {
Void, Void,
Ptr(Box<TypeId>), Ptr(Box<TypeId>),
Ref(Box<TypeId>), Ref(Box<TypeId>),
Array(Box<TypeId>, usize), Tuple(Vec<TypeId>),
Struct { name: Name, fields: Vec<TypeId> }, Array {
r#type: Box<TypeId>,
size: usize,
},
UnknownCustom {
name: Name,
generics: Vec<TypeId>,
},
Struct {
name: Name,
fields: Vec<TypeId>,
generics: Vec<TypeId>,
},
} }
impl TypeId { impl TypeId {
@@ -80,7 +96,10 @@ impl TypeId {
Self::Void => 0, Self::Void => 0,
Self::Ptr(t) => t.size(), Self::Ptr(t) => t.size(),
Self::Ref(t) => t.size(), Self::Ref(t) => t.size(),
Self::Array(t, size) => t.size() * size, Self::Tuple(types) => types.iter().map(|t| t.size()).sum(),
Self::Array { r#type, size } => r#type.size() * size,
Self::UnknownCustom { .. } => 1, /* TODO: calculate type size during */
// semantic analysis
Self::Struct { fields, .. } => fields.iter().map(|t| t.size()).sum(), Self::Struct { fields, .. } => fields.iter().map(|t| t.size()).sum(),
} }
} }
@@ -100,14 +119,47 @@ impl fmt::Display for TypeId {
Self::Void => write!(f, "void"), Self::Void => write!(f, "void"),
Self::Ptr(t) => write!(f, "*{}", t), Self::Ptr(t) => write!(f, "*{}", t),
Self::Ref(t) => write!(f, "&{}", t), Self::Ref(t) => write!(f, "&{}", t),
Self::Array(t, len) => write!(f, "[{}; {}]", t, len), Self::Tuple(elems) => write!(
Self::Struct { name, fields } => { f,
write!(f, "struct {} {{", name)?; "({})",
for (i, field) in fields.iter().enumerate() { elems
write!(f, "{}: {}", i, field)?; .iter()
} .map(|t| t.to_string())
write!(f, "}}") .collect::<Vec<String>>()
.join(", ")
),
Self::Array { r#type, size } => write!(f, "[{}; {}]", r#type, size),
Self::UnknownCustom { name, generics } => {
write!(
f,
"{}<{}>",
name,
generics
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
)
} }
Self::Struct {
name,
fields,
generics,
} => write!(
f,
"struct<{}> {} {{{}}}",
generics
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", "),
name,
fields
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
),
} }
} }
} }
@@ -241,6 +293,11 @@ pub enum Expression {
elements: Vec<Expression>, elements: Vec<Expression>,
type_id: Option<TypeId>, type_id: Option<TypeId>,
}, },
StructLiteral {
name: Name,
fields: Vec<(Name, Expression)>,
type_id: Option<TypeId>,
},
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -266,9 +323,12 @@ impl Expression {
expr.is_pure() && index.is_pure() expr.is_pure() && index.is_pure()
} }
Expression::MemberAccess { expr, .. } => expr.is_pure(), Expression::MemberAccess { expr, .. } => expr.is_pure(),
Expression::ArrayLiteral { elements, type_id } => { Expression::ArrayLiteral { elements, .. } => {
elements.iter().all(|element| element.is_pure()) elements.iter().all(|element| element.is_pure())
} }
Expression::StructLiteral { fields, .. } => {
fields.iter().all(|(_, expr)| expr.is_pure())
}
} }
} }
@@ -304,7 +364,21 @@ impl Expression {
let element_type = elements let element_type = elements
.first() .first()
.map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void)); .map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void));
Ok(TypeId::Array(Box::new(element_type), elements.len())) Ok(TypeId::Array {
r#type: Box::new(element_type),
size: elements.len(),
})
}
Expression::StructLiteral { name, fields, .. } => {
let fields = fields
.iter()
.map(|(_, expr)| expr.type_id())
.collect::<Result<Vec<_>, _>>()?;
Ok(TypeId::Struct {
name: name.clone(),
fields,
generics: Vec::new(),
})
} }
} }
} }
+21 -5
View File
@@ -1,10 +1,26 @@
# General TODOs
# Compiler optimisations! # Bugfixes
- [x] [EASY] Investigate logical and operator not compiling - either a lexer or parser issue.
- **note**: this was a parser issue.
# Missing features
- [x] [MEDIUM] Get shift operations working correctly.
- [ ] [MEDIUM] proper prefix/postfix inc/dec implementation. slightly more complex as we need to check for a variable and modify it in place
- [ ] [EASY] Add multiply and divide operations to code generation
- **note**: very easy to do but our division algorithm is hopelessly slow so not worth doing for now.
# Performance Improvements
- [ ] [MEDIUM] implement a proper div/mod library that's not slow af.
- [ ] [HARD] Immediate operations for values that support it (up to +/- u16::max for addi and subi respectively) - [ ] [HARD] Immediate operations for values that support it (up to +/- u16::max for addi and subi respectively)
- this requires significant complexity in code generation as we need to traverse down the tree when we come across these operations to prevent additional register allocations. - this requires significant complexity in code generation as we need to traverse down the tree when we come across these operations to prevent additional register allocations.
- [ ] [EASY] Add multiply and divide operations to code generation # Compiler optimisations
- [ ] [MEDIUM] proper prefix/postfix inc/dec implementation. slightly more complex as we need to check for a variable and modify it in place
- [ ] [EASY] Investigate logical and operator not compiling - either a lexer or parser issue. # Codegen improvements
- [x] [MEDIUM] Get shift operations working correctly. - [ ] [MEDIUM / time consuming] Add scoping to code generation
- [ ] [MEDIUM / time consuming] Rewrite entire codegen to improve code quality and make the code more readable.
- [ ] type-safe instruction builder
- [ ] Instruction & Register enums
- [ ] Instruction builder helper fns eg `fn add(left: &Register, right: &Register, dest: &Register) -> Instruction`
- [ ] Instruction Block types.