Merge compiler and emulator progress from last few months into main. #11

Merged
zxq5 merged 55 commits from compiler into main 2026-02-14 11:54:15 +00:00
8 changed files with 316 additions and 71 deletions
Showing only changes of commit 931af90789 - Show all commits
+1 -1
View File
@@ -5,7 +5,7 @@ edition.workspace = true
authors.workspace = true
[[bin]]
name = "assembler_runner"
name = "assembler"
path = "src/main.rs"
[lib]
+11
View File
@@ -65,6 +65,10 @@ impl CodeGenerator {
Declaration::Dependency(Dependency { name, .. }) => {
self.symbols.push(name)
}
Declaration::Struct { .. } => {} /* we can't do any code generation for
* a struct yet. we may need to later
* once these become class-like
* objects with implementations */
}
}
@@ -164,6 +168,8 @@ impl CodeGenerator {
Declaration::Dependency(Dependency { name, path }) => {
self.imports.insert(name, path);
}
Declaration::Struct { .. } => {} /* can't do any codegen for these yet,
* they're just types. */
};
Ok(())
@@ -536,6 +542,11 @@ impl CodeGenerator {
}
Expression::ArrayLiteral { elements, type_id } => todo!(),
Expression::StructLiteral {
name,
fields,
type_id,
} => todo!(),
Expression::Variable { name, .. } => {
if self.is_global(&name.name) {
+23 -18
View File
@@ -18,7 +18,7 @@ pub struct RegisterAllocator {
stack_offset: i32,
/// Track which registers are currently in use
in_use: HashMap<Register, bool>,
in_use: Vec<(Register, bool)>,
}
#[derive(Debug, Clone)]
@@ -85,7 +85,7 @@ impl RegisterAllocator {
// println!("finding! {:#?}", self.in_use);
if let Some(reg) = self.find_free_register() {
self.in_use.insert(reg, true);
self.in_use[reg as usize].1 = true;
return Ok((reg, Vec::new()));
}
@@ -133,7 +133,7 @@ impl RegisterAllocator {
// This is a true temporary - safe to free
if !matches!(reg, Register::Zero | Register::Null) {
self.in_use.insert(reg, false);
self.in_use[reg as usize].1 = false;
}
}
@@ -144,7 +144,7 @@ impl RegisterAllocator {
&& !matches!(reg, Register::Zero | Register::Null)
{
self.register_contents.remove(&reg);
self.in_use.insert(reg, false);
self.in_use[reg as usize].1 = false;
}
self.variable_locations.remove(var);
@@ -252,7 +252,7 @@ impl RegisterAllocator {
.insert(var_name.to_string(), Location::register(*source_reg));
self.register_contents
.insert(*source_reg, var_name.to_string());
self.in_use.insert(*source_reg, true);
self.in_use[*source_reg as usize].1 = true;
return Vec::new();
}
@@ -264,7 +264,7 @@ impl RegisterAllocator {
.insert(var_name.to_string(), Location::register(free_reg));
self.register_contents
.insert(free_reg.clone(), var_name.to_string());
self.in_use.insert(free_reg, true);
self.in_use[free_reg as usize].1 = true;
return vec![format!("\tmov {}, {}", source_reg, free_reg)];
}
@@ -459,26 +459,31 @@ impl RegisterAllocator {
pub fn get_caller_saved_registers(&self) -> Vec<Register> {
self.register_contents
.iter()
.filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false))
.filter(|(reg, _)| {
self.in_use
.get(**reg as usize)
.unwrap_or(&(Register::Null, false))
.1
})
.map(|(reg, _)| reg.clone())
.collect()
}
/// Save caller-saved registers before a function call
/// Returns assembly code to save them
pub fn _save_caller_saved(&mut self) -> Vec<String> {
let mut code = Vec::new();
// pub fn _save_caller_saved(&mut self) -> Vec<String> {
// let mut code = Vec::new();
// For simplicity, save all currently used registers
// In a more sophisticated compiler, you'd only save registers that are live
for (reg, _) in self.register_contents.clone() {
if *self.in_use.get(&reg).unwrap_or(&false) {
code.push(format!("\tpush {}", reg));
}
}
// // For simplicity, save all currently used registers
// // In a more sophisticated compiler, you'd only save registers that are live
// for (reg, _) in self.register_contents.clone() {
// if *self.in_use.get(reg as usize).unwrap_or(&false) {
// code.push(format!("\tpush {}", reg));
// }
// }
code
}
// code
// }
/// Restore caller-saved registers after a function call
/// Returns assembly code to restore them
+3
View File
@@ -18,6 +18,7 @@ pub enum Token {
Const,
As,
SizeOf,
Struct,
// Identifiers and literals
Identifier(Name),
@@ -104,6 +105,7 @@ impl Token {
Token::Const => "Const",
Token::Static => "Static",
Token::Include => "Include",
Token::Struct => "Struct",
Token::Fn => "Fn",
Token::If => "If",
Token::Let => "Let",
@@ -376,6 +378,7 @@ impl<'a> Lexer<'a> {
"static" => Some(Token::Static),
"as" => Some(Token::As),
"sizeof" => Some(Token::SizeOf),
"struct" => Some(Token::Struct),
_ => None,
}
}
+1 -1
View File
@@ -13,7 +13,7 @@ pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
let lexer = lexer::Lexer::new(&input);
let tokens = lexer.collect::<Vec<_>>();
// println!("{tokens:?}");
println!("{tokens:#?}");
log(&format!("Parsing {} Tokens...", tokens.len()));
+170 -34
View File
@@ -39,6 +39,10 @@ impl Parser {
return self.parse_func();
}
if expect_tt!(self.peek_next()?, Struct).accepted() {
return self.parse_struct();
}
if expect_tt!(self.peek_next()?, Include).accepted() {
// expect include keyword
let _ = self.next();
@@ -99,6 +103,28 @@ impl Parser {
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
}
/// Parses a struct declaration into a `Declaration::Struct`.
///
/// Token sequence consumed, in order: the `Struct` keyword, an `Identifier`
/// holding the struct's name, a `LeftBrace`, zero or more comma-separated
/// fields, and a closing `RightBrace`. Each field is parsed by
/// `parse_var_decl`. A trailing comma after the last field is accepted, and
/// an empty body (`{}`) yields an empty `fields` list.
///
/// NOTE(review): every step is followed by `?`, which presumably returns the
/// rejection from the enclosing call early; `ParseResult`'s `?` behaviour is
/// defined outside this view - confirm.
fn parse_struct(&mut self) -> ParseResult<Declaration, CompilerError> {
// header: `struct` keyword, then the struct's name, then the opening brace
let _ = expect_tt!(self.next()?, Struct)?;
let name = expect_value!(self.next()?, Identifier)?;
let _ = expect_tt!(self.next()?, LeftBrace)?;
let mut fields = Vec::new();
// a field can only start with an identifier; anything else ends the list
while expect_tt!(self.peek_next()?, Identifier).accepted() {
let arg = self.parse_var_decl()?;
fields.push(arg);
// a comma means another field may follow; no comma means the list is done
if expect_tt!(self.peek_next()?, Comma).accepted() {
self.next()?;
} else {
break;
}
}
// whatever ended the field list, the next token must close the body
let _ = expect_tt!(self.next()?, RightBrace)?;
return ParseResult::Accept(Declaration::Struct { name, fields });
}
fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> {
// expect function keyword
let _ = expect_tt!(self.next()?, Fn);
@@ -318,18 +344,28 @@ impl Parser {
});
}
// handle assignment without "let"
let name = expect_value!(self.peek_next()?, Identifier);
if name.accepted() {
let varname = name?;
if expect_tt!(self.peek(1)?, LeftParen).accepted() {
let expr = self.parse_expression()?; // a function call expr
let _ = expect_tt!(self.next()?, Semicolon)?;
return ParseResult::Accept(Statement::Expression { expr });
}
// handle an in-place function call
if let ParseResult::Accept(name) = expect_value!(self.peek_next()?, Identifier)
&& let ParseResult::Accept(operator) = expect_tt!(
self.peek(1)?,
Assign,
PlusEqual,
MinusEqual,
StarEqual,
SlashEqual,
PercentEqual,
AndEqual,
OrEqual,
XorEqual,
ShlEqual,
ShrEqual
)
{
// consume name token
self.next()?;
let operator = match self.peek_next()? {
// pattern match to find operator
let operator = match operator {
Token::Assign => AssignmentOperator::Assign,
Token::PlusEqual => AssignmentOperator::AddAssign,
Token::MinusEqual => AssignmentOperator::SubAssign,
@@ -348,6 +384,7 @@ impl Parser {
}
};
// consume operator token
self.next()?;
let value = self.parse_expression()?;
@@ -355,15 +392,17 @@ impl Parser {
let _ = expect_tt!(self.next()?, Semicolon);
return ParseResult::Accept(Statement::Assign {
varname: varname.name,
varname: name.name,
operator,
value,
});
}
ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
))
// parse an expression and a semicolon
let expr = self.parse_expression()?;
let _ = expect_tt!(self.next()?, Semicolon)?;
return ParseResult::Accept(Statement::Expression { expr });
}
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
@@ -374,7 +413,7 @@ impl Parser {
let left = self.parse_logical_and()?;
let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::LogicalOr,
Token::LogicalOr => BinaryOperator::LogicalOr,
_ => return ParseResult::Accept(left),
};
@@ -391,7 +430,7 @@ impl Parser {
let left = self.parse_bitwise_or()?;
let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::LogicalAnd,
Token::LogicalAnd => BinaryOperator::LogicalAnd,
_ => return ParseResult::Accept(left),
};
@@ -408,7 +447,7 @@ impl Parser {
let left = self.parse_bitwise_xor()?;
let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::BitwiseOr,
Token::Pipe => BinaryOperator::BitwiseOr,
_ => return ParseResult::Accept(left),
};
@@ -425,7 +464,7 @@ impl Parser {
let left = self.parse_bitwise_and()?;
let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::BitwiseXor,
Token::Caret => BinaryOperator::BitwiseXor,
_ => return ParseResult::Accept(left),
};
@@ -678,9 +717,39 @@ impl Parser {
Token::Identifier(name) => {
self.next()?;
ParseResult::Accept(Expression::Variable {
// if the next token isn't the beginning of a struct literal this is just
// an identifier.
if !expect_tt!(self.peek_next()?, LeftBrace).accepted() {
return ParseResult::Accept(Expression::Variable {
name,
expr_type: None,
});
}
let _ = self.next()?;
let mut fields = Vec::new();
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
let name = expect_value!(self.next()?, Identifier)?;
let _ = expect_tt!(self.next()?, Colon)?;
let expr = self.parse_expression()?;
fields.push((name, expr));
if expect_tt!(self.peek_next()?, Comma).accepted() {
self.next()?;
} else {
break;
}
}
let _ = expect_tt!(self.next()?, RightBrace)?;
ParseResult::Accept(Expression::StructLiteral {
name,
expr_type: None,
fields,
type_id: None,
})
}
Token::LeftBracket => {
@@ -729,21 +798,88 @@ impl Parser {
}
fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> {
// get the type name incl namespace
let typename = expect_value!(self.next()?, Identifier)?;
println!("yes {:?}", self.peek_next()?);
match typename.name.as_str() {
"u32" => ParseResult::Accept(TypeId::U32),
"u16" => ParseResult::Accept(TypeId::U16),
"u8" => ParseResult::Accept(TypeId::U8),
"i32" => ParseResult::Accept(TypeId::I32),
"i16" => ParseResult::Accept(TypeId::I16),
"i8" => ParseResult::Accept(TypeId::I8),
"void" => ParseResult::Accept(TypeId::Void),
"char" => ParseResult::Accept(TypeId::Char),
"str" => ParseResult::Accept(TypeId::Ptr(Box::new(TypeId::Char))),
_ => todo!("Implement parsing for other types!!"),
// parse primitive or named type
if expect_tt!(self.peek_next()?, Identifier).accepted() {
return self.parse_type_identifier();
}
// parse array type
if expect_tt!(self.peek_next()?, LeftBracket).accepted() {
let _ = self.next()?;
let internal_type = self.parse_type()?;
let _ = expect_tt!(self.next()?, Semicolon)?;
let size = expect_value!(self.next()?, Integer)?;
let _ = expect_tt!(self.next()?, RightBracket)?;
return ParseResult::Accept(TypeId::Array {
r#type: Box::new(internal_type),
size: size as usize,
});
}
// parse tuple type
if expect_tt!(self.peek_next()?, LeftParen).accepted() {
let _ = self.next()?;
let mut types = Vec::new();
while !expect_tt!(self.peek_next()?, RightParen).accepted() {
types.push(self.parse_type()?);
if !expect_tt!(self.peek_next()?, Comma).accepted() {
break;
}
let _ = self.next()?;
}
let _ = expect_tt!(self.next()?, RightParen)?;
return ParseResult::Accept(TypeId::Tuple(types));
}
ParseResult::Reject(CompilerError::Generic(format!(
"Parsing type but no valid type was detected: {:?}",
self.peek_next()?
)))
}
/// Parses a type that starts with an identifier and returns its `TypeId`.
///
/// The built-in names `u32`, `u16`, `u8`, `i32`, `i16`, `i8`, `void` and
/// `char` map directly to their `TypeId` variants, and `str` maps to
/// `TypeId::Ptr` of `TypeId::Char`. Any other name becomes
/// `TypeId::UnknownCustom`, optionally followed by a generic argument list
/// written as `<T, U, ...>`; each argument is parsed recursively with
/// `parse_type`, and a trailing comma before `>` is accepted.
///
/// Only the custom-name arm looks for `<`, so a generic list following a
/// built-in name is left unconsumed for the caller.
fn parse_type_identifier(&mut self) -> ParseResult<TypeId, CompilerError> {
// get the type name incl namespace
let name = expect_value!(self.next()?, Identifier)?;
let type_id = match name.name.as_str() {
"u32" => TypeId::U32,
"u16" => TypeId::U16,
"u8" => TypeId::U8,
"i32" => TypeId::I32,
"i16" => TypeId::I16,
"i8" => TypeId::I8,
"void" => TypeId::Void,
"char" => TypeId::Char,
"str" => TypeId::Ptr(Box::new(TypeId::Char)),
// not a built-in: record the name (and any generics) without resolving it here
_ => {
let mut generics = Vec::new();
if expect_tt!(self.peek_next()?, Less).accepted() {
// consume the opening '<'
let _ = self.next()?;
// loop until we find the closing '>'
while !expect_tt!(self.peek_next()?, Greater).accepted() {
generics.push(self.parse_type()?);
// no comma after an argument means the list is finished
if !expect_tt!(self.peek_next()?, Comma).accepted() {
break;
}
// consume the separating comma
let _ = self.next()?;
}
// the list must be terminated by '>'
let _ = expect_tt!(self.next()?, Greater)?;
}
TypeId::UnknownCustom { name, generics }
}
};
ParseResult::Accept(type_id)
}
fn next(&mut self) -> ParseResult<Token, CompilerError> {
+86 -12
View File
@@ -40,6 +40,10 @@ pub enum Declaration {
is_const: bool,
},
Dependency(Dependency),
Struct {
name: Name,
fields: Vec<Variable>,
},
}
#[derive(Debug, Clone)]
@@ -62,8 +66,20 @@ pub enum TypeId {
Void,
Ptr(Box<TypeId>),
Ref(Box<TypeId>),
Array(Box<TypeId>, usize),
Struct { name: Name, fields: Vec<TypeId> },
Tuple(Vec<TypeId>),
Array {
r#type: Box<TypeId>,
size: usize,
},
UnknownCustom {
name: Name,
generics: Vec<TypeId>,
},
Struct {
name: Name,
fields: Vec<TypeId>,
generics: Vec<TypeId>,
},
}
impl TypeId {
@@ -80,7 +96,10 @@ impl TypeId {
Self::Void => 0,
Self::Ptr(t) => t.size(),
Self::Ref(t) => t.size(),
Self::Array(t, size) => t.size() * size,
Self::Tuple(types) => types.iter().map(|t| t.size()).sum(),
Self::Array { r#type, size } => r#type.size() * size,
Self::UnknownCustom { .. } => 1, /* TODO: calculate type size during */
// semantic analysis
Self::Struct { fields, .. } => fields.iter().map(|t| t.size()).sum(),
}
}
@@ -100,14 +119,47 @@ impl fmt::Display for TypeId {
Self::Void => write!(f, "void"),
Self::Ptr(t) => write!(f, "*{}", t),
Self::Ref(t) => write!(f, "&{}", t),
Self::Array(t, len) => write!(f, "[{}; {}]", t, len),
Self::Struct { name, fields } => {
write!(f, "struct {} {{", name)?;
for (i, field) in fields.iter().enumerate() {
write!(f, "{}: {}", i, field)?;
}
write!(f, "}}")
Self::Tuple(elems) => write!(
f,
"({})",
elems
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
),
Self::Array { r#type, size } => write!(f, "[{}; {}]", r#type, size),
Self::UnknownCustom { name, generics } => {
write!(
f,
"{}<{}>",
name,
generics
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
)
}
Self::Struct {
name,
fields,
generics,
} => write!(
f,
"struct<{}> {} {{{}}}",
generics
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", "),
name,
fields
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
),
}
}
}
@@ -241,6 +293,11 @@ pub enum Expression {
elements: Vec<Expression>,
type_id: Option<TypeId>,
},
StructLiteral {
name: Name,
fields: Vec<(Name, Expression)>,
type_id: Option<TypeId>,
},
}
#[derive(Debug, Clone)]
@@ -266,9 +323,12 @@ impl Expression {
expr.is_pure() && index.is_pure()
}
Expression::MemberAccess { expr, .. } => expr.is_pure(),
Expression::ArrayLiteral { elements, type_id } => {
Expression::ArrayLiteral { elements, .. } => {
elements.iter().all(|element| element.is_pure())
}
Expression::StructLiteral { fields, .. } => {
fields.iter().all(|(_, expr)| expr.is_pure())
}
}
}
@@ -304,7 +364,21 @@ impl Expression {
let element_type = elements
.first()
.map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void));
Ok(TypeId::Array(Box::new(element_type), elements.len()))
Ok(TypeId::Array {
r#type: Box::new(element_type),
size: elements.len(),
})
}
Expression::StructLiteral { name, fields, .. } => {
let fields = fields
.iter()
.map(|(_, expr)| expr.type_id())
.collect::<Result<Vec<_>, _>>()?;
Ok(TypeId::Struct {
name: name.clone(),
fields,
generics: Vec::new(),
})
}
}
}
+21 -5
View File
@@ -1,10 +1,26 @@
# General TODO's
# Compiler optimisations!
# Bugfixes
- [x] [EASY] Investigate logical and operator not compiling - either a lexer or parser issue.
- **note**: this was a parser issue.
# Missing features
- [x] [MEDIUM] Get shift operations working correctly.
- [ ] [MEDIUM] proper prefix/postfix inc/dec implementation. slightly more complex as we need to check for a variable and modify it in place
- [ ] [EASY] Add multiply and divide operations to code generation
- **note**: very easy to do but our division algorithm is hopelessly slow so not worth doing for now.
# Performance Improvements
- [ ] [MEDIUM] implement a proper div/mod library that's not slow af.
- [ ] [HARD] Immediate operations for values that support it (up to +/- u16::max for addi and subi respectively)
- this requires significant complexity in code generation as we need to traverse down the tree when we come across these operations to prevent additional register allocations.
- [ ] [EASY] Add multiply and divide operations to code generation
- [ ] [MEDIUM] proper prefix/postfix inc/dec implementation. slightly more complex as we need to check for a variable and modify it in place
- [ ] [EASY] Investigate logical and operator not compiling - either a lexer or parser issue.
- [x] [MEDIUM] Get shift operations working correctly.
# Compiler optimisations
# Codegen improvements
- [ ] [MEDIUM / time consuming] Add scoping to code generation
- [ ] [MEDIUM / time consuming] Rewrite entire codegen to improve code quality and make the code more readable.
- [ ] type-safe instruction builder
- [ ] Instruction & Register enums
- [ ] Instruction builder helper fns eg `fn add(left: &Register, right: &Register, dest: &Register) -> Instruction`
- [ ] Instruction Block types.