updated compiler to support multiple frontends and backends

This commit is contained in:
2026-02-05 01:09:14 +00:00
parent 8d130a870c
commit a35cfbe864
14 changed files with 1737 additions and 324 deletions
+1 -1
View File
@@ -1,7 +1,7 @@
cargo-features = ["codegen-backend"]
[workspace]
members = ["emulator", "common", "assembler", "dsa_editor", "compiler", "c_compiler"]
members = ["emulator", "common", "assembler", "dsa_editor", "compiler"]
resolver = "3"
[workspace.package]
+738
View File
@@ -0,0 +1,738 @@
use std::collections::HashMap;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use chrono::{DateTime, Local};
use super::registers::RegisterAllocator;
use crate::{block, comment, dsa};
use crate::model::{
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, UnaryOperator, Variable,
};
/// Generates DSA assembly text from a parsed [`Program`].
pub struct CodeGenerator {
// The program whose declarations are compiled.
ast: Program,
// Included libraries: include name -> file path.
imports: HashMap<String, String>,
// One `dw name: value` line per global variable.
globals: Vec<String>,
// Generated assembly text, one entry per compiled function.
functions: Vec<String>,
// Names of every top-level declaration (variables, functions, dependencies).
symbols: Vec<String>,
// Register allocator; it is reset at the start of every function.
allocator: RegisterAllocator,
}
/// Renders one `include` directive that binds `name` to the file at `path`.
///
/// The path is wrapped in double quotes; no escaping is applied.
fn import(name: &str, path: &str) -> String {
    ["include ", name, ": \"", path, "\""].concat()
}
impl CodeGenerator {
/// Instruction line that leaves a function by jumping to the shared `_ret` label.
const RET: &'static str = "\tjmp _ret";
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: Vec::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
}
}
/// Records an include named `name` that points at `path`.
///
/// An existing include with the same name is replaced.
pub fn include(&mut self, name: &str, path: &str) {
    let key = String::from(name);
    let value = String::from(path);
    self.imports.insert(key, value);
}
/// Returns `true` when `name` has already been emitted as a global.
///
/// Globals are stored as `dw <name>: <value>` lines, so the check looks for
/// a line that *starts* with `dw <name>:`.
fn is_global(&self, name: &str) -> bool {
    // Built once rather than once per global. Anchoring at the start of the
    // line keeps an initialiser's text from being mistaken for a label.
    let label = format!("dw {name}:");
    self.globals.iter().any(|line| line.starts_with(&label))
}
pub fn generate(&mut self) -> Result<String, CompilerError> {
// always include the print library for debugging!
self.include("print", "./lib/io/print.dsa");
for block in self.ast.clone().declarations {
match block {
Declaration::Variable {
var: Variable { name, .. },
..
} => self.symbols.push(name),
Declaration::Function { name, .. } => self.symbols.push(name),
Declaration::Dependency(Dependency { name, .. }) => {
self.symbols.push(name)
}
}
}
for block in self.ast.clone().declarations {
self.generate_block(block.clone())?;
}
self.generate_layout()
}
fn generate_layout(&mut self) -> Result<String, CompilerError> {
let datetime: DateTime<Local> = SystemTime::now().into();
Ok(dsa![
"",
comment!("GENERATED BY DSC COMPILER"),
comment!(format!(
"Generated at {}",
datetime.format("%Y-%m-%d %H:%M:%S")
)),
"",
// imports
comment!("Imports"),
self.imports
.iter()
.map(|(k, v)| import(k, v))
.collect::<Vec<String>>()
.join("\n"),
"",
// reserved memory
comment!("Globals & Reserved Memory"),
self.globals.join("\n"),
"",
// entry point
comment!("Entry Point"),
"dw stack: 0x10000",
"db message: \"Process Exited with code:\"",
block! [ "_init"
dsa![ldw stack, bpr],
dsa![mov bpr, spr],
dsa![push zero],
dsa![call main],
dsa![call print::print_newline],
dsa![lwi message, rg0],
dsa![push rg0],
dsa![call print::print],
dsa![pop zero],
dsa![call print::print_hex_word],
dsa![pop zero],
dsa![hlt]
],
"",
comment!("Return"),
block! [ "_ret"
dsa![mov bpr, spr],
dsa![pop bpr],
dsa![return]
],
comment!("Compiled Code Starts..."),
// block! [ "main"
// dsa![push bpr],
// dsa![mov spr, bpr],
// dsa![lwi 67, rg1],
// dsa![stw rg1, spr, 8],
// dsa![mov bpr, spr],
// dsa![pop bpr],
// dsa![return]
// ],
self.functions.join("\n"),
])
}
/// Appends the `dw name: value` data line for a global variable.
///
/// A global without an initialiser is emitted with the value `0`.
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
    let value = init.unwrap_or(ConstExpr::Number(0));
    let line = format!("dw {}: {}", name, value);
    self.globals.push(line);
}
/// Lowers one top-level declaration into the generator's output buffers.
///
/// * variables become a global data line,
/// * functions are compiled and appended to `functions`,
/// * dependencies are recorded as includes.
///
/// # Errors
/// Propagates any [`CompilerError`] raised while compiling a function body.
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
    match block {
        Declaration::Variable { var, init, .. } => self.generate_global(&var.name, init),
        Declaration::Function {
            name, params, body, ..
        } => {
            let func = self.generate_function(&name, &params, &body)?.join("\n");
            self.functions.push(format!("{func}\n"));
        }
        Declaration::Dependency(Dependency { name, path }) => {
            self.imports.insert(name, path);
        }
    }
    Ok(())
}

/// Generates the assembly lines for one function.
///
/// The emitted code is: the label and prologue (`push bpr` / `mov spr, bpr`),
/// one load per parameter (parameter `i` is read from `bpr + 8 + 4 * i`),
/// the lowered body, and a trailing jump to `_ret` unless the body already
/// ends with one.
///
/// # Errors
/// Returns the [`CompilerError`] from register allocation or from lowering a
/// statement. These were previously unwrapped and aborted the compiler with
/// a panic.
fn generate_function(
    &mut self,
    name: &str,
    params: &[Variable],
    body: &[Statement],
) -> Result<Vec<String>, CompilerError> {
    // Reset allocator for new function
    self.allocator.reset();

    // Function prologue
    let mut code = vec![
        format!("{}:", name),
        "\tpush bpr".to_string(),
        "\tmov spr, bpr".to_string(),
        String::new(),
    ];

    // Bring every parameter from its stack slot into a register.
    for (i, param) in params.iter().enumerate() {
        let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
        let (reg, load_code) = self.allocator.alloc_var(&param.name)?;
        code.extend(load_code);
        code.push(format!("\tldw bpr, {}, {}", reg, offset));
    }

    // Generate code for function body
    for stmt in body {
        code.extend(self.generate_statement(stmt)?);
    }

    // automatically return at function end
    if code.last().map(String::as_str) != Some(Self::RET) {
        code.push(Self::RET.to_string());
    }

    Ok(code)
}
// Example: Generate code for a statement
fn generate_statement(
&mut self,
stmt: &Statement,
) -> Result<Vec<String>, CompilerError> {
let mut code = Vec::new();
match stmt {
Statement::Declaration { var, value } => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(expr, true)?;
code.extend(expr_code);
// Store result in variable
let store_code = self.allocator.store_var(&var.name, &result_reg);
code.extend(store_code);
// Free temporary register
self.allocator.free_temp(&result_reg);
} else {
// Just declaring variable without initialization
self.allocator.alloc_var(&var.name)?;
}
}
Statement::Break => unimplemented!(),
Statement::Continue => unimplemented!(),
Statement::PtrWrite { ptr, value } => {
let (result_reg, expr_code) = self.generate_expression(value, true)?;
code.extend(expr_code);
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
code.extend(ptr_code);
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
self.allocator.free_temp(&result_reg);
self.allocator.free_temp(&ptr_reg);
}
Statement::Assign { varname, value } => {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(value, true)?;
code.extend(expr_code);
// Check if this is a global variable
if self.is_global(varname) {
// Store to global label
code.push(format!("\tstw {}, {}", result_reg, varname));
} else {
// Store result in local variable
let store_code = self.allocator.store_var(varname, &result_reg);
code.extend(store_code);
}
// Free temporary register
self.allocator.free_temp(&result_reg);
}
Statement::Return(expr) => {
if let Some(e) = expr {
let (result_reg, expr_code) = self.generate_expression(e, true)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret"));
self.allocator.free_temp(&result_reg);
}
}
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
code.extend(cond_code);
// Compare with zero
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
// Generate unique labels
let then_label = format!("_then_{}", self.get_unique_label());
let else_label = format!("_else_{}", self.get_unique_label());
let end_label = format!("_end_{}", self.get_unique_label());
// Jump to else if condition is false (equal to zero)
code.push(format!("\tjeq {}", else_label));
// Then block
code.push(format!("{}:", then_label));
for s in then_stmt {
code.extend(self.generate_statement(s)?);
}
if then_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("\tjmp {}", end_label));
// Else block
code.push(format!("{}:", else_label));
for s in else_stmt {
code.extend(self.generate_statement(s)?);
}
if else_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("{}:", end_label));
}
Statement::While { condition, body } => {
let loop_start = format!("_while_start_{}", self.get_unique_label());
let loop_end = format!("_while_end_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
code.extend(cond_code);
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
code.push(format!("\tjeq {}", loop_end));
// Loop body
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
code.push(format!("{}:", loop_end));
}
Statement::Loop(body) => {
let loop_start = format!("_loop_start_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) = self.generate_expression(expr, false)?;
code.extend(expr_code);
self.allocator.free_temp(&result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.extend(self.generate_statement(s)?);
}
}
}
Ok(code)
}
// Example: Generate code for an expression
// Returns (register containing result, assembly code)
fn generate_expression(
&mut self,
expr: &Expression,
use_result: bool,
) -> Result<(String, Vec<String>), CompilerError> {
let mut code = Vec::new();
// optimisation to prevent generating dead code!
if expr.is_pure() && !use_result {
return Ok((String::new(), code));
}
match expr {
Expression::StringLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// write string into memory
let uuid = self.get_unique_label();
code.push(format!("\tdb str_{uuid}: \"{value}\""));
// Load pointer to string
code.push(format!("\tlwi str_{uuid}, {reg}"));
Ok((reg, code))
}
Expression::CharLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// Load immediate value
code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg));
Ok((reg, code))
}
Expression::Number(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// Load immediate value
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
if *value > 0xFFFF || *value < 0 {
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
}
Ok((reg, code))
}
Expression::Variable { name, .. } => {
if self.is_global(&name.name) {
// Allocate a temporary register for the global
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
// Load from global label
code.push(format!("\tldw {}, {}", name.name, reg));
Ok((reg, code))
} else {
// Local variable - use existing allocator logic
let (reg, load_code) = self.allocator.load_var(&name.name)?;
code.extend(load_code);
Ok((reg, code))
}
}
Expression::Binary { op, left, right } => {
// Evaluate left operand
let (left_reg, left_code) = self.generate_expression(left, true)?;
code.extend(left_code);
// Evaluate right operand
let (right_reg, right_code) = self.generate_expression(right, true)?;
code.extend(right_code);
// Allocate result register
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.extend(result_alloc);
// Generate operation
match op {
BinaryOperator::Add => {
code.push(format!(
"\tadd {}, {}, {}",
left_reg, right_reg, result_reg
));
}
BinaryOperator::Sub => {
code.push(format!(
"\tsub {}, {}, {}",
left_reg, right_reg, result_reg
));
}
BinaryOperator::Mul => {
self.include("maths", "./lib/maths/core.dsa");
// Call multiply function
code.push(format!("\tpush {}", right_reg));
code.push(format!("\tpush {}", left_reg));
code.push("\tcall maths::multiply".to_string());
code.push(format!("\tpop {}", result_reg));
code.push("\tpop zero".to_string());
}
// Comparison operators - return 1 (true) or 0 (false)
BinaryOperator::Eq => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Ne => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Lt => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Le => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Gt => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Ge => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
code.push(format!("{}:", end_label));
}
_ => unimplemented!(),
}
// Free operand registers (allocator will protect variables)
self.allocator.free_temp(&left_reg);
self.allocator.free_temp(&right_reg);
Ok((result_reg, code))
}
Expression::Call { name, args } => {
// first evaluate all the args we're going to need
let mut arg_regs = Vec::new();
for arg in args.iter().rev() {
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
code.extend(arg_code);
arg_regs.push(arg_reg);
}
// Save caller-saved registers and track which ones we saved
// old method, inefficient.
// let saved_regs = self.allocator.get_caller_saved_registers();
// for reg in &saved_regs {
// code.push(format!("\tpush {}", reg));
// }
// Save caller-saved registers and track which ones we saved
let saved_regs = self.allocator.get_caller_saved_registers();
for reg in &saved_regs {
// spill variables to stack
code.extend(self.allocator.spill_register(reg).unwrap());
}
// Evaluate and push arguments in reverse order
for (i, arg_reg) in arg_regs.iter().enumerate() {
code.push(format!(
"\tpush {} // push arg {}",
arg_reg,
args.len() - 1 - i
));
}
// if GLOBAL_METHODS.contains_key(name.name.as_str()) {
// code.push(format!("\tcall {}",
// GLOBAL_METHODS[name.name.as_str()])); } else
if self.symbols.contains(&name.name) {
// Call local function
code.push(format!("\tcall {}", name));
} else if let Some(ns) = name.namespace.clone()
&& self.imports.contains_key(&ns)
{
code.push(format!("\tcall {}", name));
} else {
return Err(CompilerError::Undefined(name.clone()));
}
let result_reg: String;
if use_result {
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
result_reg = temp_result_reg;
code.extend(result_alloc);
code.push(format!("\tpop {}", result_reg));
// Clean up arguments
if args.len() > 1 {
for _ in 0..(args.len() - 1) {
code.push("\tpop zero".to_string());
}
}
} else {
result_reg = "zero".to_string();
// Clean up arguments
if args.len() > 0 {
for _ in 0..(args.len()) {
code.push("\tpop zero".to_string());
}
}
}
// Restore caller-saved registers in reverse order (LIFO)
// for reg in saved_regs.iter().rev() {
// code.push(format!("\tpop {}", reg));
// }
// Free argument registers
for reg in arg_regs {
self.allocator.free_temp(&reg);
}
Ok((result_reg, code))
}
Expression::Unary { op, operand } => {
let (operand_reg, operand_code) =
self.generate_expression(operand, true)?;
code.extend(operand_code);
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.extend(result_alloc);
match op {
UnaryOperator::Minus => {
// Negate: result = 0 - operand
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
}
UnaryOperator::Plus => {
// Just move
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
}
UnaryOperator::Dereference => {
code.push(format!("\tldw {}, {}", operand_reg, result_reg));
}
UnaryOperator::Reference => {
code.extend(self.allocator.spill_register(&operand_reg)?);
code.push(format!(
"\tsubi bpr {} {}",
-(4 + self.allocator.get_stack_offset()),
result_reg
))
}
}
self.allocator.free_temp(&operand_reg);
Ok((result_reg, code))
}
Expression::Empty => Ok(("zero".to_string(), code)),
}
}
/// Returns a fresh numeric suffix for a generated label.
///
/// The suffix comes from a single counter shared by every generator in the
/// process; the first value handed out is `"1"`.
fn get_unique_label(&mut self) -> String {
    static NEXT_LABEL: AtomicU32 = AtomicU32::new(0);
    let previous = NEXT_LABEL.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
    let id = previous + 1;
    format!("{}", id)
}
}
/// Joins any number of arguments into one `String`, one argument per line.
///
/// Every argument is rendered through `Display` and followed by `\n`.
/// A trailing comma after the last argument is accepted.
#[macro_export]
macro_rules! dsa {
    ($($arg:expr),* $(,)?) => {{
        // The trait is only needed for `write_fmt`; it is imported
        // anonymously so it cannot clash with names used by the arguments.
        use ::std::fmt::Write as _;
        let mut out = ::std::string::String::new();
        $(
            // Appends to the same buffer instead of building a temporary
            // string per argument.
            ::std::writeln!(out, "{}", $arg).expect("write to String failed");
        )*
        out
    }};
}
// ──────────────────────── cmd! ────────────────────────
/// Turns its token stream into a `String` terminated by a newline.
///
/// The tokens are not evaluated; they are converted with `stringify!`, so
/// `cmd!(mov rg0, rg1)` yields the text of those tokens followed by `\n`.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {{
        // `$tokens` must be expanded inside the same repetition it was
        // captured in; using it bare is rejected when the macro is expanded.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    }};
}
// ──────────────────────── block! ────────────────────────
/// Builds a labelled block of instructions as a `&'static str`.
///
/// Usage:
///
/// ```ignore
/// let asm = block![ "name"
///     dsa![mov rg0, rg1],
///     dsa![add rg1, rg1]
/// ];
/// ```
///
/// `asm` then contains the label followed by one tab-indented line per
/// instruction:
///
/// ```text
/// name:
/// 	mov rg0, rg1
/// 	add rg1, rg1
/// ```
///
/// The `dsa![…]` wrappers are matched as plain tokens and are not expanded;
/// their contents are converted with `stringify!`. A trailing comma after
/// the last instruction is accepted.
#[macro_export]
macro_rules! block {
    // The first token must be a string literal: that is the label.
    ($label:literal $(dsa![$($ins:tt)*]),* $(,)?) => {{
        // Build a single string at compile time.
        const CODE: &str = concat!(
            $label, ":\n",
            // One "\t<instruction>\n" line per `dsa![…]` entry.
            $(concat!("\t", stringify!($($ins)*), "\n")),*
        );
        CODE
    }};
}
/// Formats `$text` as a `// …` line comment, without a trailing newline.
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        ::std::format!("// {}", $text)
    };
}
+9
View File
@@ -0,0 +1,9 @@
use crate::model::{CompilerError, Program};
mod codegen;
mod registers;
/// Compiles `ast` to DSA assembly text.
///
/// The program is cloned into a fresh [`codegen::CodeGenerator`], so `ast`
/// itself is left untouched.
pub fn generate_code(ast: &Program) -> Result<String, CompilerError> {
    let program = ast.clone();
    codegen::CodeGenerator::new(program).generate()
}
@@ -1,6 +1,6 @@
use std::collections::HashMap;
use crate::parser::CompilerError;
use crate::model::CompilerError;
/// Register allocator for DSA assembly generation
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
@@ -147,7 +147,7 @@ impl RegisterAllocator {
}
/// Get the current location of a variable
pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
pub fn _get_var_location(&self, var_name: &str) -> Option<&Location> {
self.variable_locations.get(var_name)
}
@@ -264,7 +264,7 @@ impl RegisterAllocator {
}
/// Spill all registers to stack (useful before function calls)
pub fn spill_all(&mut self) -> Vec<String> {
pub fn _spill_all(&mut self) -> Vec<String> {
let mut code = Vec::new();
let regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
@@ -284,7 +284,7 @@ impl RegisterAllocator {
}
/// Get the total stack space needed for local variables
pub fn get_stack_size(&self) -> i32 {
pub fn _get_stack_size(&self) -> i32 {
-self.stack_offset // Convert negative offset to positive size
}
@@ -298,7 +298,7 @@ impl RegisterAllocator {
/// Mark a variable as dead (no longer needed)
/// Frees its register if it's in one
pub fn free_var(&mut self, var_name: &str) {
pub fn _free_var(&mut self, var_name: &str) {
if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
let reg = reg.clone();
self.register_contents.remove(&reg);
@@ -319,12 +319,12 @@ impl RegisterAllocator {
/// Save caller-saved registers before a function call
/// Returns assembly code to save them
pub fn save_caller_saved(&mut self) -> Vec<String> {
pub fn _save_caller_saved(&mut self) -> Vec<String> {
let mut code = Vec::new();
// For simplicity, save all currently used registers
// In a more sophisticated compiler, you'd only save registers that are live
for (reg, var_name) in self.register_contents.clone() {
for (reg, _) in self.register_contents.clone() {
if *self.in_use.get(&reg).unwrap_or(&false) {
code.push(format!("\tpush {}", reg));
}
@@ -335,7 +335,7 @@ impl RegisterAllocator {
/// Restore caller-saved registers after a function call
/// Returns assembly code to restore them
pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
pub fn _restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
let mut code = Vec::new();
// Restore in reverse order (LIFO)
@@ -346,53 +346,3 @@ impl RegisterAllocator {
code
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporaries are handed out in order and a freed register is reused.
    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg1, code1) = allocator.alloc_temp().unwrap();
        assert_eq!(code1.len(), 0); // No spill needed
        assert_eq!(reg1, "rg0");

        let (reg2, code2) = allocator.alloc_temp().unwrap();
        assert_eq!(code2.len(), 0);
        assert_eq!(reg2, "rg1");

        allocator.free_temp(&reg1);

        let (reg3, code3) = allocator.alloc_temp().unwrap();
        assert_eq!(code3.len(), 0);
        assert_eq!(reg3, "rg0"); // Reuses freed register
    }

    /// Asking for the same variable twice yields the same register.
    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg, "rg0");

        // Requesting same variable again should return same register
        let (reg2, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg2, "rg0");
    }

    /// Once all 16 registers hold variables, the next one forces a spill.
    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        // Allocate all 16 registers
        for i in 0..16 {
            allocator.alloc_var(&format!("var{}", i)).unwrap();
        }

        // Next allocation should spill to stack; only the spill code matters.
        let (_reg, code) = allocator.alloc_var("var16").unwrap();
        assert!(!code.is_empty()); // Should have spill code
    }
}
+13
View File
@@ -0,0 +1,13 @@
use crate::model::{CompilerError, Program};
mod dsa;
/// Selects the backend for the output file extension `ext` and compiles `ast`.
///
/// Only `"dsa"` is recognised.
///
/// # Errors
/// Returns `CompilerError::Generic` for any other extension, or whatever
/// error the selected backend reports.
pub fn compiler_backend(ext: &str, ast: &Program) -> Result<String, CompilerError> {
    if ext == "dsa" {
        return dsa::generate_code(ast);
    }

    let message = format!("File type {} not supported", ext);
    Err(CompilerError::Generic(message))
}
+627
View File
@@ -0,0 +1,627 @@
use std::iter::Peekable;
use std::str::Chars;
/// A lexical token produced by the lexer.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
// Keywords
Fn,
Let,
If,
Else,
Loop,
While,
Break,
Return,
Continue,
Include,
Static,
Const,
// Identifiers and literals
// An identifier, optionally qualified with a namespace (`ns::name`).
Identifier(Name),
// A string literal's contents, with escape sequences already resolved.
String(String),
// Value of a decimal, hexadecimal (`0x`) or binary (`0b`) literal.
Integer(u64),
Char(char),
// Symbols
LeftParen, // (
RightParen, // )
LeftBrace, // {
RightBrace, // }
Semicolon, // ;
Colon, // :
Comma, // ,
// Operators
Plus, // +
Minus, // -
Star, // *
Amphersand, // &
Slash, // /
Assign, // =
EqualEqual, // ==
Bang, // !
BangEqual, // !=
Less, // <
LessEqual, // <=
Greater, // >
GreaterEqual, // >=
RightArrow, // ->
// Special
// Returned once the input is exhausted.
Eof,
}
use std::fmt;
use crate::model::Name;
/// Renders a name as `namespace::name`, or just `name` when it has no
/// namespace.
impl fmt::Display for Name {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.namespace {
            Some(ns) => write!(f, "{}::{}", ns, self.name),
            None => write!(f, "{}", self.name),
        }
    }
}
impl Token {
    /// Returns the name of this token's kind, ignoring any payload.
    ///
    /// Every name is a string constant, so the result is `'static` and can
    /// outlive the token it was obtained from. Note that `Integer` reports
    /// itself as `"UnsignedInt"`.
    pub fn tt(&self) -> &'static str {
        match self {
            // Keywords
            Token::Fn => "Fn",
            Token::Let => "Let",
            Token::If => "If",
            Token::Else => "Else",
            Token::Loop => "Loop",
            Token::While => "While",
            Token::Break => "Break",
            Token::Return => "Return",
            Token::Continue => "Continue",
            Token::Include => "Include",
            Token::Static => "Static",
            Token::Const => "Const",
            // Identifiers and literals
            Token::Identifier(_) => "Identifier",
            Token::String(_) => "String",
            Token::Integer(_) => "UnsignedInt",
            Token::Char(_) => "Char",
            // Symbols
            Token::LeftParen => "LeftParen",
            Token::RightParen => "RightParen",
            Token::LeftBrace => "LeftBrace",
            Token::RightBrace => "RightBrace",
            Token::Semicolon => "Semicolon",
            Token::Colon => "Colon",
            Token::Comma => "Comma",
            // Operators
            Token::Plus => "Plus",
            Token::Minus => "Minus",
            Token::Star => "Star",
            Token::Amphersand => "Amphersand",
            Token::Slash => "Slash",
            Token::Assign => "Assign",
            Token::EqualEqual => "EqualEqual",
            Token::Bang => "Bang",
            Token::BangEqual => "BangEqual",
            Token::Less => "Less",
            Token::LessEqual => "LessEqual",
            Token::Greater => "Greater",
            Token::GreaterEqual => "GreaterEqual",
            Token::RightArrow => "RightArrow",
            // Special
            Token::Eof => "Eof",
        }
    }
}
/// Character-level scanner that turns source text into tokens.
#[derive(Debug)]
pub struct Lexer<'a> {
// Characters that come after `current` and have not been consumed yet.
chars: Peekable<Chars<'a>>,
// The character currently being examined; `None` once the input is exhausted.
current: Option<char>,
// Line counter, starting at 1; bumped when skipped whitespace or comments
// contain a newline.
line: usize,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
let mut chars = input.chars().peekable();
let current = chars.next();
Lexer {
chars,
current,
line: 1,
}
}
/// Moves to the next character and returns it (`None` at end of input).
fn advance(&mut self) -> Option<char> {
    let next = self.chars.next();
    self.current = next;
    next
}
fn peek(&mut self) -> Option<&char> {
self.chars.peek()
}
/// Consumes whitespace up to the next non-whitespace character, counting
/// every newline that is passed.
fn skip_whitespace(&mut self) {
    while let Some(ws) = self.current.filter(|c| c.is_whitespace()) {
        self.line += usize::from(ws == '\n');
        self.advance();
    }
}
/// Skips a `//` comment, including the newline that ends it.
///
/// Must be called with `current` on the first slash.
fn skip_line_comment(&mut self) {
    // Step over both slashes.
    self.advance();
    self.advance();

    loop {
        match self.current {
            // End of input: nothing left to skip.
            None => return,
            Some('\n') => {
                self.line += 1;
                self.advance();
                return;
            }
            Some(_) => {
                self.advance();
            }
        }
    }
}
/// Skips a `/* … */` comment, counting the newlines inside it.
///
/// Must be called with `current` on the opening slash.
///
/// # Errors
/// Returns a message naming the starting line when the input ends before
/// the closing `*/`.
fn skip_block_comment(&mut self) -> Result<(), String> {
    // Step over the opening "/*".
    self.advance();
    self.advance();
    let start_line = self.line;

    while let Some(c) = self.current {
        match c {
            '\n' => self.line += 1,
            '*' if self.peek() == Some(&'/') => {
                // Step over the closing "*/".
                self.advance();
                self.advance();
                return Ok(());
            }
            _ => {}
        }
        self.advance();
    }

    Err(format!(
        "Unterminated block comment starting at line {}",
        start_line
    ))
}
/// Skips any mix of whitespace, `//` comments and `/* */` comments.
///
/// An unterminated block comment is reported on stderr and scanning carries
/// on (at that point the input is exhausted).
fn skip_whitespace_and_comments(&mut self) {
    loop {
        self.skip_whitespace();

        // Only a slash can start a comment.
        if self.current != Some('/') {
            return;
        }

        match self.peek().copied() {
            Some('/') => self.skip_line_comment(),
            Some('*') => {
                if let Err(e) = self.skip_block_comment() {
                    eprintln!("Lexer error: {}", e);
                }
            }
            // A lone slash is an operator, not a comment.
            _ => return,
        }
    }
}
/// Reads an identifier that starts at `current`.
///
/// `current` is included only when it is a letter or `_`; after that every
/// following letter, digit or `_` is consumed. On return `current` is the
/// last character that was consumed.
fn read_identifier(&mut self) -> String {
    let mut ident = String::new();

    match self.current {
        Some(first) if first.is_alphabetic() || first == '_' => ident.push(first),
        _ => {}
    }

    while let Some(next) = self
        .peek()
        .copied()
        .filter(|c| c.is_alphanumeric() || *c == '_')
    {
        self.advance();
        ident.push(next);
    }

    ident
}
/// Reads a word and classifies it as a keyword, a plain identifier, or a
/// namespaced identifier of the form `namespace::name`.
///
/// Keywords are recognised first, so a keyword is never treated as a
/// namespace. A single `:` after the word (a type annotation) is left
/// untouched for the next token.
fn keyword_or_identifier(&mut self) -> Token {
    let first_ident = self.read_identifier();

    match first_ident.as_str() {
        "fn" => return Token::Fn,
        "if" => return Token::If,
        "else" => return Token::Else,
        "while" => return Token::While,
        "loop" => return Token::Loop,
        "break" => return Token::Break,
        "return" => return Token::Return,
        "continue" => return Token::Continue,
        "include" => return Token::Include,
        "let" => return Token::Let,
        "const" => return Token::Const,
        "static" => return Token::Static,
        _ => {}
    }

    // `peek` only sees one character ahead, so the two-character lookahead
    // for `::` is done on a copy of the iterator; nothing is consumed.
    let mut lookahead = self.chars.clone();
    let has_separator = lookahead.next() == Some(':') && lookahead.next() == Some(':');

    if !has_separator {
        return Token::Identifier(Name {
            namespace: None,
            name: first_ident,
        });
    }

    // Step over both colons, then read the actual name.
    self.advance();
    self.advance();
    let second_ident = self.read_identifier();

    Token::Identifier(Name {
        namespace: Some(first_ident),
        name: second_ident,
    })
}
/// Reads an integer literal starting at `current`.
///
/// A leading `0x`/`0X` selects hexadecimal, `0b`/`0B` binary; anything else
/// is read as decimal.
///
/// # Panics
/// Panics when called at end of input (`current` is `None`).
fn read_number(&mut self) -> Result<u64, String> {
    let first = self.current.unwrap();

    // Only a leading zero can introduce a radix prefix.
    let prefix = if first == '0' {
        self.peek().copied()
    } else {
        None
    };

    match prefix {
        Some('x' | 'X') => {
            // Two steps: onto the radix letter, then onto the first digit.
            self.advance();
            self.advance();
            self.read_hex_number()
        }
        Some('b' | 'B') => {
            self.advance();
            self.advance();
            self.read_binary_number()
        }
        _ => self.read_decimal_number(),
    }
}
/// Reads a decimal literal whose first character is `current`.
///
/// # Errors
/// Returns a message containing the text that was read when it does not
/// parse as a `u64` (for example because it is too large).
fn read_decimal_number(&mut self) -> Result<u64, String> {
    // `current` is taken as-is; the caller has already classified it.
    let mut digits: String = self.current.into_iter().collect();

    while let Some(digit) = self.peek().copied().filter(char::is_ascii_digit) {
        self.advance();
        digits.push(digit);
    }

    match digits.parse::<u64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid decimal number: {}", digits)),
    }
}
/// Reads the digits of a hexadecimal literal; `current` must be the first
/// character after the `0x` prefix.
///
/// # Errors
/// * when the character right after the prefix is not a hex digit. It used
///   to be skipped silently, so `0x;ff` was read as `0xff`;
/// * when the digits do not fit into a `u64`.
fn read_hex_number(&mut self) -> Result<u64, String> {
    let first = match self.current {
        Some(c) if c.is_ascii_hexdigit() => c,
        _ => return Err("Invalid hexadecimal number: no digits after 0x".to_string()),
    };

    let mut num_str = String::from(first);
    while let Some(&c) = self.peek() {
        if !c.is_ascii_hexdigit() {
            break;
        }
        self.advance();
        num_str.push(c);
    }

    u64::from_str_radix(&num_str, 16)
        .map_err(|_| format!("Invalid hexadecimal number: {}", num_str))
}
/// Reads the digits of a binary literal; `current` must be the first
/// character after the `0b` prefix.
///
/// # Errors
/// * when the character right after the prefix is not `0` or `1`. It used
///   to be skipped silently, so `0b21` was read as `1`;
/// * when the digits do not fit into a `u64`.
fn read_binary_number(&mut self) -> Result<u64, String> {
    let first = match self.current {
        Some(c @ ('0' | '1')) => c,
        _ => return Err("Invalid binary number: no digits after 0b".to_string()),
    };

    let mut num_str = String::from(first);
    while let Some(&c) = self.peek() {
        if c != '0' && c != '1' {
            break;
        }
        self.advance();
        num_str.push(c);
    }

    u64::from_str_radix(&num_str, 2)
        .map_err(|_| format!("Invalid binary number: {}", num_str))
}
/// Reads a string literal; `current` must be the opening quote.
///
/// On success `current` is left on the closing quote (it is not consumed
/// here). The escapes `\n`, `\t`, `\r`, `\\` and `\"` are translated; any
/// other escaped character is kept as-is.
///
/// Newlines inside the literal advance the line counter, so line numbers
/// reported after a multi-line string stay correct.
///
/// # Errors
/// Returns a message when the input ends inside the literal; the message for
/// an unterminated literal names the line the literal started on.
fn read_string(&mut self) -> Result<String, String> {
    let start_line = self.line;
    self.advance(); // Skip the opening quote

    let mut s = String::new();
    while let Some(c) = self.current {
        if c == '"' {
            return Ok(s);
        }

        if c == '\\' {
            self.advance();
            let Some(escaped) = self.current else {
                return Err("Unexpected end of string after escape".to_string());
            };
            // A backslash directly before a line break still ends the line.
            if escaped == '\n' {
                self.line += 1;
            }
            s.push(match escaped {
                'n' => '\n',
                't' => '\t',
                'r' => '\r',
                '\\' => '\\',
                '"' => '"',
                other => other, // For now, just use the character as-is
            });
        } else {
            if c == '\n' {
                self.line += 1;
            }
            s.push(c);
        }

        self.advance();
    }

    Err(format!(
        "Unterminated string literal starting at line {}",
        start_line
    ))
}
/// Consumes the next character when it equals `expected`.
///
/// Returns whether it was consumed.
fn match_next(&mut self, expected: char) -> bool {
    let matched = self.peek() == Some(&expected);
    if matched {
        self.advance();
    }
    matched
}
/// Maps `c` to its token when `c` is always a complete token on its own.
///
/// Returns `None` for every other character; nothing is consumed.
fn scan_single_char_token(&mut self, c: char) -> Option<Token> {
    let token = match c {
        '(' => Token::LeftParen,
        ')' => Token::RightParen,
        '{' => Token::LeftBrace,
        '}' => Token::RightBrace,
        ';' => Token::Semicolon,
        ',' => Token::Comma,
        '&' => Token::Amphersand,
        '+' => Token::Plus,
        '*' => Token::Star,
        _ => return None,
    };
    Some(token)
}
/// Recognises operators that may span one or two characters.
///
/// For `-`, `!`, `=`, `<` and `>` the lookahead is checked (and consumed via
/// `match_next`) to form `->`, `!=`, `==`, `<=` and `>=`; otherwise the
/// single-character token is produced. The character `c` itself is not
/// consumed here.
///
/// Returns `None` when `c` starts no operator, and also for a `/` that begins
/// a `//` or `/*` comment, which is left for the comment skipper.
fn scan_operator(&mut self, c: char) -> Option<Token> {
    let token = match c {
        '-' if self.match_next('>') => Token::RightArrow,
        '-' => Token::Minus,
        '!' if self.match_next('=') => Token::BangEqual,
        '!' => Token::Bang,
        '=' if self.match_next('=') => Token::EqualEqual,
        '=' => Token::Assign,
        '<' if self.match_next('=') => Token::LessEqual,
        '<' => Token::Less,
        '>' if self.match_next('=') => Token::GreaterEqual,
        '>' => Token::Greater,
        // Single colon (type annotations); `::` is handled in
        // keyword_or_identifier for namespaces.
        ':' => Token::Colon,
        '/' => {
            // A following `/` or `*` starts a comment: do not consume it,
            // skip_whitespace_and_comments deals with it.
            if matches!(self.peek(), Some(&'/') | Some(&'*')) {
                return None;
            }
            Token::Slash
        }
        _ => return None,
    };
    Some(token)
}
/// Produces the next token from the input, or `Token::Eof` once the input is
/// exhausted.
///
/// Whitespace and comments are skipped first. Lexical errors are reported on
/// stderr and recovered from by returning a placeholder token of the kind that
/// was being read (empty string, integer `0`, or the char read so far), so
/// lexing always continues. Characters that start no token are reported and
/// skipped.
pub fn next_token(&mut self) -> Token {
    // Loop rather than recurse so that a long run of unknown characters does
    // not grow the call stack.
    loop {
        self.skip_whitespace_and_comments();
        let Some(c) = self.current else {
            return Token::Eof;
        };

        // Try single-character tokens first
        if let Some(token) = self.scan_single_char_token(c) {
            self.advance();
            return token;
        }

        // Try operators (may be multi-character)
        if let Some(token) = self.scan_operator(c) {
            self.advance();
            return token;
        }

        // Char literals
        if c == '\'' {
            let mut value = ' ';
            self.advance();
            if let Some(ch) = self.current {
                value = ch;
                self.advance();
            }
            if self.current == Some('\'') {
                self.advance();
            } else {
                eprintln!("Lexer error on line {}: Invalid char literal", self.line);
            }
            // Always return here. Falling through on error would reach the
            // unknown-character path with a stale `c`, emit a second,
            // misleading warning and throw away the character the cursor is
            // now on (e.g. the `;` in `'a;`).
            // NOTE(review): escape sequences such as '\n' are not interpreted
            // for char literals, unlike string literals.
            return Token::Char(value);
        }

        // String literals
        if c == '"' {
            let token = match self.read_string() {
                Ok(s) => Token::String(s),
                Err(e) => {
                    eprintln!("Lexer error on line {}: {}", self.line, e);
                    // Skip to next quote or end
                    while let Some(ch) = self.current {
                        if ch == '"' || ch == '\n' {
                            break;
                        }
                        self.advance();
                    }
                    Token::String(String::new())
                }
            };
            // Step past the closing quote that read_string leaves in place.
            self.advance();
            return token;
        }

        // Identifiers and keywords (including namespaced identifiers)
        if c.is_alphabetic() || c == '_' {
            let token = self.keyword_or_identifier();
            self.advance();
            return token;
        }

        // Numbers (decimal, hex, binary)
        if c.is_ascii_digit() {
            let token = match self.read_number() {
                Ok(num) => Token::Integer(num),
                Err(e) => {
                    eprintln!("Lexer error on line {}: {}", self.line, e);
                    // Skip the rest of the invalid number
                    while let Some(&ch) = self.peek() {
                        if !ch.is_alphanumeric() {
                            break;
                        }
                        self.advance();
                    }
                    Token::Integer(0)
                }
            };
            self.advance();
            return token;
        }

        // Unknown character - skip it and try again
        eprintln!(
            "Lexer warning on line {}: Skipping unknown character '{}'",
            self.line, c
        );
        self.advance();
    }
}
}
/// Iterates over the tokens of the input; iteration ends at the first
/// `Token::Eof`, which is itself never yielded.
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        let token = self.next_token();
        if matches!(token, Token::Eof) {
            None
        } else {
            Some(token)
        }
    }
}
+38
View File
@@ -0,0 +1,38 @@
use common::logging::log;
use crate::model::{CompilerError, Program};
use parser::{ParseResult, Parser};
use semantic_analyser::Analyser;
pub mod lexer;
pub mod parser;
pub mod semantic_analyser;
/// Runs the front-end pipeline on `input`: tokenise, parse, then analyse.
///
/// Returns the parsed [`Program`] once the analyser has accepted it.
///
/// # Errors
///
/// Returns the parser's error when parsing is rejected, a
/// `CompilerError::Generic` when the parser reports `ParseResult::Deny`, and
/// the analyser's error when analysis fails (this used to panic via
/// `unwrap()`).
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
    log("Tokenising Input...");
    let tokens: Vec<_> = lexer::Lexer::new(input).collect();

    log(&format!("Parsing {} Tokens...", tokens.len()));
    let mut parser = Parser::new(tokens);
    let ast = match parser.parse() {
        ParseResult::Accept(ast) => ast,
        ParseResult::Reject(e) => return Err(e),
        ParseResult::Deny => {
            return Err(CompilerError::Generic("Parser used ::Deny".to_string()));
        }
    };

    log("Analyzing AST...");
    log("Checking Type Information...");
    // `analyse` takes the program by value, so it gets its own copy; the
    // original is what we hand back to the caller.
    Analyser::new().analyse(ast.clone())?;
    log("Type Checking Complete...");

    Ok(ast)
}
@@ -1,6 +1,9 @@
use crate::lexer::{Name, Token};
use super::lexer::Token;
use crate::model::{
BinaryOperator, Block, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, TypeId, UnaryOperator, Variable,
};
use crate::{expect_tt, expect_value};
use core::fmt;
use std::ops::{ControlFlow, FromResidual, Try};
#[derive(Debug, Clone)]
@@ -10,16 +13,6 @@ pub enum ParseResult<T, E> {
Reject(E),
}
/// Errors reported by the compiler front end.
#[derive(Debug, Clone)]
pub enum CompilerError {
/// A token was found where it is not allowed; carries the offending token.
UnexpectedToken(Token),
/// The input ended while more was expected.
UnexpectedEndOfInput,
/// Carries a single offending character.
UnexpectedCharacter(char),
/// Carries the name that could not be resolved.
Undefined(Name),
/// Syntax error with a free-form description.
InvalidSyntax(String),
/// Any other error, described by its message.
Generic(String),
}
pub struct Parser {
tokens: Vec<Token>,
idx: usize,
@@ -86,7 +79,11 @@ impl Parser {
let init = match value {
Token::String(x) => Some(ConstExpr::String(x)),
Token::Integer(x) => Some(ConstExpr::Number(x as i32)),
_ => return ParseResult::Reject(CompilerError::UnexpectedToken(value)),
_ => {
return ParseResult::Reject(CompilerError::UnexpectedToken(
value.tt().to_string(),
));
}
};
let _ = expect_tt!(self.next()?, Semicolon)?;
@@ -141,7 +138,9 @@ impl Parser {
body: self.parse_block()?,
})
} else {
ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?))
ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
))
}
}
@@ -268,7 +267,7 @@ impl Parser {
expr
} else {
return ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?,
self.peek_next()?.tt().to_string(),
));
};
@@ -341,7 +340,9 @@ impl Parser {
});
}
ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?))
ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
))
}
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
@@ -463,7 +464,9 @@ impl Parser {
let _ = expect_tt!(self.next()?, RightParen)?;
ParseResult::Accept(expr)
}
_ => ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?)),
_ => ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
)),
}
}
@@ -525,197 +528,6 @@ impl Parser {
}
}
/// Root of the AST: the list of top-level declarations of one source file.
#[derive(Debug, Clone)]
pub struct Program {
/// Top-level declarations, as a plain list.
pub declarations: Vec<Declaration>,
}
/// A top-level item of a [`Program`].
#[derive(Debug, Clone)]
pub enum Declaration {
/// A function with its name, return type, parameter list and body.
Function {
name: String,
return_type: TypeId,
params: Vec<Variable>,
body: Block,
},
/// A top-level variable with an optional constant initialiser.
Variable {
var: Variable,
init: Option<ConstExpr>,
is_const: bool,
},
/// A reference to another source unit (see [`Dependency`]).
Dependency(Dependency),
}
/// A named dependency together with the path it is loaded from.
#[derive(Debug, Clone)]
pub struct Dependency {
/// Name the dependency is referred to by.
pub name: String,
/// Path of the dependency, stored as given.
pub path: String,
}
/// Type of a variable, parameter or function result.
///
/// The first eight variants are scalar types; the remaining ones are built
/// from other `TypeId`s.
#[derive(Debug, Clone)]
pub enum TypeId {
U8,
U16,
U32,
I8,
I16,
I32,
Char,
Void,
/// Pointer type; wraps the type it refers to.
Ptr(Box<TypeId>),
/// Reference type; wraps the type it refers to.
Ref(Box<TypeId>),
/// Array type: element type plus a `usize` (presumably the length — confirm).
Array(Box<TypeId>, usize),
/// Struct type identified by `name`, with its member variables.
Struct { name: Name, fields: Vec<Variable> },
}
/// A sequence of statements, e.g. a function body or a branch of an `If`.
pub type Block = Vec<Statement>;
/// A name paired with its type; used for globals, locals, parameters and
/// struct fields.
#[derive(Debug, Clone)]
pub struct Variable {
/// Identifier of the variable.
pub name: String,
/// Type of the variable.
pub type_id: TypeId,
}
/// A statement inside a [`Block`].
#[derive(Debug, Clone)]
pub enum Statement {
/// A nested block of statements.
Block(Block),
/// Declaration of a variable with an optional initial value.
Declaration {
var: Variable,
value: Option<Expression>,
},
/// Assignment of `value` to the variable called `varname`.
Assign {
varname: String,
value: Expression,
},
/// Write of `value` through the pointer expression `ptr`.
PtrWrite {
ptr: Expression,
value: Expression,
},
/// An expression used as a statement.
Expression {
expr: Expression,
},
/// Conditional with both branches stored as blocks.
If {
condition: Expression,
then_stmt: Block,
else_stmt: Block,
},
/// Loop guarded by `condition`.
While {
condition: Expression,
body: Vec<Statement>,
},
/// Loop without a condition.
Loop(Block),
Break,
Continue,
/// Return with an optional value expression.
Return(Option<Expression>),
}
/// A constant usable as the initialiser of a top-level variable.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// Integer constant.
    Number(i32),
    /// String constant, stored without surrounding quotes.
    String(String),
}

/// Renders numbers in decimal and strings wrapped in double quotes. The
/// string contents are written verbatim (no escaping is applied).
impl fmt::Display for ConstExpr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Number(value) => write!(f, "{value}"),
            Self::String(text) => write!(f, "\"{text}\""),
        }
    }
}
/// An expression node of the AST.
#[derive(Debug, Clone)]
pub enum Expression {
/// Placeholder for "no expression".
Empty,
/// Binary operation `left op right`.
Binary {
op: BinaryOperator,
left: Box<Expression>,
right: Box<Expression>,
},
/// Unary operation applied to `operand`.
Unary {
op: UnaryOperator,
operand: Box<Expression>,
},
/// Use of a variable; `expr_type` holds its type when one has been recorded.
Variable {
name: Name,
expr_type: Option<TypeId>,
},
/// Call of the function `name` with the given arguments.
Call {
name: Name,
args: Vec<Expression>,
},
/// Integer literal.
Number(isize),
/// String literal.
StringLiteral(String),
/// Character literal.
CharLiteral(char),
}
impl Expression {
    /// Reports whether this expression is considered pure.
    ///
    /// Literals, variable reads and `Empty` are pure; a unary or binary
    /// expression is pure when all of its operands are. Calls are always
    /// treated as impure.
    pub fn is_pure(&self) -> bool {
        match self {
            Expression::Number(_) => true,
            Expression::StringLiteral(_) => true,
            Expression::CharLiteral(_) => true,
            // TODO: will require checking if the associated function body is pure
            Expression::Call { .. } => false,
            Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
            Expression::Unary { operand, .. } => operand.is_pure(),
            Expression::Empty => true,
            Expression::Variable { .. } => true,
        }
    }
}
/// Operators that take two operands: arithmetic first, then comparisons.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    Add,
    Sub,
    Mul,
    Div,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

/// Renders the operator as its source-level symbol (`+`, `==`, `<=`, ...).
impl fmt::Display for BinaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::Eq => "==",
            Self::Ne => "!=",
            Self::Lt => "<",
            Self::Gt => ">",
            Self::Le => "<=",
            Self::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
/// Operators that take a single operand.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
    Reference,
    Dereference,
}

/// Renders the operator as its source-level symbol: `+`, `-`, `&` or `*`.
impl fmt::Display for UnaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Reference => "&",
            Self::Dereference => "*",
        };
        f.write_str(symbol)
    }
}
impl<T, E> ParseResult<T, E> {
pub fn accepted(&self) -> bool {
matches!(self, ParseResult::Accept(_))
@@ -772,7 +584,7 @@ macro_rules! expect_tt {
)+
_ => {
// let expected = format!("[{}]", vec![$(stringify!($variant)),+].join(" | "));
ParseResult::Reject(CompilerError::UnexpectedToken(token))
ParseResult::Reject(CompilerError::UnexpectedToken(tt))
}
}
}};
@@ -784,7 +596,9 @@ macro_rules! expect_value {
let tok = $expr;
match tok.clone() {
Token::$variant(value) => ParseResult::Accept(value),
_ => ParseResult::Reject(CompilerError::UnexpectedToken(tok)),
_ => {
ParseResult::Reject(CompilerError::UnexpectedToken(tok.tt().to_string()))
}
}
}};
}
@@ -0,0 +1,13 @@
use crate::model::{CompilerError, Program};
/// Semantic analysis pass over a parsed [`Program`].
///
/// The analyser carries no state; it currently accepts every program.
#[derive(Debug, Default, Clone, Copy)]
pub struct Analyser;

impl Analyser {
    /// Creates an analyser. Equivalent to `Analyser::default()`.
    pub fn new() -> Self {
        Self
    }

    /// Analyses `_ast`.
    ///
    /// # Errors
    ///
    /// None at present: the body is a stub that always returns `Ok(())`.
    pub fn analyse(&self, _ast: Program) -> Result<(), CompilerError> {
        Ok(())
    }
}
+15
View File
@@ -0,0 +1,15 @@
use crate::model::{CompilerError, Program};
mod c;
mod dsc;
/// Selects a front end by file extension and parses `data` with it.
///
/// `"dsc"` is handled by the `dsc` module and `"c"` by the `c` module.
///
/// # Errors
///
/// Propagates the chosen front end's error, and returns
/// `CompilerError::Generic` for any other extension.
pub fn compiler_frontend(ext: &str, data: &str) -> Result<Program, CompilerError> {
    let program = match ext {
        "dsc" => dsc::generate_ast(&data)?,
        "c" => c::generate_ast(&data)?,
        unsupported => {
            let message = format!("File type {} not supported", unsupported);
            return Err(CompilerError::Generic(message));
        }
    };
    Ok(program)
}
+37 -41
View File
@@ -4,17 +4,12 @@ use std::path::Path;
use common::logging::log;
use crate::{
codegen::CodeGenerator,
parser::{ParseResult, Parser},
semantic_analyser::Analyser,
};
use crate::specialised::build_specialised;
mod codegen;
mod lexer;
mod parser;
mod registers;
mod semantic_analyser;
mod backend;
mod frontend;
mod model;
mod specialised;
pub fn compile_file(
input_path: &Path,
@@ -22,43 +17,44 @@ pub fn compile_file(
) -> Result<(), Box<dyn std::error::Error>> {
let input = std::fs::read_to_string(input_path).expect("Failed to read input file");
log("Tokenising Input...");
let input_ext = input_path
.extension()
.and_then(|s| s.to_str())
.unwrap_or("");
let lexer = lexer::Lexer::new(&input);
let tokens = lexer.collect::<Vec<_>>();
// println!("{tokens:?}");
log(&format!("Parsing {} Tokens...", tokens.len()));
let mut parser = Parser::new(tokens);
let ast = match parser.parse() {
ParseResult::Accept(ast) => ast,
ParseResult::Reject(e) => {
eprintln!("Error: {e:?}");
return Err("Parsing error".into());
}
ParseResult::Deny => {
panic!("Parser denied parsing")
}
// check if we're using a specialised compiler
if let Some(output) = build_specialised(input_ext, &input) {
let result = match output {
Ok(output) => output,
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
};
// println!("{ast:#?}");
log("Analyzing AST...");
log("Checking Type Information...");
std::fs::write(output_path, &result).expect("Failed to write output");
let analyser = Analyser::new();
analyser.analyse(ast.clone()).unwrap();
log(&format!(
"Compilation Successful ✅ \n\tSource: {}\n\tOutput: {}\n",
input_path.display(),
output_path.display(),
));
log("Generating Code...");
// Code Gen
let mut generator = CodeGenerator::new(ast);
let result = match generator.generate() {
Ok(code) => code,
Err(e) => {
eprintln!("Parsing error: {:?}", e);
return Err("Code generation error".into());
return Ok(());
}
// Parse the input using the frontend, providing the file extension and data.
let ast = match frontend::compiler_frontend(input_ext, &input) {
Ok(ast) => ast,
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
};
let output_ext = output_path
.extension()
.and_then(|s| s.to_str())
.unwrap_or("");
// Generate the output using the backend with the parsed result.
let result = match backend::compiler_backend(output_ext, &ast) {
Ok(result) => result,
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
};
// println!("{result}");
+1 -1
View File
@@ -6,7 +6,7 @@ fn main() {
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
let args: Vec<String> = std::env::args().collect();
if args.len() < 2 {
eprintln!("Usage: c_compiler <src.c> [output.dsa]");
eprintln!("Usage: c_compiler <src.dsc> [output.dsa]");
return;
}
+213
View File
@@ -0,0 +1,213 @@
use core::fmt;
/// Errors shared by the compiler's front ends and back ends.
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum CompilerError {
/// A token was found where it is not allowed; carries a textual
/// description of that token.
UnexpectedToken(String),
/// The input ended while more was expected.
UnexpectedEndOfInput,
/// Carries a single offending character.
UnexpectedCharacter(char),
/// Carries the name that could not be resolved.
Undefined(Name),
/// Syntax error with a free-form description.
InvalidSyntax(String),
/// Any other error, described by its message.
Generic(String),
}
/// An identifier with an optional namespace qualifier.
#[derive(Debug, PartialEq, Clone)]
pub struct Name {
/// The identifier itself.
pub name: String,
/// Namespace the identifier is qualified with, if any.
pub namespace: Option<String>,
}
/// Root of the AST: the list of top-level declarations of one source file.
#[derive(Debug, Clone)]
pub struct Program {
/// Top-level declarations, as a plain list.
pub declarations: Vec<Declaration>,
}
/// A top-level item of a [`Program`].
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Declaration {
/// A function with its name, return type, parameter list and body.
Function {
name: String,
return_type: TypeId,
params: Vec<Variable>,
body: Block,
},
/// A top-level variable with an optional constant initialiser.
Variable {
var: Variable,
init: Option<ConstExpr>,
is_const: bool,
},
/// A reference to another source unit (see [`Dependency`]).
Dependency(Dependency),
}
/// A named dependency together with the path it is loaded from.
#[derive(Debug, Clone)]
pub struct Dependency {
/// Name the dependency is referred to by.
pub name: String,
/// Path of the dependency, stored as given.
pub path: String,
}
/// Type of a variable, parameter or function result.
///
/// The first eight variants are scalar types; the remaining ones are built
/// from other `TypeId`s.
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum TypeId {
U8,
U16,
U32,
I8,
I16,
I32,
Char,
Void,
/// Pointer type; wraps the type it refers to.
Ptr(Box<TypeId>),
/// Reference type; wraps the type it refers to.
Ref(Box<TypeId>),
/// Array type: element type plus a `usize` (presumably the length — confirm).
Array(Box<TypeId>, usize),
/// Struct type identified by `name`, with its member variables.
Struct { name: Name, fields: Vec<Variable> },
}
/// A sequence of statements, e.g. a function body or a branch of an `If`.
pub type Block = Vec<Statement>;
/// A name paired with its type; used for globals, locals, parameters and
/// struct fields.
#[allow(unused)]
#[derive(Debug, Clone)]
pub struct Variable {
/// Identifier of the variable.
pub name: String,
/// Type of the variable.
pub type_id: TypeId,
}
/// A statement inside a [`Block`].
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Statement {
/// A nested block of statements.
Block(Block),
/// Declaration of a variable with an optional initial value.
Declaration {
var: Variable,
value: Option<Expression>,
},
/// Assignment of `value` to the variable called `varname`.
Assign {
varname: String,
value: Expression,
},
/// Write of `value` through the pointer expression `ptr`.
PtrWrite {
ptr: Expression,
value: Expression,
},
/// An expression used as a statement.
Expression {
expr: Expression,
},
/// Conditional with both branches stored as blocks.
If {
condition: Expression,
then_stmt: Block,
else_stmt: Block,
},
/// Loop guarded by `condition`.
While {
condition: Expression,
body: Vec<Statement>,
},
/// Loop without a condition.
Loop(Block),
Break,
Continue,
/// Return with an optional value expression.
Return(Option<Expression>),
}
/// A constant usable as the initialiser of a top-level variable.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// Integer constant.
    Number(i32),
    /// String constant, stored without surrounding quotes.
    String(String),
}

/// Renders numbers in decimal and strings wrapped in double quotes. The
/// string contents are written verbatim (no escaping is applied).
impl fmt::Display for ConstExpr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Number(value) => write!(f, "{value}"),
            Self::String(text) => write!(f, "\"{text}\""),
        }
    }
}
/// An expression node of the AST.
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Expression {
/// Placeholder for "no expression".
Empty,
/// Binary operation `left op right`.
Binary {
op: BinaryOperator,
left: Box<Expression>,
right: Box<Expression>,
},
/// Unary operation applied to `operand`.
Unary {
op: UnaryOperator,
operand: Box<Expression>,
},
/// Use of a variable; `expr_type` holds its type when one has been recorded.
Variable {
name: Name,
expr_type: Option<TypeId>,
},
/// Call of the function `name` with the given arguments.
Call {
name: Name,
args: Vec<Expression>,
},
/// Integer literal.
Number(isize),
/// String literal.
StringLiteral(String),
/// Character literal.
CharLiteral(char),
}
impl Expression {
    /// Reports whether this expression is considered pure.
    ///
    /// Calls are always impure. Unary and binary expressions are pure exactly
    /// when all of their operands are. Everything else (literals, variable
    /// reads and `Empty`) is pure.
    pub fn is_pure(&self) -> bool {
        match self {
            Expression::Call { .. } => false,
            Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
            Expression::Unary { operand, .. } => operand.is_pure(),
            Expression::Empty
            | Expression::Number(_)
            | Expression::StringLiteral(_)
            | Expression::CharLiteral(_)
            | Expression::Variable { .. } => true,
        }
    }
}
/// Operators that take two operands: arithmetic first, then comparisons.
#[allow(unused)]
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    Add,
    Sub,
    Mul,
    Div,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

/// Renders the operator as its source-level symbol (`+`, `==`, `<=`, ...).
impl fmt::Display for BinaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::Eq => "==",
            Self::Ne => "!=",
            Self::Lt => "<",
            Self::Gt => ">",
            Self::Le => "<=",
            Self::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
/// Operators that take a single operand.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
    Reference,
    Dereference,
}

/// Renders the operator as its source-level symbol: `+`, `-`, `&` or `*`.
impl fmt::Display for UnaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Reference => "&",
            Self::Dereference => "*",
        };
        f.write_str(symbol)
    }
}
-13
View File
@@ -1,13 +0,0 @@
use crate::parser::{CompilerError, Program};
/// Semantic analysis pass over a parsed [`Program`].
///
/// The analyser carries no state; it currently accepts every program.
#[derive(Debug, Default, Clone, Copy)]
pub struct Analyser;

impl Analyser {
    /// Creates an analyser. Equivalent to `Analyser::default()`.
    pub fn new() -> Self {
        Self
    }

    /// Analyses `_ast`.
    ///
    /// The parameter is prefixed with an underscore because the stub body
    /// does not read it yet.
    ///
    /// # Errors
    ///
    /// None at present: the body always returns `Ok(())`.
    pub fn analyse(&self, _ast: Program) -> Result<(), CompilerError> {
        Ok(())
    }
}