Assembler: parsing and linking are done; codegen is all that's left.

This commit is contained in:
2025-06-18 03:54:39 +01:00
parent 6a0b5c617a
commit 1210b19333
6 changed files with 366 additions and 63 deletions
+6
View File
@@ -0,0 +1,6 @@
use common::{
instructions,
prelude::{Instruction, RTypeArgs},
};
use crate::model::{Node, Opcode};
+189
View File
@@ -0,0 +1,189 @@
use common::prelude::Register;
use crate::{
AssembleError, dsa, expect_token, expect_type,
model::{Node, Opcode, Token, TokenType},
node,
};
/// Expands every pseudo-instruction in `nodes` into the nodes that implement it.
///
/// Each node is offered to `try_expand`, which appends the expansion (or the
/// node itself, for opcodes it does not handle) to the output. If `try_expand`
/// reports an error, the node is kept exactly as it was parsed instead of
/// aborting the whole pass.
///
/// `module` is forwarded to `try_expand` unchanged.
///
/// # Errors
///
/// This function currently always returns `Ok`: expansion errors are absorbed
/// by the keep-as-is fallback described above.
pub fn expand_pseudo_ops(
    nodes: Vec<Node>,
    module: u64,
) -> Result<Vec<Node>, AssembleError> {
    let mut result = Vec::with_capacity(nodes.len());
    for node in nodes {
        // `try_expand` takes ownership, so hand it a copy and keep the original
        // around for the fallback path.
        if try_expand(node.clone(), &mut result, module).is_err() {
            result.push(node);
        }
    }
    Ok(result)
}
/// Dispatches `node` to the expander matching its opcode.
///
/// Pseudo-instructions (`push`, `pop`, `ldb`/`ldh`/`ldw`, `lwi`,
/// `resb`/`resh`/`resw`, `db`/`dh`/`dw`) are handed to their dedicated expander,
/// which appends the replacement nodes to `result`. Any other opcode is appended
/// to `result` unchanged.
///
/// `module` is accepted for the caller's benefit but is not used here.
///
/// # Errors
///
/// Returns whatever error the selected expander reports; in that case nothing
/// has been appended for this node.
fn try_expand(
    node: Node,
    result: &mut Vec<Node>,
    module: u64,
) -> Result<(), AssembleError> {
    // `opcode()` returns an owned copy, so `node` can be moved into the arm.
    match node.opcode() {
        Opcode::Push => expand_push(node, result),
        Opcode::Pop => expand_pop(node, result),
        Opcode::Ldb | Opcode::Ldh | Opcode::Ldw => expand_ldx(node, result),
        Opcode::Lwi => expand_lwi(node, result),
        Opcode::Resb | Opcode::Resh | Opcode::Resw => expand_resx(node, result),
        Opcode::Db | Opcode::Dh | Opcode::Dw => expand_dx(node, result),
        _ => {
            result.push(node);
            Ok(())
        }
    }
}
fn expand_push(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let reg = expect_type!(current.arg(0).unwrap(), Register)?;
nodes.extend(vec![
node!(label, Opcode::Iadd, reg.clone(), Token::Immediate(4)),
node!(None, Opcode::Stw, reg, Token::Register(Register::Spr)),
]);
Ok(())
}
fn expand_pop(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let reg = expect_type!(current.arg(0).unwrap(), Register)?;
nodes.extend(vec![
node!(label, Opcode::Isub, reg.clone(), Token::Immediate(4)),
node!(None, Opcode::Ldw, reg, Token::Register(Register::Spr)),
]);
Ok(())
}
/// Expands the symbol form of `ldb`/`ldh`/`ldw` into
///
/// ```text
/// lli <symbol>, reg
/// lui <symbol>, reg
/// ld? reg, 0, reg
/// ```
///
/// Operand 0 must be a symbol and operand 2 a register; operand 1 is not
/// inspected. The final node reuses the original opcode, and the label of the
/// original node (if any) is carried over to the first emitted node.
///
/// # Errors
///
/// Returns `AssembleError::InvalidArg` when operand 0 or 2 is missing, or the
/// error produced by `expect_type!` when an operand has the wrong token type.
fn expand_ldx(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
    let opcode = current.opcode();
    let name = expect_type!(current.arg(0)?, Symbol)?;
    let reg = expect_type!(current.arg(2)?, Register)?;
    // `node!` clones its arguments, so `name` and `reg` can be reused freely.
    nodes.extend([
        node!(current.label(), Opcode::Lli, name, reg),
        node!(None, Opcode::Lui, name, reg),
        node!(None, opcode, reg, Token::Immediate(0), reg),
    ]);
    Ok(())
}
/// Expands `lwi reg, <symbol>` into
///
/// ```text
/// lli <symbol>, reg
/// lui <symbol>, reg
/// ```
///
/// Operand 0 must be a register and operand 1 a symbol. The label of the
/// original node (if any) is carried over to the first emitted node.
///
/// # Errors
///
/// Returns `AssembleError::InvalidArg` when an operand is missing, or the error
/// produced by `expect_type!` when an operand has the wrong token type.
fn expand_lwi(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
    let reg = expect_type!(current.arg(0)?, Register)?;
    let name = expect_type!(current.arg(1)?, Symbol)?;
    // `node!` clones its arguments, so `name` and `reg` can be reused freely.
    nodes.extend([
        node!(current.label(), Opcode::Lli, name, reg),
        node!(None, Opcode::Lui, name, reg),
    ]);
    Ok(())
}
fn expand_resx(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let region_label = expect_token!(current.arg(0).unwrap(), Symbol)?;
let size = expect_token!(current.arg(1).unwrap(), Immediate)?;
let units_per = match current.opcode() {
Opcode::Resb => 4,
Opcode::Resh => 2,
Opcode::Resw => 1,
_ => unreachable!(),
};
let mut buffer = vec![];
// push the inital node with the label
for _ in 0..size.div_ceil(units_per) {
// push the rest of the nodes
buffer.push(node!(None, Opcode::Data, Token::Immediate(0)));
}
buffer[0].symbol = Some(region_label);
nodes.extend(buffer);
Ok(())
}
fn expand_dx(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let region_label = expect_token!(current.arg(0).unwrap(), Symbol)?;
let size = match current.opcode() {
Opcode::Db => 4,
Opcode::Dh => 2,
Opcode::Dw => 1,
_ => unreachable!(),
};
let mut buffer = vec![];
let mut args = current.args();
let label = args.remove(0);
for word in process_dx_data(args, size)? {
buffer.push(node!(None, Opcode::Data, Token::Immediate(word)));
}
buffer[0].symbol = Some(region_label);
nodes.extend(buffer);
Ok(())
}
/// Packs data-definition operands into 32-bit words.
///
/// Tokens are serialised into a byte stream in order:
/// - a string literal contributes its UTF-8 bytes;
/// - an immediate contributes its four bytes, most significant byte first.
///
/// After each token the stream is zero-padded to a multiple of `size` bytes,
/// and once all tokens are written it is zero-padded to a multiple of 4. The
/// stream is then split into 4-byte groups, each read as a big-endian `u32`,
/// so the first byte of the stream ends up in the most significant byte of the
/// first word.
///
/// # Panics
///
/// Panics if `size` is not 1, 2 or 4.
///
/// # Errors
///
/// Returns `AssembleError::Generic` for any token that is neither a string
/// literal nor an immediate.
fn process_dx_data(args: Vec<Token>, size: usize) -> Result<Vec<u32>, AssembleError> {
    assert!(matches!(size, 1 | 2 | 4));
    let mut buffer = Vec::<u8>::new();
    for token in args {
        match token {
            // A `String`'s bytes already are its UTF-8 encoding.
            Token::StringLit(s) => buffer.extend_from_slice(s.as_bytes()),
            // Immediates are always written as four bytes, big-endian.
            Token::Immediate(value) => buffer.extend_from_slice(&value.to_be_bytes()),
            _ => return Err(AssembleError::Generic),
        }
        // Zero-pad so the next token starts on a `size`-byte boundary.
        buffer.resize(buffer.len().next_multiple_of(size), 0);
    }
    // Zero-pad the tail so the stream splits into whole words.
    buffer.resize(buffer.len().next_multiple_of(4), 0);
    let result = buffer
        .chunks_exact(4)
        .map(|chunk| u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
        .collect();
    Ok(result)
}
+18 -6
View File
@@ -9,11 +9,14 @@ use std::{
use common::prelude::Instruction;
use crate::{
model::{Node, Token, TokenType},
expand::expand_pseudo_ops,
model::{Node, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::resolve_dependencies,
resolver::{resolve_dependencies, resolve_symbols},
};
pub mod codegen;
pub mod expand;
pub mod lexer;
pub mod model;
pub mod parser;
@@ -31,8 +34,11 @@ pub fn assemble(src: &Path) -> Vec<Instruction> {
Err(err) => println!("BIG ERROR {err:?}"),
}
for node in program.nodes {
println!("{node:?}");
let mut nodes = program.nodes;
resolve_symbols(&mut nodes).unwrap();
for node in nodes {
println!("{node}");
}
vec![]
@@ -71,7 +77,7 @@ fn prepare_dependency(
"{:20} {:20}",
"Expanding PseudoInstructions", filename
));
let nodes = Parser::expand_pseudo_ops(nodes, file_hash)?;
let nodes = expand_pseudo_ops(nodes, file_hash)?;
program.add_module(nodes);
@@ -110,6 +116,8 @@ pub enum AssembleError {
UnexpectedEof,
InvalidFile(PathBuf),
UnexpectedToken(Token, TokenType),
InvalidArg,
UndefinedSymbol(Symbol),
}
impl fmt::Display for AssembleError {
@@ -121,6 +129,10 @@ impl fmt::Display for AssembleError {
}
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
AssembleError::InvalidArg => write!(f, "Invalid argument"),
AssembleError::UndefinedSymbol(symbol) => {
write!(f, "Undefined symbol {symbol}")
}
}
}
}
@@ -218,7 +230,7 @@ macro_rules! expect_type {
other => {
let expected_type = expect_type!(@get_first_type $($variant),+);
Err(AssembleError::UnexpectedToken(
other.clone(),
other.clone().clone(),
expected_type,
))
}
+74 -7
View File
@@ -2,23 +2,76 @@ use std::{fmt, str::FromStr};
use common::prelude::Register;
use crate::AssembleError;
#[derive(Debug, Clone)]
pub struct Node(pub Option<Symbol>, pub Opcode, pub Vec<Token>);
/// A single parsed (or assembler-generated) instruction: an optional label,
/// an opcode and the operand tokens that follow it.
pub struct Node {
/// Label attached to this node, if any (returned by `label()`).
pub symbol: Option<Symbol>,
/// The operation this node performs (returned by `opcode()`).
pub opcode: Opcode,
/// Operand tokens in source order (returned by `args()` / `arg(i)`).
pub tokens: Vec<Token>,
}
/// Builds a `Node` from a label, an opcode and its operand tokens.
///
/// Two forms are accepted:
/// - `node!(label, opcode, args: tokens)` passes an existing token collection through;
/// - `node!(label, opcode, tok_a, tok_b, ...)` gathers one or more tokens into a `vec!`.
///
/// Every argument expression has `.clone()` called on it, so the caller's
/// values are not moved. `Node` is named unqualified, so it must be in scope
/// wherever the macro is invoked.
// NOTE(review): `#[macro_use]` normally applies to modules and `extern crate`
// items; it looks redundant on a `macro_rules!` definition — confirm and drop.
#[macro_export]
#[macro_use]
macro_rules! node {
// Pre-built operand list.
($symbol: expr, $opcode: expr, args: $tokens: expr) => {
Node::new($symbol.clone(), $opcode.clone(), $tokens.clone())
};
// One or more individual operand tokens.
($symbol: expr, $opcode: expr, $($tokens: expr),+) => {
Node::new($symbol.clone(), $opcode.clone(), vec![$($tokens.clone()),+])
};
}
impl Node {
    /// Creates a node from its label, opcode and operand tokens.
    pub fn new(symbol: Option<Symbol>, opcode: Opcode, tokens: Vec<Token>) -> Node {
        Node {
            symbol,
            opcode,
            tokens,
        }
    }

    /// Returns a copy of the label attached to this node, if any.
    pub fn label(&self) -> Option<Symbol> {
        self.symbol.clone()
    }

    /// Returns a copy of this node's opcode.
    pub fn opcode(&self) -> Opcode {
        self.opcode.clone()
    }

    /// Returns a copy of all operand tokens, in order.
    pub fn args(&self) -> Vec<Token> {
        self.tokens.clone()
    }

    /// Returns a copy of the operand at `index`.
    ///
    /// # Errors
    ///
    /// Returns `AssembleError::InvalidArg` when the node has no operand at
    /// that position.
    pub fn arg(&self, index: usize) -> Result<Token, AssembleError> {
        // Look the operand up in place; only the requested token is cloned.
        self.tokens
            .get(index)
            .cloned()
            .ok_or(AssembleError::InvalidArg)
    }
}
impl fmt::Display for Node {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let symbol = match &self.0 {
Some(symbol) => format!("{symbol}"),
let symbol = match &self.label() {
Some(symbol) => format!("{}:\n", symbol),
None => "".to_string(),
};
write!(f, "Node: {} {} {:?}", symbol, self.1, self.2)
write!(
f,
"\x1b[93m{} \t\x1b[94m{} \x1b[37m{:?} \x1b[0m",
symbol,
self.opcode(),
self.args()
)
}
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}::{}", self.module, self.name)
write!(f, "{} ( module: {})", self.name, self.module)
}
}
@@ -82,18 +135,29 @@ impl fmt::Display for Opcode {
Opcode::Push => write!(f, "push"),
Opcode::Pop => write!(f, "pop"),
Opcode::Lwi => write!(f, "lwi"),
// utility - removed at compile time
Opcode::Include => write!(f, "include"),
// special - generated by assembler
Opcode::Data => write!(f, "data"),
}
}
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Eq, Hash)]
pub struct Symbol {
pub name: String,
pub module: Module,
}
#[derive(Debug, Clone)]
impl PartialEq for Symbol {
fn eq(&self, other: &Self) -> bool {
self.name == other.name && self.module == other.module
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Module {
Resolved(u64),
Unresolved(String),
@@ -182,6 +246,9 @@ pub enum Opcode {
Pop,
Lwi,
Include,
// fake instructions (these aren't present in the binary as instructions)
Data,
}
#[derive(Debug)]
+12 -44
View File
@@ -3,7 +3,7 @@ use std::path::PathBuf;
use common::prelude::{Instruction, Register};
use crate::model::{Module, Node, Opcode, Symbol, Token, TokenType};
use crate::{AssembleError, dsa, expect_token, expect_type, quick_hash};
use crate::{AssembleError, dsa, expect_token, expect_type, node, quick_hash};
pub struct Parser {
tokens: Vec<Token>,
@@ -55,52 +55,15 @@ impl Parser {
pub fn get_dependencies(nodes: &Vec<Node>) -> Result<Vec<PathBuf>, AssembleError> {
let mut dependencies = Vec::new();
for node in nodes {
if let Opcode::Include = node.1 {
let path = expect_token!(node.2.get(1).unwrap(), StringLit)?;
if let Opcode::Include = node.opcode() {
let path = expect_token!(node.args().get(1).unwrap(), StringLit)?;
dependencies.push(PathBuf::from(path));
}
}
Ok(dependencies)
}
pub fn expand_pseudo_ops(
mut nodes: Vec<Node>,
module: u64,
) -> Result<Vec<Node>, AssembleError> {
let mut result = Vec::<Node>::with_capacity(nodes.len());
for node in nodes.iter_mut() {
match node.1 {
// Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(),
// Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(),
Opcode::Push => {
// inc SPR
// STW reg, SPR
let label = node.0.clone();
let reg = expect_token!(node.2.get(0).unwrap(), Register)?;
match label {
Some(label) => result.extend(dsa!(
module,
"{}: iadd spr, 4\n stw {}, spr",
label,
reg
)),
None => {
result.extend(dsa!(module, "iadd spr, 4\n stw {}, spr", reg))
}
}
}
_ => result.push(node.clone()),
}
}
Ok(result)
}
fn parse_instruction(&mut self) -> Result<Node, AssembleError> {
println!("tokens: {:?}", self.tokens);
if self.tokens.is_empty() {
unreachable!();
}
@@ -208,8 +171,9 @@ impl Parser {
// D-type pseudoinstructions (data definition)
Opcode::Resb | Opcode::Resh | Opcode::Resw => {
let name = expect_type!(self.next()?, Symbol)?;
let num = expect_type!(self.next()?, Immediate)?;
args = vec![num];
args = vec![name, num];
}
Opcode::Db | Opcode::Dh | Opcode::Dw => {
@@ -237,9 +201,13 @@ impl Parser {
Opcode::Hlt | Opcode::Nop | Opcode::Irt => {
args = Vec::new();
}
Opcode::Data => {
return Err(AssembleError::Generic);
}
}
Ok(Node(label, opcode, args))
Ok(node!(label, opcode, args: args))
}
fn parse_data_definition(
@@ -248,8 +216,8 @@ impl Parser {
) -> Result<Vec<Token>, AssembleError> {
let mut values = Vec::new();
let _name = self.expect(TokenType::Symbol)?;
values.push(self.tokens.pop().unwrap());
let name = expect_type!(self.next()?, Symbol)?;
values.push(name);
match opcode {
Opcode::Db => {
+67 -6
View File
@@ -1,4 +1,4 @@
use std::path::PathBuf;
use std::{collections::HashMap, path::PathBuf};
use crate::{
AssembleError,
@@ -6,18 +6,77 @@ use crate::{
quick_hash,
};
/// Replaces symbolic first operands with the addresses they refer to.
///
/// A symbol table is built from the labels in `nodes`. Then, for every `lli`,
/// `lui` and jump node (`jmp`, `jeq`, `jne`, `jgt`, `jge`, `jlt`, `jle`) whose
/// first operand is a symbol, that operand is overwritten in place with a
/// `Token::Immediate` holding the symbol's address. Nodes with other opcodes,
/// and first operands that are not symbols, are left untouched.
///
/// # Errors
///
/// - `AssembleError::UndefinedSymbol` when a referenced symbol has no entry in
///   the table;
/// - `AssembleError::InvalidArg` when one of the listed opcodes has no operands;
/// - any error reported while building the symbol table.
pub fn resolve_symbols(nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
    let symbol_table = generate_symbol_table(nodes)?;
    for node in nodes.iter_mut() {
        // Only these opcodes take an address as their first operand.
        let takes_address = matches!(
            node.opcode(),
            Opcode::Lli
                | Opcode::Lui
                | Opcode::Jmp
                | Opcode::Jeq
                | Opcode::Jne
                | Opcode::Jgt
                | Opcode::Jge
                | Opcode::Jlt
                | Opcode::Jle
        );
        if !takes_address {
            continue;
        }
        if let Token::Symbol(symbol) = node.arg(0)? {
            let Some(&address) = symbol_table.get(&symbol) else {
                return Err(AssembleError::UndefinedSymbol(symbol));
            };
            // `arg(0)` succeeded above, so index 0 exists.
            node.tokens[0] = Token::Immediate(address);
        }
    }
    Ok(())
}
/// Maps every label in `nodes` to an address.
///
/// A label's address is four times the index of the node carrying it. When the
/// same symbol labels more than one node, the entry of the last such node wins.
/// Presumably every node occupies one 4-byte word in the output — TODO confirm.
///
/// This function currently always returns `Ok`.
fn generate_symbol_table(
    nodes: &Vec<Node>,
) -> Result<HashMap<Symbol, u32>, AssembleError> {
    let table = nodes
        .iter()
        .enumerate()
        .filter_map(|(index, entry)| {
            entry
                .label()
                .map(|symbol| (symbol, 4 * index as u32))
        })
        .collect();
    Ok(table)
}
pub fn resolve_dependencies(mut nodes: Vec<Node>) -> Result<Vec<Node>, AssembleError> {
// first we get a list of imports
let mut dependencies = Vec::new();
for node in &nodes {
if let Opcode::Include = node.1 {
if let Opcode::Include = node.opcode() {
// we want the path, and the name
let name = if let Token::Symbol(name) = node.2.get(0).unwrap() {
let name = if let Token::Symbol(name) = node.arg(0).unwrap() {
name.name.clone()
} else {
unreachable!()
}; //node.2.get(0).unwrap()
let path = if let Token::StringLit(path) = node.2.get(1).unwrap() {
let path = if let Token::StringLit(path) = node.arg(1).unwrap() {
path
} else {
unreachable!()
@@ -32,7 +91,9 @@ pub fn resolve_dependencies(mut nodes: Vec<Node>) -> Result<Vec<Node>, AssembleE
// now we resolve the symbols on all the nodes
// we need to check all operands for unresolved signals
for (i, node) in nodes.clone().iter().enumerate() {
let Node(_, _, operands) = node;
let Node {
tokens: operands, ..
} = node;
for (j, token) in operands.iter().enumerate() {
if let Token::Symbol(symbol) = token {
for d in &dependencies {
@@ -53,7 +114,7 @@ pub fn resolve_dependencies(mut nodes: Vec<Node>) -> Result<Vec<Node>, AssembleE
}
for (i, j, symbol) in changes {
nodes[i as usize].2[j as usize] = Token::Symbol(symbol);
nodes[i as usize].tokens[j as usize] = Token::Symbol(symbol);
}
Ok(nodes)