assembler updates

This commit is contained in:
2025-06-17 23:48:06 +01:00
parent 87fbd6c362
commit b0670d1e6c
3 changed files with 268 additions and 161 deletions
+111 -6
View File
@@ -11,11 +11,13 @@ use common::prelude::Instruction;
use crate::{
model::{Node, Token, TokenType},
parser::{Parser, Program},
resolver::resolve_dependencies,
};
pub mod lexer;
pub mod model;
pub mod parser;
pub mod resolver;
pub fn assemble(src: &PathBuf) -> Vec<Instruction> {
let mut modules = HashSet::<u64>::new();
@@ -59,14 +61,19 @@ fn prepare_dependency(
let tokens = lexer::lexer(src, file_hash)?;
log(&format!("{:20} {:20}", "Parsing", filename));
let mut parser = Parser::new(tokens);
let parsed = Parser::parse_nodes(tokens)?;
log(&format!("{:20} {:20}", "Resolving Deps", filename));
let deps = parser
.parse_nodes()?
.resolve_dependencies()?
.get_dependencies()?;
program.add_module(parser.get());
let nodes = resolve_dependencies(parsed)?;
let deps = Parser::get_dependencies(&nodes)?;
log(&format!(
"{:20} {:20}",
"Expanding PseudoInstructions", filename
));
let nodes = Parser::expand_pseudo_ops(nodes, file_hash)?;
program.add_module(nodes);
for dep in deps {
log(&format!(
@@ -100,6 +107,7 @@ pub fn disassemble(_: Vec<Instruction>) -> String {
/// Errors reported by the assembler's lexing, parsing and dependency
/// resolution stages.
///
/// The `Display` implementation in this file renders each variant as a
/// short human-readable message.
#[derive(Debug)]
pub enum AssembleError {
/// Failure with no additional detail attached.
Generic,
/// The parser needed another token but none were left
/// (displayed as "Unexpected end of file").
UnexpectedEof,
/// A file could not be used; carries the offending path, which is
/// included in the displayed message.
InvalidFile(PathBuf),
/// A token of the wrong kind was encountered: the first field is the
/// token that was found, the second is the token type that was expected.
UnexpectedToken(Token, TokenType),
}
@@ -111,6 +119,7 @@ impl fmt::Display for AssembleError {
AssembleError::UnexpectedToken(tok, expected) => {
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
}
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
}
}
@@ -125,3 +134,99 @@ fn quick_hash(value: &PathBuf) -> u64 {
/// Prints `message` to standard output as an informational line.
///
/// The line is prefixed with `INFO:`, wrapped in the ANSI escape sequences
/// `\x1b[32m` (green foreground) and `\x1b[0m` (reset), followed by a space.
fn log(message: &str) {
println!("\x1b[32mINFO:\x1b[0m {}", message);
}
/// Lexes and parses a string of assembly source into parsed nodes.
///
/// Two forms are accepted (a trailing comma is allowed in both):
///
/// * `dsa!(hash, "source")` parses the string as-is.
/// * `dsa!(hash, "fmt {}", args...)` first builds the source text with
///   `format!`, then parses it.
///
/// `hash` is forwarded unchanged as the second argument of the lexer.
///
/// The expansion applies `?` to both the lexer and the parser result, so
/// the macro can only be used inside a function whose return type can
/// absorb those errors. The value of the expansion is whatever
/// `Parser::parse_nodes` yields on success.
///
/// All paths go through `$crate`, so the macro resolves the lexer and the
/// parser correctly regardless of where it is invoked from.
#[macro_export]
macro_rules! dsa {
    // Version with formatting arguments
    ($hash:expr, $input:expr, $($args:expr),+ $(,)?) => {{
        let input = format!($input, $($args),+);
        let tokens = $crate::lexer::lexer(input, $hash)?;
        $crate::parser::Parser::parse_nodes(tokens)?
    }};
    // Version without formatting
    ($hash:expr, $input:expr $(,)?) => {{
        let input = String::from($input);
        let tokens = $crate::lexer::lexer(input, $hash)?;
        $crate::parser::Parser::parse_nodes(tokens)?
    }};
}
/// Extracts the payload of a token, checking that it is the given variant.
///
/// `expect_token!(token, Variant)` evaluates to
/// `Ok(payload)` (a clone of the value stored in `Token::Variant`) when
/// `token` is that variant, and to
/// `Err(AssembleError::UnexpectedToken(token, TokenType::Variant))`
/// otherwise.
///
/// `token` may be an owned `Token` or a reference to one; both the payload
/// and the offending token are cloned so the result is always owned.
///
/// `Variant` can be any single-payload `Token` variant that has a
/// `TokenType` variant of the same name (e.g. `Symbol`, `Register`,
/// `Immediate`, `StringLit`, `Opcode`). Types are named through `$crate`,
/// so the call site does not need them imported.
#[macro_export]
macro_rules! expect_token {
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::model::TokenType::$variant,
            )),
        }
    };
}
/// Checks that a token is one of several accepted variants.
///
/// `expect_type!(token, A, B, ...)` takes ownership of `token` and
/// evaluates to `Ok(token)` when it is any of the listed `Token` variants.
/// Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token, TokenType::A))`; only the
/// FIRST listed variant is reported as the expected type, because the
/// error carries a single `TokenType`.
///
/// Each listed name must be a single-payload `Token` variant with a
/// `TokenType` variant of the same name. Types are named through `$crate`,
/// so the call site does not need them imported.
#[macro_export]
macro_rules! expect_type {
    // Internal rule: maps the first listed variant to its `TokenType`.
    (@get_first_type $first:ident $(, $rest:ident)*) => {
        $crate::model::TokenType::$first
    };
    ($token:expr, $($variant:ident),+) => {{
        let token = $token;
        // The token is owned here, so it is moved into the result rather
        // than cloned.
        if matches!(&token, $($crate::model::Token::$variant(_))|+) {
            Ok(token)
        } else {
            Err($crate::AssembleError::UnexpectedToken(
                token,
                $crate::expect_type!(@get_first_type $($variant),+),
            ))
        }
    }};
}
+97 -155
View File
@@ -5,7 +5,7 @@ use std::str::FromStr;
use common::prelude::{Instruction, Register};
use crate::model::{Module, Node, Opcode, Symbol, Token, TokenType};
use crate::{AssembleError, quick_hash};
use crate::{AssembleError, dsa, expect_token, expect_type, quick_hash};
pub struct Parser {
tokens: Vec<Token>,
@@ -34,190 +34,116 @@ impl Program {
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Parser {
Parser {
pub fn parse_nodes(tokens: Vec<Token>) -> Result<Vec<Node>, AssembleError> {
let mut self_ = Parser {
tokens: tokens.into_iter().rev().collect(),
nodes: vec![],
}
}
};
pub fn parse_nodes(&mut self) -> Result<&mut Self, AssembleError> {
while !self.tokens.is_empty() {
let ins = self.parse_instruction()?;
self.nodes.push(ins);
while !self_.tokens.is_empty() {
let ins = self_.parse_instruction()?;
self_.nodes.push(ins);
}
Ok(self)
Ok(self_.nodes.clone())
}
pub fn get_dependencies(&mut self) -> Result<Vec<PathBuf>, AssembleError> {
pub fn get_dependencies(nodes: &Vec<Node>) -> Result<Vec<PathBuf>, AssembleError> {
let mut dependencies = Vec::new();
for node in &self.nodes {
for node in nodes {
if let Opcode::Include = node.1 {
if let Token::StringLit(path) = node.2.get(1).unwrap() {
dependencies.push(PathBuf::from(path));
}
let path = expect_token!(node.2.get(1).unwrap(), StringLit)?;
dependencies.push(PathBuf::from(path));
}
}
Ok(dependencies)
}
pub fn resolve_dependencies(&mut self) -> Result<&mut Self, AssembleError> {
// first we get a list of imports
let mut dependencies = Vec::new();
for node in &self.nodes {
if let Opcode::Include = node.1 {
// we want the path, and the name
let name = if let Token::Symbol(name) = node.2.get(0).unwrap() {
name.name.clone()
} else {
unreachable!()
}; //node.2.get(0).unwrap()
let path = if let Token::StringLit(path) = node.2.get(1).unwrap() {
path
} else {
unreachable!()
};
let hash = quick_hash(&PathBuf::from(path).canonicalize().unwrap());
pub fn expand_pseudo_ops(
mut nodes: Vec<Node>,
module: u64,
) -> Result<Vec<Node>, AssembleError> {
let mut result = Vec::<Node>::with_capacity(nodes.len());
dependencies.push((name, hash));
}
}
let mut changes = Vec::<(u32, u32, Symbol)>::new();
// now we resolve the symbols on all the nodes
// we need to check all operands for unresolved signals
for (i, node) in self.nodes.clone().iter().enumerate() {
let Node(_, _, operands) = node;
for (j, token) in operands.iter().enumerate() {
if let Token::Symbol(symbol) = token {
for d in &dependencies {
if let Module::Unresolved(name) = symbol.module.clone() {
if name != d.0 {
continue;
}
let symbol = Symbol {
name: symbol.name.clone(),
module: Module::Resolved(d.1),
};
changes.push((i as u32, j as u32, symbol));
}
}
}
}
}
for (i, j, symbol) in changes {
self.nodes[i as usize].2[j as usize] = Token::Symbol(symbol);
}
Ok(self)
}
pub fn get(&self) -> Vec<Node> {
self.nodes.clone()
}
pub fn expand_pseudo_ops(&mut self) -> Result<&mut Self, AssembleError> {
for node in self.nodes.iter_mut() {
for node in nodes.iter_mut() {
match node.1 {
Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(),
Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(),
// Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(),
// Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(),
Opcode::Push => {
// inc SPR
// STW reg, SPR
let label = node.0.clone();
let reg = node.2.get(0).unwrap();
let reg = expect_token!(node.2.get(0).unwrap(), Register)?;
vec![
Node(
label.clone(),
Opcode::Inc,
vec![Token::Register(Register::Spr)],
),
Node(
label.clone(),
Opcode::Stw,
vec![reg.clone(), Token::Register(Register::Spr)],
),
];
match label {
Some(label) => result.extend(dsa!(
module,
"{}: iadd spr, 4\n stw {}, spr",
label,
reg
)),
None => {
result.extend(dsa!(module, "iadd spr, 4\n stw {}, spr", reg))
}
}
}
_ => continue,
_ => result.push(node.clone()),
}
}
Ok(self)
}
pub fn resolve_symbols(&mut self) -> Result<&mut Self, AssembleError> {
Ok(self)
}
pub fn instructions(&mut self) -> Vec<Instruction> {
vec![]
Ok(result)
}
fn parse_instruction(&mut self) -> Result<Node, AssembleError> {
println!("tokens: {:?}", self.tokens);
if self.tokens.is_empty() {
unreachable!();
}
let label = if let Token::Symbol(label) = self
.tokens
.last()
.expect("parse_instruction should not have been called if this is none!!")
{
Some(label.clone())
} else {
None
};
// check if the Node starts with a label
let label = expect_token!(self.peek_next()?, Symbol).ok();
if label.is_some() {
self.tokens.pop();
}
let opcode = match self.expect(TokenType::Opcode)? {
Token::Opcode(opcode) => opcode,
_ => unreachable!(),
};
let opcode = expect_token!(self.next()?, Opcode)?;
let args: Vec<Token>;
match opcode {
// R-type instructions
Opcode::Mov | Opcode::Movs => {
let reg1 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg2 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg1 = expect_type!(self.next()?, Register, Symbol)?;
let reg2 = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg1, reg2];
}
Opcode::Ldb | Opcode::Ldbs | Opcode::Ldh | Opcode::Ldhs | Opcode::Ldw => {
let base = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let dest = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let offset =
self.maybe_expect(&[TokenType::Register, TokenType::Immediate]);
if offset.is_some() {
self.tokens.pop();
args = vec![base, offset.unwrap(), dest];
} else {
args = vec![base, Token::Immediate(0), dest];
let base = expect_type!(self.next()?, Register, Symbol)?;
let dest = expect_type!(self.next()?, Register, Symbol)?;
let mut offset = Token::Immediate(0);
if let Ok(next) = self.peek_next() {
if let Ok(_) = expect_type!(next, Register, Immediate) {
offset = self.next()?;
}
}
args = vec![base, offset, dest];
}
Opcode::Stb | Opcode::Sth | Opcode::Stw => {
let base = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let dest = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let base = expect_type!(self.next()?, Register, Symbol)?;
let dest = expect_type!(self.next()?, Register, Symbol)?;
let offset =
self.maybe_expect(&[TokenType::Register, TokenType::Immediate]);
if offset.is_some() {
self.tokens.pop();
args = vec![base, offset.unwrap(), dest];
} else {
args = vec![base, Token::Immediate(0), dest];
let mut offset = Token::Immediate(0);
if let Ok(next) = self.peek_next() {
if let Ok(_) = expect_type!(next, Register, Immediate) {
offset = self.next()?;
}
}
args = vec![base, offset, dest];
}
Opcode::Add
@@ -228,32 +154,32 @@ impl Parser {
| Opcode::Nand
| Opcode::Nor
| Opcode::Xnor => {
let src1 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let src2 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let dest = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let src1 = expect_type!(self.next()?, Register, Symbol)?;
let src2 = expect_type!(self.next()?, Register, Symbol)?;
let dest = expect_type!(self.next()?, Register, Symbol)?;
args = vec![src1, src2, dest];
}
Opcode::Not | Opcode::Cmp => {
let reg1 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg2 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg1 = expect_type!(self.next()?, Register, Symbol)?;
let reg2 = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg1, reg2];
}
Opcode::Shl | Opcode::Shr => {
let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let num = self.expect(TokenType::Immediate)?;
let reg = expect_type!(self.next()?, Register, Symbol)?;
let num = expect_type!(self.next()?, Immediate)?;
args = vec![reg, num];
}
Opcode::Inc | Opcode::Dec => {
let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg];
}
Opcode::Include => {
let mod_name = self.expect(TokenType::Symbol)?;
let path = self.expect(TokenType::StringLit)?;
let mod_name = expect_type!(self.next()?, Symbol)?;
let path = expect_type!(self.next()?, StringLit)?;
args = vec![mod_name, path];
}
@@ -265,20 +191,20 @@ impl Parser {
| Opcode::Jge
| Opcode::Jlt
| Opcode::Jle => {
let imm = self.expect_any(&[TokenType::Immediate, TokenType::Symbol])?;
let imm = expect_type!(self.next()?, Immediate, Symbol)?;
args = vec![imm];
}
// I-type instructions
Opcode::Lui | Opcode::Lli | Opcode::Lwi | Opcode::Iadd | Opcode::Isub => {
let imm = self.expect_any(&[TokenType::Immediate, TokenType::Symbol])?;
let reg = self.expect(TokenType::Register)?;
let reg = expect_type!(self.next()?, Register)?;
let imm = expect_type!(self.next()?, Immediate, Symbol)?;
args = vec![reg, imm];
}
// D-type pseudoinstructions (data definition)
Opcode::Resb | Opcode::Resh | Opcode::Resw => {
let num = self.expect(TokenType::Immediate)?;
let num = expect_type!(self.next()?, Immediate)?;
args = vec![num];
}
@@ -288,18 +214,18 @@ impl Parser {
// E-type pseudoinstructions (stack operations)
Opcode::Push => {
let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg];
}
Opcode::Pop => {
let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?;
let reg = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg];
}
// Special instructions
Opcode::Int => {
let val = self.expect(TokenType::Immediate)?;
let val = expect_type!(self.next()?, Immediate)?;
args = vec![val];
}
@@ -369,8 +295,24 @@ impl Parser {
Ok(values)
}
/// Consumes and returns the next token of the input.
///
/// The parser stores its tokens in reverse order, so the next token is
/// the one at the back of `self.tokens`.
///
/// # Errors
///
/// Returns `AssembleError::UnexpectedEof` when no tokens remain.
fn next(&mut self) -> Result<Token, AssembleError> {
    self.tokens.pop().ok_or(AssembleError::UnexpectedEof)
}
/// Returns a clone of the next token without consuming it.
///
/// The parser stores its tokens in reverse order, so the next token is
/// the one at the back of `self.tokens`. Peeking does not modify the
/// parser, hence the shared `&self` receiver.
///
/// # Errors
///
/// Returns `AssembleError::UnexpectedEof` when no tokens remain.
fn peek_next(&self) -> Result<Token, AssembleError> {
    self.tokens
        .last()
        .cloned()
        .ok_or(AssembleError::UnexpectedEof)
}
fn expect(&mut self, type_: TokenType) -> Result<Token, AssembleError> {
let tok = self.tokens.pop().unwrap();
let tok = self.next()?;
if TokenType::from_token(&tok) == type_ {
Ok(tok)
@@ -380,7 +322,7 @@ impl Parser {
}
fn expect_any(&mut self, types: &[TokenType]) -> Result<Token, AssembleError> {
let tok = self.tokens.pop().unwrap();
let tok = self.next()?;
if types.contains(&TokenType::from_token(&tok)) {
Ok(tok)
@@ -390,9 +332,9 @@ impl Parser {
}
fn maybe_expect(&mut self, types: &[TokenType]) -> Option<Token> {
let tok = self.tokens.last().unwrap();
let tok = self.peek_next().ok()?;
if types.contains(&TokenType::from_token(tok)) {
if types.contains(&TokenType::from_token(&tok)) {
Some(tok.clone())
} else {
None
+60
View File
@@ -0,0 +1,60 @@
use std::path::PathBuf;
use crate::{
AssembleError,
model::{Module, Node, Opcode, Symbol, Token},
quick_hash,
};
pub fn resolve_dependencies(mut nodes: Vec<Node>) -> Result<Vec<Node>, AssembleError> {
// first we get a list of imports
let mut dependencies = Vec::new();
for node in &nodes {
if let Opcode::Include = node.1 {
// we want the path, and the name
let name = if let Token::Symbol(name) = node.2.get(0).unwrap() {
name.name.clone()
} else {
unreachable!()
}; //node.2.get(0).unwrap()
let path = if let Token::StringLit(path) = node.2.get(1).unwrap() {
path
} else {
unreachable!()
};
let hash = quick_hash(&PathBuf::from(path).canonicalize().unwrap());
dependencies.push((name, hash));
}
}
let mut changes = Vec::<(u32, u32, Symbol)>::new();
// now we resolve the symbols on all the nodes
// we need to check all operands for unresolved signals
for (i, node) in nodes.clone().iter().enumerate() {
let Node(_, _, operands) = node;
for (j, token) in operands.iter().enumerate() {
if let Token::Symbol(symbol) = token {
for d in &dependencies {
if let Module::Unresolved(name) = symbol.module.clone() {
if name != d.0 {
continue;
}
let symbol = Symbol {
name: symbol.name.clone(),
module: Module::Resolved(d.1),
};
changes.push((i as u32, j as u32, symbol));
}
}
}
}
}
for (i, j, symbol) in changes {
nodes[i as usize].2[j as usize] = Token::Symbol(symbol);
}
Ok(nodes)
}