Files
damn_simple_architecture/assembler/src/parser.rs
T
2025-06-17 23:48:06 +01:00

344 lines
11 KiB
Rust

use core::fmt;
use std::path::PathBuf;
use std::str::FromStr;
use common::prelude::{Instruction, Register};
use crate::model::{Module, Node, Opcode, Symbol, Token, TokenType};
use crate::{AssembleError, dsa, expect_token, expect_type, quick_hash};
/// Token-stream parser that turns lexed [`Token`]s into [`Node`]s.
pub struct Parser {
/// Tokens still to be consumed. `parse_nodes` stores them in reverse order so
/// the next token to read is the last element and can be taken with `Vec::pop`.
tokens: Vec<Token>,
/// Nodes held by this parser; `Program::parser` seeds it with a copy of the
/// program's nodes.
nodes: Vec<Node>,
}
/// A flat list of nodes accumulated from one or more modules.
pub struct Program {
/// All nodes of the program, in the order the modules were added.
pub nodes: Vec<Node>,
}
impl Program {
    /// Creates an empty program that contains no nodes.
    pub fn new() -> Program {
        Program { nodes: Vec::new() }
    }

    /// Appends every node of `module` to the end of this program, keeping
    /// their order.
    pub fn add_module(&mut self, module: Vec<Node>) {
        self.nodes.extend(module);
    }

    /// Builds a [`Parser`] with an empty token stream whose node list is a
    /// clone of this program's nodes.
    ///
    /// The program is only read here, so a shared borrow is sufficient; callers
    /// that hold a mutable borrow can still call this method unchanged.
    pub fn parser(&self) -> Parser {
        Parser {
            tokens: Vec::new(),
            nodes: self.nodes.clone(),
        }
    }
}

impl Default for Program {
    /// Equivalent to [`Program::new`]: an empty program.
    fn default() -> Self {
        Self::new()
    }
}
impl Parser {
/// Parses a complete token stream into a list of nodes.
///
/// The tokens are stored in reverse so the parser can consume them from the
/// back of the vector; instructions are then parsed one after another until
/// no tokens remain.
///
/// # Errors
///
/// Returns the first [`AssembleError`] reported by `parse_instruction`; nodes
/// parsed before the failure are discarded.
pub fn parse_nodes(tokens: Vec<Token>) -> Result<Vec<Node>, AssembleError> {
    let mut parser = Parser {
        tokens: tokens.into_iter().rev().collect(),
        nodes: Vec::new(),
    };
    // Collect into a local vector and hand it back by value: this avoids
    // cloning the whole node list just to return it.
    let mut nodes = Vec::new();
    while !parser.tokens.is_empty() {
        nodes.push(parser.parse_instruction()?);
    }
    Ok(nodes)
}
pub fn get_dependencies(nodes: &Vec<Node>) -> Result<Vec<PathBuf>, AssembleError> {
let mut dependencies = Vec::new();
for node in nodes {
if let Opcode::Include = node.1 {
let path = expect_token!(node.2.get(1).unwrap(), StringLit)?;
dependencies.push(PathBuf::from(path));
}
}
Ok(dependencies)
}
pub fn expand_pseudo_ops(
mut nodes: Vec<Node>,
module: u64,
) -> Result<Vec<Node>, AssembleError> {
let mut result = Vec::<Node>::with_capacity(nodes.len());
for node in nodes.iter_mut() {
match node.1 {
// Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(),
// Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(),
Opcode::Push => {
// inc SPR
// STW reg, SPR
let label = node.0.clone();
let reg = expect_token!(node.2.get(0).unwrap(), Register)?;
match label {
Some(label) => result.extend(dsa!(
module,
"{}: iadd spr, 4\n stw {}, spr",
label,
reg
)),
None => {
result.extend(dsa!(module, "iadd spr, 4\n stw {}, spr", reg))
}
}
}
_ => result.push(node.clone()),
}
}
Ok(result)
}
/// Parses one instruction from the front of the token stream: an optional
/// leading label, an opcode, and the operands that opcode requires.
///
/// The resulting [`Node`] holds the label (if any), the opcode and the operand
/// tokens. For loads and stores the operands are stored as
/// `[first, offset, second]`, where `offset` defaults to `Token::Immediate(0)`
/// when no register or immediate follows the two mandatory operands.
///
/// # Errors
///
/// Returns [`AssembleError::UnexpectedEof`] when the stream ends before the
/// instruction is complete (including when it is already empty), and
/// propagates the errors of `expect_token!` / `expect_type!` when a token does
/// not have an accepted type.
fn parse_instruction(&mut self) -> Result<Node, AssembleError> {
    // A leading symbol is this node's label; consume it only when present.
    let label = expect_token!(self.peek_next()?, Symbol).ok();
    if label.is_some() {
        self.tokens.pop();
    }
    let opcode = expect_token!(self.next()?, Opcode)?;
    let args: Vec<Token> = match opcode {
        // R-type: two operands, each a register or a symbol.
        Opcode::Mov | Opcode::Movs | Opcode::Not | Opcode::Cmp => {
            let reg1 = expect_type!(self.next()?, Register, Symbol)?;
            let reg2 = expect_type!(self.next()?, Register, Symbol)?;
            vec![reg1, reg2]
        }
        // R-type loads and stores: two operands plus an optional offset.
        Opcode::Ldb
        | Opcode::Ldbs
        | Opcode::Ldh
        | Opcode::Ldhs
        | Opcode::Ldw
        | Opcode::Stb
        | Opcode::Sth
        | Opcode::Stw => {
            let base = expect_type!(self.next()?, Register, Symbol)?;
            let dest = expect_type!(self.next()?, Register, Symbol)?;
            let mut offset = Token::Immediate(0);
            // Only consume the following token when it can serve as an offset.
            if let Ok(next) = self.peek_next() {
                if expect_type!(next, Register, Immediate).is_ok() {
                    offset = self.next()?;
                }
            }
            vec![base, offset, dest]
        }
        // R-type arithmetic/logic: two sources and a destination.
        Opcode::Add
        | Opcode::Sub
        | Opcode::And
        | Opcode::Or
        | Opcode::Xor
        | Opcode::Nand
        | Opcode::Nor
        | Opcode::Xnor => {
            let src1 = expect_type!(self.next()?, Register, Symbol)?;
            let src2 = expect_type!(self.next()?, Register, Symbol)?;
            let dest = expect_type!(self.next()?, Register, Symbol)?;
            vec![src1, src2, dest]
        }
        // Shifts: a register or symbol followed by an immediate amount.
        Opcode::Shl | Opcode::Shr => {
            let reg = expect_type!(self.next()?, Register, Symbol)?;
            let num = expect_type!(self.next()?, Immediate)?;
            vec![reg, num]
        }
        // Single register-or-symbol operand, including the stack pseudo-ops.
        Opcode::Inc | Opcode::Dec | Opcode::Push | Opcode::Pop => {
            let reg = expect_type!(self.next()?, Register, Symbol)?;
            vec![reg]
        }
        // Include: a module name symbol followed by a string literal path.
        Opcode::Include => {
            let mod_name = expect_type!(self.next()?, Symbol)?;
            let path = expect_type!(self.next()?, StringLit)?;
            vec![mod_name, path]
        }
        // J-type: a single immediate or symbol target.
        Opcode::Jmp
        | Opcode::Jeq
        | Opcode::Jne
        | Opcode::Jgt
        | Opcode::Jge
        | Opcode::Jlt
        | Opcode::Jle => {
            let imm = expect_type!(self.next()?, Immediate, Symbol)?;
            vec![imm]
        }
        // I-type: a register followed by an immediate or symbol.
        Opcode::Lui | Opcode::Lli | Opcode::Lwi | Opcode::Iadd | Opcode::Isub => {
            let reg = expect_type!(self.next()?, Register)?;
            let imm = expect_type!(self.next()?, Immediate, Symbol)?;
            vec![reg, imm]
        }
        // Single immediate operand: space reservation and interrupts.
        Opcode::Resb | Opcode::Resh | Opcode::Resw | Opcode::Int => {
            let num = expect_type!(self.next()?, Immediate)?;
            vec![num]
        }
        // D-type pseudo-instructions (data definition).
        Opcode::Db | Opcode::Dh | Opcode::Dw => self.parse_data_definition(opcode.clone())?,
        // Instructions with no arguments.
        Opcode::Hlt | Opcode::Nop | Opcode::Irt => Vec::new(),
    };
    Ok(Node(label, opcode, args))
}
/// Parses the operands of a `db` / `dh` / `dw` data definition.
///
/// The first token must be a symbol (the name of the definition) and becomes
/// the first element of the returned vector. It is followed by the longest run
/// of tokens accepted for `opcode`:
///
/// * `Db` — string literals and immediates no larger than `u8::MAX`;
/// * `Dh` — immediates no larger than `u16::MAX`;
/// * `Dw` — any immediate.
///
/// The first token that is not accepted (including an immediate above the
/// limit) ends the list and is left in the stream.
///
/// # Errors
///
/// Returns the error of `expect` when the stream is empty or the first token is
/// not a symbol.
///
/// # Panics
///
/// Panics if `opcode` is not one of `Db`, `Dh` or `Dw`.
fn parse_data_definition(
    &mut self,
    opcode: Opcode,
) -> Result<Vec<Token>, AssembleError> {
    // `expect` already removes the symbol from the stream, so store that very
    // token. Popping again here would drop the name, take the first data value
    // without validating it, and panic when nothing follows the name.
    let name = self.expect(TokenType::Symbol)?;
    let mut values = vec![name];

    // Largest immediate accepted, and whether string literals are allowed.
    let (limit, strings_allowed) = match opcode {
        Opcode::Db => (u32::from(u8::MAX), true),
        Opcode::Dh => (u32::from(u16::MAX), false),
        Opcode::Dw => (u32::MAX, false),
        _ => unreachable!("parse_data_definition called with a non-data opcode"),
    };

    while let Some(token) = self.tokens.last() {
        let accepted = match token {
            Token::StringLit(_) => strings_allowed,
            Token::Immediate(val) => *val <= limit,
            _ => false,
        };
        if !accepted {
            break;
        }
        values.extend(self.tokens.pop());
    }
    Ok(values)
}
/// Removes and returns the next token of the stream.
///
/// # Errors
///
/// Returns [`AssembleError::UnexpectedEof`] when no tokens are left.
fn next(&mut self) -> Result<Token, AssembleError> {
    self.tokens.pop().ok_or(AssembleError::UnexpectedEof)
}
/// Returns a clone of the next token without removing it from the stream.
///
/// # Errors
///
/// Returns [`AssembleError::UnexpectedEof`] when no tokens are left.
fn peek_next(&mut self) -> Result<Token, AssembleError> {
    match self.tokens.last() {
        Some(token) => Ok(token.clone()),
        None => Err(AssembleError::UnexpectedEof),
    }
}
/// Consumes the next token and checks that it has the type `type_`.
///
/// The token is removed from the stream even when the check fails.
///
/// # Errors
///
/// Returns [`AssembleError::UnexpectedEof`] when no tokens are left, or
/// [`AssembleError::UnexpectedToken`] carrying the offending token and the
/// expected type when the types differ.
fn expect(&mut self, type_: TokenType) -> Result<Token, AssembleError> {
    let token = self.next()?;
    if TokenType::from_token(&token) != type_ {
        return Err(AssembleError::UnexpectedToken(token, type_));
    }
    Ok(token)
}
/// Consumes the next token and checks that its type is one of `types`.
///
/// The token is removed from the stream even when the check fails.
///
/// # Errors
///
/// Returns [`AssembleError::UnexpectedEof`] when no tokens are left, or
/// [`AssembleError::UnexpectedToken`] carrying the offending token and the
/// first entry of `types` when none of the types match.
///
/// # Panics
///
/// Panics if `types` is empty and a token is available, because the error
/// value is built from `types[0]`.
fn expect_any(&mut self, types: &[TokenType]) -> Result<Token, AssembleError> {
    let token = self.next()?;
    let kind = TokenType::from_token(&token);
    let allowed = types.iter().any(|candidate| *candidate == kind);
    if !allowed {
        return Err(AssembleError::UnexpectedToken(token, types[0]));
    }
    Ok(token)
}
/// Returns a clone of the next token if its type is one of `types`.
///
/// The stream is left untouched: the token is only peeked at, never consumed.
/// Returns `None` when the stream is empty or the token's type is not listed.
fn maybe_expect(&mut self, types: &[TokenType]) -> Option<Token> {
    self.peek_next()
        .ok()
        .filter(|token| types.contains(&TokenType::from_token(token)))
}
}