Files
damn_simple_architecture/assembler/src/parser.rs
T

491 lines
15 KiB
Rust

use std::path::Iter;
use common::prelude::Register;
use crate::AssembleError;
use crate::lexer::{Symbol, Token};
/// Turns a flat list of lexer tokens into instruction `Node`s.
///
/// The parser is consumed through its `Iterator` implementation, which yields
/// one `Result<Node, AssembleError>` per instruction until no tokens remain.
pub struct Parser {
// Tokens still to be consumed. `Parser::new` stores them in reverse source
// order, so the next token to read is always the last element.
tokens: Vec<Token>,
}
/// Payload-free classification of a `Token`.
///
/// Used by the parser to state which kind of token it expects at a given
/// position, and carried in `AssembleError::UnexpectedToken` on a mismatch.
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum TokenType {
/// Corresponds to `Token::Symbol`.
Symbol,
/// Corresponds to `Token::Register`.
Register,
/// Corresponds to `Token::Immediate`.
Immediate,
/// Corresponds to `Token::StringLit`.
StringLit,
/// Corresponds to `Token::Opcode`.
Opcode,
}
impl TokenType {
    /// Classifies `token` by its variant, ignoring whatever payload it carries.
    fn from_token(token: &Token) -> TokenType {
        match *token {
            Token::Symbol(..) => Self::Symbol,
            Token::Register(..) => Self::Register,
            Token::Immediate(..) => Self::Immediate,
            Token::StringLit(..) => Self::StringLit,
            Token::Opcode(..) => Self::Opcode,
        }
    }
}
// TODO(important): make sure the bit shift is applied during LUI codegen.
/// One parsed instruction: its optional label, its opcode, and its operand tokens.
///
/// The operand order is chosen per opcode by `Parser::parse_instruction` and is
/// not always the order in which the operands were written in the source.
#[derive(Debug)]
pub struct Node(Option<Symbol>, Opcode, Vec<Token>);
impl Iterator for Parser {
    type Item = Result<Node, AssembleError>;

    /// Parses and returns the next instruction, or `None` once every token
    /// has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        if self.tokens.is_empty() {
            None
        } else {
            Some(self.parse_instruction())
        }
    }
}
impl Parser {
    /// Builds a parser over `tokens`, which are expected in source order.
    ///
    /// The vector is reversed once so that the next token to consume is always
    /// the last element and can be removed with a cheap `pop`.
    pub fn new(mut tokens: Vec<Token>) -> Parser {
        tokens.reverse();
        Parser { tokens }
    }

    /// Parses one instruction of the form `[label] opcode operands...`.
    ///
    /// A leading symbol is taken as the instruction's label. The operands are
    /// then read according to the opcode and stored in the returned [`Node`]:
    ///
    /// * loads/stores are written `base dest [offset]` and stored as
    ///   `[base, offset, dest]`, with an immediate `0` when no offset is given;
    /// * `lui`/`lli`/`lwi`/`iadd`/`isub` are written `imm reg` and stored as
    ///   `[reg, imm]`;
    /// * data definitions are stored as `[name, values...]`;
    /// * everything else keeps its operands in source order.
    ///
    /// # Errors
    ///
    /// Returns `AssembleError::UnexpectedToken` when a token of the wrong kind
    /// is found where an opcode or operand was required.
    ///
    /// # Panics
    ///
    /// Panics when called with no tokens left, or when the token stream ends
    /// in the middle of an instruction (see [`Parser::expect_any`]).
    fn parse_instruction(&mut self) -> Result<Node, AssembleError> {
        // A leading symbol is this instruction's label; consume it if present.
        let label = match self.tokens.last() {
            Some(Token::Symbol(symbol)) => Some(symbol.clone()),
            Some(_) => None,
            None => unreachable!("parse_instruction called with no tokens left"),
        };
        if label.is_some() {
            self.tokens.pop();
        }

        let opcode = match self.expect(TokenType::Opcode)? {
            Token::Opcode(opcode) => opcode,
            _ => unreachable!(),
        };

        let args = match opcode {
            // R-type instructions with two register-like operands
            Opcode::Mov | Opcode::Movs | Opcode::Not | Opcode::Cmp => {
                let first = self.expect_reg_or_symbol()?;
                let second = self.expect_reg_or_symbol()?;
                vec![first, second]
            }
            // Loads and stores share the `base dest [offset]` operand shape
            Opcode::Ldb
            | Opcode::Ldbs
            | Opcode::Ldh
            | Opcode::Ldhs
            | Opcode::Ldw
            | Opcode::Stb
            | Opcode::Sth
            | Opcode::Stw => self.parse_memory_operands()?,
            // Three-operand arithmetic / logic
            Opcode::Add
            | Opcode::Sub
            | Opcode::And
            | Opcode::Or
            | Opcode::Xor
            | Opcode::Nand
            | Opcode::Nor
            | Opcode::Xnor => {
                let src1 = self.expect_reg_or_symbol()?;
                let src2 = self.expect_reg_or_symbol()?;
                let dest = self.expect_reg_or_symbol()?;
                vec![src1, src2, dest]
            }
            Opcode::Shl | Opcode::Shr => {
                let reg = self.expect_reg_or_symbol()?;
                let amount = self.expect(TokenType::Immediate)?;
                vec![reg, amount]
            }
            // Single register-like operand (including the stack pseudo-instructions)
            Opcode::Inc | Opcode::Dec | Opcode::Push | Opcode::Pop => {
                vec![self.expect_reg_or_symbol()?]
            }
            // J-type instructions
            Opcode::Jmp
            | Opcode::Jeq
            | Opcode::Jne
            | Opcode::Jgt
            | Opcode::Jge
            | Opcode::Jlt
            | Opcode::Jle => {
                vec![self.expect_any(&[TokenType::Immediate, TokenType::Symbol])?]
            }
            // I-type instructions: written `imm reg`, stored as `[reg, imm]`
            Opcode::Lui | Opcode::Lli | Opcode::Lwi | Opcode::Iadd | Opcode::Isub => {
                let imm = self.expect_any(&[TokenType::Immediate, TokenType::Symbol])?;
                let reg = self.expect(TokenType::Register)?;
                vec![reg, imm]
            }
            // Single immediate operand (space reservation and `int`)
            Opcode::Resb | Opcode::Resh | Opcode::Resw | Opcode::Int => {
                vec![self.expect(TokenType::Immediate)?]
            }
            // D-type pseudoinstructions (data definition)
            Opcode::Db | Opcode::Dh | Opcode::Dw => {
                self.parse_data_definition(opcode.clone())?
            }
            // Instructions with no arguments
            Opcode::Hlt | Opcode::Nop | Opcode::Irt => Vec::new(),
        };

        Ok(Node(label, opcode, args))
    }

    /// Parses the `base dest [offset]` operands of a load or store and returns
    /// them as `[base, offset, dest]`.
    ///
    /// The offset may be a register or an immediate; when the next token is
    /// neither (or there is no next token) an immediate `0` is used instead.
    fn parse_memory_operands(&mut self) -> Result<Vec<Token>, AssembleError> {
        let base = self.expect_reg_or_symbol()?;
        let dest = self.expect_reg_or_symbol()?;
        let offset = match self.maybe_expect(&[TokenType::Register, TokenType::Immediate]) {
            Some(offset) => {
                // `maybe_expect` only peeks, so the token still has to be consumed.
                self.tokens.pop();
                offset
            }
            None => Token::Immediate(0),
        };
        Ok(vec![base, offset, dest])
    }

    /// Parses the operands of `db` / `dh` / `dw`: a name symbol followed by
    /// zero or more values. Returns `[name, values...]`.
    ///
    /// Accepted values are immediates that fit the element width (`u8` for
    /// `db`, `u16` for `dh`, any `u32` for `dw`); `db` additionally accepts
    /// string literals. Parsing stops, without consuming it, at the first
    /// token that is not an accepted value.
    ///
    /// Previously the name symbol was consumed and thrown away, and the token
    /// after it was taken unconditionally: that skipped validation of the first
    /// value, could swallow the next instruction's opcode, and panicked when
    /// the definition was the last thing in the input.
    ///
    /// # Errors
    ///
    /// Returns `AssembleError::UnexpectedToken` when the token after the opcode
    /// is not a symbol.
    fn parse_data_definition(&mut self, opcode: Opcode) -> Result<Vec<Token>, AssembleError> {
        let name = self.expect(TokenType::Symbol)?;
        let mut values = vec![name];

        let (allow_strings, max_immediate) = match opcode {
            Opcode::Db => (true, u32::from(u8::MAX)),
            Opcode::Dh => (false, u32::from(u16::MAX)),
            Opcode::Dw => (false, u32::MAX),
            _ => unreachable!("parse_data_definition called with a non-data opcode"),
        };

        loop {
            let accepted = match self.tokens.last() {
                Some(Token::StringLit(_)) => allow_strings,
                Some(Token::Immediate(val)) => *val <= max_immediate,
                _ => false,
            };
            if !accepted {
                break;
            }
            values.extend(self.tokens.pop());
        }

        Ok(values)
    }

    /// Consumes the next token and requires it to be of kind `type_`.
    ///
    /// # Errors
    ///
    /// Returns `AssembleError::UnexpectedToken` carrying the offending token
    /// and `type_` when the kinds differ.
    ///
    /// # Panics
    ///
    /// Panics when no tokens are left (see [`Parser::expect_any`]).
    fn expect(&mut self, type_: TokenType) -> Result<Token, AssembleError> {
        self.expect_any(&[type_])
    }

    /// Consumes the next token and requires its kind to be one of `types`.
    ///
    /// # Errors
    ///
    /// Returns `AssembleError::UnexpectedToken` carrying the offending token
    /// and the first entry of `types` when the kind is not accepted.
    ///
    /// # Panics
    ///
    /// Panics when no tokens are left, i.e. when the input ends in the middle
    /// of an instruction.
    // NOTE(review): truncated input should become an `AssembleError`, but no
    // suitable variant is visible from this file; add one and return it here.
    fn expect_any(&mut self, types: &[TokenType]) -> Result<Token, AssembleError> {
        let tok = self
            .tokens
            .pop()
            .expect("unexpected end of input: an opcode or operand is missing");
        if types.contains(&TokenType::from_token(&tok)) {
            Ok(tok)
        } else {
            Err(AssembleError::UnexpectedToken(tok, types[0]))
        }
    }

    /// Shorthand for the most common operand shape: a register or a symbol.
    fn expect_reg_or_symbol(&mut self) -> Result<Token, AssembleError> {
        self.expect_any(&[TokenType::Register, TokenType::Symbol])
    }

    /// Peeks at the next token and returns a copy of it when its kind is one of
    /// `types`. The token is NOT consumed; the caller pops it if it wants it.
    ///
    /// Returns `None` when the kind does not match or when no tokens are left
    /// (the latter used to panic, which broke optional trailing operands on the
    /// last instruction of the input).
    fn maybe_expect(&mut self, types: &[TokenType]) -> Option<Token> {
        self.tokens
            .last()
            .filter(|tok| types.contains(&TokenType::from_token(tok)))
            .cloned()
    }
}
/// Every mnemonic the assembler understands: the real machine instructions
/// plus the assembler-level pseudo-instructions.
///
/// The enum is a plain field-less tag, so it is `Copy` (no `.clone()` needed
/// to pass it around) and can be used as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Opcode {
    // Real instructions (0x00-0x26)
    Nop,
    Mov,
    Movs,
    Ldb,
    Ldbs,
    Ldh,
    Ldhs,
    Ldw,
    Stb,
    Sth,
    Stw,
    Lli,
    Lui,
    Jmp,
    Jeq,
    Jne,
    Jgt,
    Jge,
    Jlt,
    Jle,
    Cmp,
    Inc,
    Dec,
    Shl,
    Shr,
    Add,
    Sub,
    And,
    Or,
    Not,
    Xor,
    Nand,
    Nor,
    Xnor,
    Int,
    Irt,
    Hlt,
    Iadd,
    Isub,
    // Pseudo-instructions (no machine opcode value)
    Db,
    Dh,
    Dw,
    Resb,
    Resh,
    Resw,
    Push,
    Pop,
    Lwi,
}
impl Opcode {
    /// Lower-case mnemonic of every opcode accepted by [`Opcode::from_str`]:
    /// first the real instructions in opcode-value order (0x00-0x26), then the
    /// pseudo-instructions.
    ///
    /// The lifetimes are spelled out as `'static` instead of relying on
    /// elision, which rustc lints against in associated constants.
    pub const OPCODES: &'static [&'static str] = &[
        // Real instructions (0x00-0x26)
        "nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw", "lli",
        "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc", "dec", "shl", "shr",
        "add", "sub", "and", "or", "not", "xor", "nand", "nor", "xnor", "int", "irt", "hlt",
        "iadd", "isub",
        // Pseudo-instructions
        "db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi",
    ];

    /// Looks up an opcode by mnemonic, ignoring case.
    ///
    /// Returns `None` when `s` is not a known mnemonic. This is an inherent
    /// method, not an implementation of `std::str::FromStr`.
    pub fn from_str(s: &str) -> Option<Self> {
        let opcode = match s.to_lowercase().as_str() {
            "nop" => Self::Nop,
            "mov" => Self::Mov,
            "movs" => Self::Movs,
            "ldb" => Self::Ldb,
            "ldbs" => Self::Ldbs,
            "ldh" => Self::Ldh,
            "ldhs" => Self::Ldhs,
            "ldw" => Self::Ldw,
            "stb" => Self::Stb,
            "sth" => Self::Sth,
            "stw" => Self::Stw,
            "lli" => Self::Lli,
            "lui" => Self::Lui,
            "jmp" => Self::Jmp,
            "jeq" => Self::Jeq,
            "jne" => Self::Jne,
            "jgt" => Self::Jgt,
            "jge" => Self::Jge,
            "jlt" => Self::Jlt,
            "jle" => Self::Jle,
            "cmp" => Self::Cmp,
            "inc" => Self::Inc,
            "dec" => Self::Dec,
            "shl" => Self::Shl,
            "shr" => Self::Shr,
            "add" => Self::Add,
            "sub" => Self::Sub,
            "and" => Self::And,
            "or" => Self::Or,
            "not" => Self::Not,
            "xor" => Self::Xor,
            "nand" => Self::Nand,
            "nor" => Self::Nor,
            "xnor" => Self::Xnor,
            "int" => Self::Int,
            "irt" => Self::Irt,
            "hlt" => Self::Hlt,
            "iadd" => Self::Iadd,
            "isub" => Self::Isub,
            "db" => Self::Db,
            "dh" => Self::Dh,
            "dw" => Self::Dw,
            "resb" => Self::Resb,
            "resh" => Self::Resh,
            "resw" => Self::Resw,
            "push" => Self::Push,
            "pop" => Self::Pop,
            "lwi" => Self::Lwi,
            _ => return None,
        };
        Some(opcode)
    }

    /// Returns the numeric machine opcode (0x00-0x26) of a real instruction,
    /// or `None` for a pseudo-instruction.
    ///
    /// The match is deliberately exhaustive (no `_` arm) so that adding a new
    /// variant forces an explicit decision here instead of silently turning it
    /// into a pseudo-instruction.
    pub fn to_opcode_value(&self) -> Option<u8> {
        match self {
            Self::Nop => Some(0x00),
            Self::Mov => Some(0x01),
            Self::Movs => Some(0x02),
            Self::Ldb => Some(0x03),
            Self::Ldbs => Some(0x04),
            Self::Ldh => Some(0x05),
            Self::Ldhs => Some(0x06),
            Self::Ldw => Some(0x07),
            Self::Stb => Some(0x08),
            Self::Sth => Some(0x09),
            Self::Stw => Some(0x0A),
            Self::Lli => Some(0x0B),
            Self::Lui => Some(0x0C),
            Self::Jmp => Some(0x0D),
            Self::Jeq => Some(0x0E),
            Self::Jne => Some(0x0F),
            Self::Jgt => Some(0x10),
            Self::Jge => Some(0x11),
            Self::Jlt => Some(0x12),
            Self::Jle => Some(0x13),
            Self::Cmp => Some(0x14),
            Self::Inc => Some(0x15),
            Self::Dec => Some(0x16),
            Self::Shl => Some(0x17),
            Self::Shr => Some(0x18),
            Self::Add => Some(0x19),
            Self::Sub => Some(0x1A),
            Self::And => Some(0x1B),
            Self::Or => Some(0x1C),
            Self::Not => Some(0x1D),
            Self::Xor => Some(0x1E),
            Self::Nand => Some(0x1F),
            Self::Nor => Some(0x20),
            Self::Xnor => Some(0x21),
            Self::Int => Some(0x22),
            Self::Irt => Some(0x23),
            Self::Hlt => Some(0x24),
            Self::Iadd => Some(0x25),
            Self::Isub => Some(0x26),
            // Pseudo-instructions don't have opcode values
            Self::Db
            | Self::Dh
            | Self::Dw
            | Self::Resb
            | Self::Resh
            | Self::Resw
            | Self::Push
            | Self::Pop
            | Self::Lwi => None,
        }
    }

    /// Returns `true` for pseudo-instructions, i.e. exactly those opcodes that
    /// have no machine opcode value.
    pub fn is_pseudo_instruction(&self) -> bool {
        self.to_opcode_value().is_none()
    }
}