diff --git a/assembler/src/error.rs b/assembler/src/error.rs index c5ad53b..c5627cc 100644 --- a/assembler/src/error.rs +++ b/assembler/src/error.rs @@ -34,14 +34,49 @@ impl AssembleError { kind, } } + + /// Prints a parser error to the screen. + fn print_parser_error( + &self, + f: &mut std::fmt::Formatter<'_>, + parse_error: &ParserError, + ) -> std::fmt::Result { + let Some(source_info) = &self.source_info else { + write!( + f, + "Parse error thrown with no source information. Error: {parse_error}" + )?; + + return Ok(()); + }; + + write!(f, "Parser error, {parse_error} at {source_info}")?; + + // Prints out the context for our error. + source_info.print_context_with_underline().map_err(|e| { + _ = writeln!(f, "Print context error: {e}"); + + std::fmt::Error {} + })?; + + Ok(()) + } } impl Display for AssembleError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { if let Some(info) = &self.source_info { write!(f, "at {info}")?; + + match &self.kind { + AssembleErrorKind::Parser(err) => self.print_parser_error(f, err)?, + _ => write!(f, "{}", self.kind)?, + } + + return Ok(()); } + // Handle errors without SourceInfo. write!(f, "{}", self.kind)?; Ok(()) @@ -68,13 +103,7 @@ pub enum AssembleErrorKind { } #[derive(Debug, Clone)] -pub struct ParserError { - error_type: ParserErrorType, - source_info: SourceInfo, -} - -#[derive(Debug, Clone)] -pub enum ParserErrorType { +pub enum ParserError { UnexpectedToken, MissingOperand, InvalidInstruction, @@ -82,7 +111,7 @@ pub enum ParserErrorType { DuplicateLabel, } -impl Display for ParserErrorType { +impl Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::UnexpectedToken => write!(f, "unexpected token"), @@ -94,28 +123,6 @@ impl Display for ParserErrorType { } } -impl Display for ParserError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO: Print the path/to/filename.dsa:line_no, column col_no. - write!( - f, - "Parser error, {} at {}", - self.error_type, self.source_info - )?; - - // Prints out the context for our error. - self.source_info - .print_context_with_underline() - .map_err(|e| { - _ = writeln!(f, "Print context error: {e}"); - - std::fmt::Error {} - })?; - - Ok(()) - } -} - #[derive(Debug, Clone)] pub enum SymbolError { Undefined, diff --git a/assembler/src/source.rs b/assembler/src/source.rs index 96a054e..2f455a7 100644 --- a/assembler/src/source.rs +++ b/assembler/src/source.rs @@ -9,6 +9,7 @@ use std::{ use crate::error::AssembleError; pub mod lines; +pub mod opcode; pub mod source_info; pub mod token; pub mod token_info; diff --git a/assembler/src/source/opcode.rs b/assembler/src/source/opcode.rs new file mode 100644 index 0000000..5056515 --- /dev/null +++ b/assembler/src/source/opcode.rs @@ -0,0 +1,349 @@ +//! This module contains instructions for tokenisation. + +use std::{fmt, str::FromStr}; + +use common::prelude::{ITypeArgs, Instruction, Interrupt, RTypeArgs}; + +use crate::{ + error::{AssembleError, AssembleErrorKind}, + source::source_info::SourceInfo, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Opcode { + Nop, + Mov, + Movs, + Ldb, + Ldbs, + Ldh, + Ldhs, + Ldw, + Stb, + Sth, + Stw, + Lli, + Lui, + Jmp, + Jeq, + Jne, + Jgt, + Jge, + Jlt, + Jle, + Cmp, + Inc, + Dec, + Shl, + Shr, + Add, + Sub, + And, + Or, + Not, + Xor, + Nand, + Nor, + Xnor, + Int, + Irt, + Hlt, + AddI, + SubI, + + // Pseudo-instructions + Db, + Dh, + Dw, + Resb, + Resh, + Resw, + Push, + Pop, + Pusha, + Popa, + Lwi, + Call, + Return, + + // Meta instructions (these aren't present in the binary as instructions) + Include, + Data, + Segment, +} + +#[derive(Debug)] +pub enum OpcodeFromStrError { + InvalidRegister(&'static str), + InvalidOpcode(String), +} + +impl std::fmt::Display for OpcodeFromStrError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::InvalidRegister(reg) => write!(f, "register does not exist: {reg}"), + Self::InvalidOpcode(op) => write!(f, "instruction does not exist: {op}"), + } + } +} + +impl std::error::Error for OpcodeFromStrError {} + +impl Opcode { + pub const OPCODES: &[&str] = &[ + // Real instructions (0x00-0x26) + "nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw", + "lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc", + "dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor", + "xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions + "db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "call", "return", + "pusha", "popa", // meta instructions + "include", + ]; + + pub fn to_instruction( + &self, + source_info: SourceInfo, + ) -> Result { + match self { + Self::Nop => Ok(Instruction::Nop), + Self::Mov => Ok(Instruction::Mov(RTypeArgs::default())), + Self::Movs => Ok(Instruction::MovSigned(RTypeArgs::default())), + Self::Ldb => Ok(Instruction::LoadByte(ITypeArgs::default())), + Self::Ldbs => Ok(Instruction::LoadByteSigned(ITypeArgs::default())), + Self::Ldh => Ok(Instruction::LoadHalfword(ITypeArgs::default())), + Self::Ldhs => Ok(Instruction::LoadHalfwordSigned(ITypeArgs::default())), + Self::Ldw => Ok(Instruction::LoadWord(ITypeArgs::default())), + Self::Stb => Ok(Instruction::StoreByte(ITypeArgs::default())), + Self::Sth => Ok(Instruction::StoreHalfword(ITypeArgs::default())), + Self::Stw => Ok(Instruction::StoreWord(ITypeArgs::default())), + Self::Lli => Ok(Instruction::LoadLowerImmediate(ITypeArgs::default())), + Self::Lui => Ok(Instruction::LoadUpperImmediate(ITypeArgs::default())), + Self::Jmp => Ok(Instruction::Jump(ITypeArgs::default())), + Self::Jeq => Ok(Instruction::JumpEq(ITypeArgs::default())), + Self::Jne => Ok(Instruction::JumpNeq(ITypeArgs::default())), + Self::Jgt => Ok(Instruction::JumpGt(ITypeArgs::default())), + Self::Jge => Ok(Instruction::JumpGe(ITypeArgs::default())), + Self::Jlt => Ok(Instruction::JumpLt(ITypeArgs::default())), + Self::Jle => Ok(Instruction::JumpLe(ITypeArgs::default())), + Self::Cmp => Ok(Instruction::Compare(RTypeArgs::default())), + Self::Inc => Ok(Instruction::Increment(RTypeArgs::default())), + Self::Dec => Ok(Instruction::Decrement(RTypeArgs::default())), + Self::Shl => Ok(Instruction::ShiftLeft(RTypeArgs::default())), + Self::Shr => Ok(Instruction::ShiftRight(RTypeArgs::default())), + Self::Add => Ok(Instruction::Add(RTypeArgs::default())), + Self::Sub => Ok(Instruction::Sub(RTypeArgs::default())), + Self::And => Ok(Instruction::And(RTypeArgs::default())), + Self::Or => Ok(Instruction::Or(RTypeArgs::default())), + Self::Not => Ok(Instruction::Not(RTypeArgs::default())), + Self::Xor => Ok(Instruction::Xor(RTypeArgs::default())), + Self::Nand => Ok(Instruction::Nand(RTypeArgs::default())), + Self::Nor => Ok(Instruction::Nor(RTypeArgs::default())), + Self::Xnor => Ok(Instruction::Xnor(RTypeArgs::default())), + Self::Int => Ok(Instruction::Interrupt(Interrupt::default())), + Self::Irt => Ok(Instruction::IntReturn), + Self::Hlt => Ok(Instruction::Halt), + Self::AddI => Ok(Instruction::AddImmediate(ITypeArgs::default())), + Self::SubI => Ok(Instruction::SubImmediate(ITypeArgs::default())), + Self::Segment => Ok(Instruction::Segment(0)), + _ => Err(AssembleError::new_source_error( + source_info, + AssembleErrorKind::Unimplemented( + "Opcode::to_instruction called on an instruction that does not exist in common.", + ), + )), + } + } + + #[must_use] + pub const fn to_opcode_value(&self) -> Option { + match self { + Self::Nop => Some(0x00), + Self::Mov => Some(0x01), + Self::Movs => Some(0x02), + Self::Ldb => Some(0x03), + Self::Ldbs => Some(0x04), + Self::Ldh => Some(0x05), + Self::Ldhs => Some(0x06), + Self::Ldw => Some(0x07), + Self::Stb => Some(0x08), + Self::Sth => Some(0x09), + Self::Stw => Some(0x0A), + Self::Lli => Some(0x0B), + Self::Lui => Some(0x0C), + Self::Jmp => Some(0x0D), + Self::Jeq => Some(0x0E), + Self::Jne => Some(0x0F), + Self::Jgt => Some(0x10), + Self::Jge => Some(0x11), + Self::Jlt => Some(0x12), + Self::Jle => Some(0x13), + Self::Cmp => Some(0x14), + Self::Inc => Some(0x15), + Self::Dec => Some(0x16), + Self::Shl => Some(0x17), + Self::Shr => Some(0x18), + Self::Add => Some(0x19), + Self::Sub => Some(0x1A), + Self::And => Some(0x1B), + Self::Or => Some(0x1C), + Self::Not => Some(0x1D), + Self::Xor => Some(0x1E), + Self::Nand => Some(0x1F), + Self::Nor => Some(0x20), + Self::Xnor => Some(0x21), + Self::Int => Some(0x22), + Self::Irt => Some(0x23), + Self::Hlt => Some(0x24), + Self::AddI => Some(0x25), + Self::SubI => Some(0x26), + // TODO: Maybe recombine pseudos? + Self::Segment => Some(0x27), + // Pseudo-instructions don't have opcode values + _ => None, + } + } + + #[must_use] + pub const fn is_pseudo_instruction(&self) -> bool { + matches!( + self, + Self::Db + | Self::Dh + | Self::Dw + | Self::Resb + | Self::Resh + | Self::Resw + | Self::Push + | Self::Pop + | Self::Lwi + ) + } +} + +impl FromStr for Opcode { + type Err = OpcodeFromStrError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "nop" => Ok(Self::Nop), + "mov" => Ok(Self::Mov), + "movs" => Ok(Self::Movs), + "ldb" => Ok(Self::Ldb), + "ldbs" => Ok(Self::Ldbs), + "ldh" => Ok(Self::Ldh), + "ldhs" => Ok(Self::Ldhs), + "ldw" => Ok(Self::Ldw), + "stb" => Ok(Self::Stb), + "sth" => Ok(Self::Sth), + "stw" => Ok(Self::Stw), + "lli" => Ok(Self::Lli), + "lui" => Ok(Self::Lui), + "jmp" => Ok(Self::Jmp), + "jeq" => Ok(Self::Jeq), + "jne" => Ok(Self::Jne), + "jgt" => Ok(Self::Jgt), + "jge" => Ok(Self::Jge), + "jlt" => Ok(Self::Jlt), + "jle" => Ok(Self::Jle), + "cmp" => Ok(Self::Cmp), + "inc" => Ok(Self::Inc), + "dec" => Ok(Self::Dec), + "shl" => Ok(Self::Shl), + "shr" => Ok(Self::Shr), + "add" => Ok(Self::Add), + "sub" => Ok(Self::Sub), + "and" => Ok(Self::And), + "or" => Ok(Self::Or), + "not" => Ok(Self::Not), + "xor" => Ok(Self::Xor), + "nand" => Ok(Self::Nand), + "nor" => Ok(Self::Nor), + "xnor" => Ok(Self::Xnor), + "int" => Ok(Self::Int), + "irt" => Ok(Self::Irt), + "hlt" => Ok(Self::Hlt), + "addi" => Ok(Self::AddI), + "subi" => Ok(Self::SubI), + "db" => Ok(Self::Db), + "dh" => Ok(Self::Dh), + "dw" => Ok(Self::Dw), + "resb" => Ok(Self::Resb), + "resh" => Ok(Self::Resh), + "resw" => Ok(Self::Resw), + "push" => Ok(Self::Push), + "pop" => Ok(Self::Pop), + "lwi" => Ok(Self::Lwi), + "include" => Ok(Self::Include), + "call" => Ok(Self::Call), + "return" => Ok(Self::Return), + "pusha" => Ok(Self::Pusha), + "popa" => Ok(Self::Popa), + _ => Err(OpcodeFromStrError::InvalidOpcode(s.to_string())), + } + } +} + +impl fmt::Display for Opcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Nop => write!(f, "nop"), + Self::Mov => write!(f, "mov"), + Self::Movs => write!(f, "movs"), + Self::Ldb => write!(f, "ldb"), + Self::Ldbs => write!(f, "ldbs"), + Self::Ldh => write!(f, "ldh"), + Self::Ldhs => write!(f, "ldhs"), + Self::Ldw => write!(f, "ldw"), + Self::Stb => write!(f, "stb"), + Self::Sth => write!(f, "sth"), + Self::Stw => write!(f, "stw"), + Self::Lli => write!(f, "lli"), + Self::Lui => write!(f, "lui"), + Self::Jmp => write!(f, "jmp"), + Self::Jeq => write!(f, "jeq"), + Self::Jne => write!(f, "jne"), + Self::Jgt => write!(f, "jgt"), + Self::Jge => write!(f, "jge"), + Self::Jlt => write!(f, "jlt"), + Self::Jle => write!(f, "jle"), + Self::Cmp => write!(f, "cmp"), + Self::Inc => write!(f, "inc"), + Self::Dec => write!(f, "dec"), + Self::Shl => write!(f, "shl"), + Self::Shr => write!(f, "shr"), + Self::Add => write!(f, "add"), + Self::Sub => write!(f, "sub"), + Self::And => write!(f, "and"), + Self::Or => write!(f, "or"), + Self::Not => write!(f, "not"), + Self::Xor => write!(f, "xor"), + Self::Nand => write!(f, "nand"), + Self::Nor => write!(f, "nor"), + Self::Xnor => write!(f, "xnor"), + Self::Int => write!(f, "int"), + Self::Irt => write!(f, "irt"), + Self::Hlt => write!(f, "hlt"), + Self::AddI => write!(f, "addi"), + Self::SubI => write!(f, "subi"), + Self::Db => write!(f, "db"), + Self::Dh => write!(f, "dh"), + Self::Dw => write!(f, "dw"), + Self::Resb => write!(f, "resb"), + Self::Resh => write!(f, "resh"), + Self::Resw => write!(f, "resw"), + Self::Push => write!(f, "push"), + Self::Pop => write!(f, "pop"), + Self::Lwi => write!(f, "lwi"), + Self::Call => write!(f, "call"), + Self::Return => write!(f, "return"), + Self::Pusha => write!(f, "pusha"), + Self::Popa => write!(f, "popa"), + + // meta instructions + Self::Include => write!(f, "include"), + Self::Data => write!(f, "data"), + Self::Segment => write!(f, "[SEGMENT]"), + } + } +} diff --git a/assembler/src/source/pseudo_opcode.rs b/assembler/src/source/pseudo_opcode.rs new file mode 100644 index 0000000..1f75b1e --- /dev/null +++ b/assembler/src/source/pseudo_opcode.rs @@ -0,0 +1,4 @@ +//! This module contains code for handling pseudo opcodes. + +/// Pseudo instructions that cannot simply be lowered to ISA instructions. +pub enum PseudoOpcode {} diff --git a/assembler/src/source/token.rs b/assembler/src/source/token.rs index 2303961..899de4d 100644 --- a/assembler/src/source/token.rs +++ b/assembler/src/source/token.rs @@ -5,10 +5,9 @@ use common::prelude::*; use crate::source::{ + opcode::Opcode, source_info::SourceInfo, - token_info::{ - DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken, - }, + token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken}, }; #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -22,7 +21,7 @@ pub enum TokenType { /// String literal (e.g., `"hello world"`). String(String), /// Assembly instruction (e.g., `add`, `jmp`, `nop`). - Instruction(InstructionToken), + Instruction(Opcode), /// Label definition (e.g., `loop_start:`). Label(LabelToken), /// Assembler directive (e.g., `.global`, `.section`, `.dw`, `.resb`). @@ -65,11 +64,8 @@ impl Token { } #[must_use] - pub const fn instruction(mnemonic: String, source_info: SourceInfo) -> Self { - Self::new( - TokenType::Instruction(InstructionToken { mnemonic }), - source_info, - ) + pub const fn instruction(op: Opcode, source_info: SourceInfo) -> Self { + Self::new(TokenType::Instruction(op), source_info) } #[must_use] diff --git a/assembler/src/source/token_info.rs b/assembler/src/source/token_info.rs index 1d5dc81..557e74b 100644 --- a/assembler/src/source/token_info.rs +++ b/assembler/src/source/token_info.rs @@ -21,14 +21,14 @@ pub struct RegisterToken { } impl RegisterToken { + #[must_use] + pub const fn new(reg: Register) -> Self { + Self { reg } + } + /// Returns the name of a valid [`Register`] #[must_use] pub fn name(&self) -> String { self.reg.to_string() } } - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct InstructionToken { - pub mnemonic: String, -} diff --git a/assembler/src/source/tokeniser.rs b/assembler/src/source/tokeniser.rs index 1266a83..08c0ba0 100644 --- a/assembler/src/source/tokeniser.rs +++ b/assembler/src/source/tokeniser.rs @@ -3,6 +3,7 @@ use std::{ path::{Path, PathBuf}, + str::FromStr, sync::Arc, }; @@ -15,13 +16,12 @@ use crate::{ error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind}, model::module::Module, source::{ - lines::lines_with_spans, + lines::{LineSpan, lines_with_spans}, load_source_bytes, + opcode::Opcode, source_info::SourceInfo, token::{Token, TokenType}, - token_info::{ - DirectiveToken, LabelToken, RegisterToken, SymbolToken, - }, + token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken}, }, }; @@ -38,10 +38,10 @@ pub struct Tokeniser { // Pre-compiled regex patterns label_regex: Regex, - // register_regex: Regex, + register_regex: Regex, immediate_regex: Regex, directive_regex: Regex, - // instruction_regex: Regex, + instruction_regex: Regex, symbol_regex: Regex, string_regex: Regex, comment_regex: Regex, @@ -56,23 +56,25 @@ impl Tokeniser { label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):") .expect("Failed to compile label regex pattern"), - // register_regex: Regex::new(r"^(r[0-9]+|sp|fp|pc)") - // .expect("Failed to compile register regex pattern"), + register_regex: Regex::new( + r"^(rg([0-9]|[a-f])|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b", + ) + .expect("Failed to compile register regex pattern"), immediate_regex: Regex::new( r"^(0x[0-9a-fA-F_]+|0b[0-1_]+|0o[0-7_]+|[0-9_]+)", ) .expect("Failed to compile immediate regex pattern"), - directive_regex: Regex::new(r"^\.([a-zA-Z]+)") + directive_regex: Regex::new(r"^(res[bwh]|d[bwh]|include|section|global|local)\b") .expect("Failed to compile directive regex pattern"), - // instruction_regex: Regex::new( - // r"^(add|sub|mul|div|jmp|call|ret|lli|nop|halt)", - // ) - // .expect("Failed to compile instruction regex pattern"), + instruction_regex: Regex::new( + r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b", + ) + .expect("Failed to compile instruction regex pattern"), symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)") .expect("Failed to compile symbol regex pattern"), string_regex: Regex::new(r#"^"([^"]*)"#) .expect("Failed to compile string regex pattern"), - comment_regex: Regex::new("//.*") + comment_regex: Regex::new("^//.*") .expect("Failed to compile comment regex pattern"), } } @@ -137,7 +139,7 @@ impl Tokeniser { fn tokenize_line( &self, - line_span: &crate::source::lines::LineSpan, + line_span: &LineSpan, module: &Arc, ) -> Result, AssembleError> { let mut tokens = Vec::new(); @@ -188,9 +190,11 @@ impl Tokeniser { } fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> { - _ = self; + let caps = self.register_regex.captures(input)?; + let reg = caps.get(1)?.as_str(); + let len = caps.get(0)?.len(); - let reg = match Register::try_from(input) { + let reg = match Register::try_from(reg) { Ok(reg) => reg, Err(_why) => { // Probably ignore the error. @@ -198,8 +202,6 @@ impl Tokeniser { } }; - let len = input.len(); - Some((TokenType::Register(RegisterToken { reg }), len)) } @@ -234,15 +236,14 @@ impl Tokeniser { Some((TokenType::Directive(DirectiveToken { directive }), len)) } - const fn try_match_instruction(&self, _input: &str) -> Option<(TokenType, usize)> { - _ = self; + fn try_match_instruction(&self, input: &str) -> Option<(TokenType, usize)> { + let caps = self.instruction_regex.captures(input)?; + let mnemonic = caps.get(1)?.as_str().to_string(); + let len = caps.get(0)?.len(); - // let instruction = - // Some((TokenType::Instruction(InstructionToken { mnemonic }), len)) + let op = Opcode::from_str(&mnemonic).ok()?; - // TODO: fix me. - - None + Some((TokenType::Instruction(op), len)) } fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> { @@ -262,6 +263,14 @@ impl Tokeniser { } fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> { + if let Some(m) = self.try_match_directive(input) { + return Ok(m); + } + + if let Some(m) = self.try_match_instruction(input) { + return Ok(m); + } + if let Some(m) = self.try_match_comment(input) { return Ok(m); } @@ -278,14 +287,6 @@ impl Tokeniser { return Ok(m); } - if let Some(m) = self.try_match_directive(input) { - return Ok(m); - } - - if let Some(m) = self.try_match_instruction(input) { - return Ok(m); - } - if let Some(m) = self.try_match_string(input) { return Ok(m); } diff --git a/assembler/src/source/tokeniser/error.rs b/assembler/src/source/tokeniser/error.rs index 84127ed..f238713 100644 --- a/assembler/src/source/tokeniser/error.rs +++ b/assembler/src/source/tokeniser/error.rs @@ -3,6 +3,8 @@ #[derive(Debug, Clone, Copy)] pub enum TokeniserError {} +impl TokeniserError {} + impl std::fmt::Display for TokeniserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "TODO!!!!!!") diff --git a/assembler/src/source/tokeniser/tests.rs b/assembler/src/source/tokeniser/tests.rs index da73fba..6aab304 100644 --- a/assembler/src/source/tokeniser/tests.rs +++ b/assembler/src/source/tokeniser/tests.rs @@ -1,9 +1,13 @@ //! Unit tests for the tokenizer +use common::prelude::Register; + use crate::{ context::AssemblerContext, source::{ + opcode::Opcode, token::{Token, TokenType}, + token_info::RegisterToken, tokeniser::Tokeniser, }, }; @@ -65,7 +69,7 @@ fn test_single_instruction() { .any(|t| matches!(t, TokenType::Instruction(_))) ); if let TokenType::Instruction(instr) = &tokens[0].token_type { - assert_eq!(instr.mnemonic, "add"); + assert_eq!(instr.to_string(), "add"); } else { panic!("Expected instruction token"); } @@ -73,15 +77,13 @@ fn test_single_instruction() { #[test] fn test_all_instructions() { - let instructions = [ - "add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt", - ]; + let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"]; for instr in &instructions { let tokens = tokenize_source(instr).expect("Failed to tokenize instruction"); if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type { - assert_eq!(parsed_instr.mnemonic, *instr); + assert_eq!(parsed_instr.to_string(), *instr); } else { panic!("Expected instruction token for {instr}"); } @@ -90,7 +92,7 @@ fn test_all_instructions() { #[test] fn test_registers() { - let test_cases = [("rg0", "r0"), ("rgf", "rgf"), ("pcx", "pcx")]; + let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize register"); @@ -149,10 +151,9 @@ fn test_labels() { #[test] fn test_directives() { let test_cases = [ - (".global", "global"), - (".section", "section"), - (".data", "data"), - (".text", "text"), + ("global", "global"), + ("section", "section"), + ("local", "local"), ]; for (input, expected) in &test_cases { @@ -185,3 +186,52 @@ fn test_symbols() { } } } + +#[test] +fn test_complex_instruction_line() { + let source = "addi rg1, rg2, 0xFF"; + let tokens = tokenize_source(source).expect("Failed to tokenise complex instruction"); + + // Should have: instruction, register, comma, register, comma, immediate, newline, EOF + assert!(tokens.len() >= 6); + assert!(matches!(tokens[0].token_type, TokenType::Instruction(_))); + assert!(matches!(tokens[1].token_type, TokenType::Register(_))); + assert!(matches!(tokens[2].token_type, TokenType::Comma)); + assert!(matches!(tokens[3].token_type, TokenType::Register(_))); + assert!(matches!(tokens[4].token_type, TokenType::Comma)); + assert!(matches!(tokens[5].token_type, TokenType::Immediate(_))); +} + +#[test] +fn test_multiline_with_comments() { + const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [ + TokenType::Instruction(Opcode::Add), + TokenType::Register(RegisterToken::new(Register::Rg0)), + TokenType::Comma, + TokenType::Register(RegisterToken::new(Register::Rg1)), + TokenType::Newline, + TokenType::Instruction(Opcode::SubI), + TokenType::Register(RegisterToken::new(Register::Rg2)), + TokenType::Comma, + TokenType::Immediate(10), + TokenType::Newline, + TokenType::Eof, + ]; + + const SOURCE: &str = r"add rg0, rg1 // Another comment + subi rg2, 10"; + + let tokens = + tokenize_source(SOURCE).expect("Failed to tokenise source with comments"); + let token_types = extract_token_types(&tokens); + + assert_eq!( + token_types.len(), + EXPECTED_TOKEN_TYPES.len(), + "{token_types:#?}" + ); + + for (expected, got) in EXPECTED_TOKEN_TYPES.iter().zip(token_types.iter()) { + assert!(!(expected != *got), "Expected {expected:?}, got {got:?}"); + } +} diff --git a/common/src/instructions.rs b/common/src/instructions.rs index deeaddb..64ea686 100644 --- a/common/src/instructions.rs +++ b/common/src/instructions.rs @@ -1,9 +1,10 @@ use crate::{instructions::encode::Encode, prelude::*}; -#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)] pub enum Interrupt { Software(u8), Breakpoint, + #[default] HardFault, } diff --git a/common/src/instructions/args.rs b/common/src/instructions/args.rs index 664c31a..1dc440a 100644 --- a/common/src/instructions/args.rs +++ b/common/src/instructions/args.rs @@ -1,4 +1,5 @@ -//! Various types of arguments that instructions can take, alongside encoding and decoding logic. +//! Various types of arguments that instructions can take, alongside encoding and decoding +//! logic. use crate::{ instructions::{RegisterParseError, encode::Encode}, @@ -35,18 +36,20 @@ impl std::fmt::Display for ArgsDecodeError { impl std::error::Error for ArgsDecodeError {} -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] /// Used by instructions with 2 registers and an immediate argument. pub struct ITypeArgs { pub immediate: u16, pub r1: Register, - /// May not actually be used by some instructions taking an immediate e.g. LUI. This is solved by making the constructor take Options. + /// May not actually be used by some instructions taking an immediate e.g. LUI. This + /// is solved by making the constructor take Options. pub r2: Register, } impl ITypeArgs { #[must_use] - /// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with [`Register::NoReg`]. + /// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with + /// [`Register::NoReg`]. pub fn new(immediate: u16, r1: Option, r2: Option) -> Self { let r1 = r1.unwrap_or_default(); let r2 = r2.unwrap_or_default(); @@ -56,8 +59,8 @@ impl ITypeArgs { } impl Encode for ITypeArgs { - /// Encodes an I-type instruction from its fields. These must have some unused high-order - /// bits set to 0 else the bit shifting logic gets fucked. + /// Encodes an I-type instruction from its fields. These must have some unused + /// high-order bits set to 0 else the bit shifting logic gets fucked. fn encode(self, opcode: u8) -> u32 { let opcode = u32::from(opcode); let r1 = self.r1 as u32; @@ -84,7 +87,7 @@ impl TryFrom for ITypeArgs { } /// Used by instructions not using immediates (besides 5 bit shift values). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct RTypeArgs { pub sr1: Register, pub sr2: Register, @@ -95,7 +98,8 @@ pub struct RTypeArgs { impl RTypeArgs { #[must_use] - /// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0. + /// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced + /// with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0. pub fn new( sr1: Option, sr2: Option, @@ -122,7 +126,8 @@ impl Encode for RTypeArgs { /// /// # Arguments /// - /// - `shamt`: The amount to shift value (used only in shift instructions, otherwise 0). + /// - `shamt`: The amount to shift value (used only in shift instructions, otherwise + /// 0). fn encode(self, opcode: u8) -> u32 { let opcode = u32::from(opcode); let sr1 = self.sr1 as u32;