assembler: enhance error handling and tokenization logic

This commit is contained in:
2025-06-26 17:00:14 +01:00
parent 40f8b1d57b
commit ed4fcc8495
11 changed files with 514 additions and 98 deletions
+37 -30
View File
@@ -34,14 +34,49 @@ impl AssembleError {
kind, kind,
} }
} }
/// Writes a parser error to `f`, then asks the source info to print the
/// offending source context.
///
/// When this error carries no source information, only the parser error
/// itself is written and no context is printed.
///
/// # Errors
///
/// Returns [`std::fmt::Error`] if writing to `f` fails, or if the source
/// context could not be printed.
fn print_parser_error(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    parse_error: &ParserError,
) -> std::fmt::Result {
    // Without a location there is nothing to underline: report and stop.
    let Some(source_info) = self.source_info.as_ref() else {
        return write!(
            f,
            "Parse error thrown with no source information. Error: {parse_error}"
        );
    };

    write!(f, "Parser error, {parse_error} at {source_info}")?;

    // Prints out the context for our error.
    match source_info.print_context_with_underline() {
        Ok(_) => Ok(()),
        Err(e) => {
            // `fmt::Error` cannot carry a payload, so the cause is written to
            // the formatter on a best-effort basis before failing.
            _ = writeln!(f, "Print context error: {e}");
            Err(std::fmt::Error)
        }
    }
}
} }
impl Display for AssembleError { impl Display for AssembleError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(info) = &self.source_info { if let Some(info) = &self.source_info {
write!(f, "at {info}")?; write!(f, "at {info}")?;
match &self.kind {
AssembleErrorKind::Parser(err) => self.print_parser_error(f, err)?,
_ => write!(f, "{}", self.kind)?,
}
return Ok(());
} }
// Handle errors without SourceInfo.
write!(f, "{}", self.kind)?; write!(f, "{}", self.kind)?;
Ok(()) Ok(())
@@ -68,13 +103,7 @@ pub enum AssembleErrorKind {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct ParserError { pub enum ParserError {
error_type: ParserErrorType,
source_info: SourceInfo,
}
#[derive(Debug, Clone)]
pub enum ParserErrorType {
UnexpectedToken, UnexpectedToken,
MissingOperand, MissingOperand,
InvalidInstruction, InvalidInstruction,
@@ -82,7 +111,7 @@ pub enum ParserErrorType {
DuplicateLabel, DuplicateLabel,
} }
impl Display for ParserErrorType { impl Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::UnexpectedToken => write!(f, "unexpected token"), Self::UnexpectedToken => write!(f, "unexpected token"),
@@ -94,28 +123,6 @@ impl Display for ParserErrorType {
} }
} }
impl Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// TODO: Print the path/to/filename.dsa:line_no, column col_no.
write!(
f,
"Parser error, {} at {}",
self.error_type, self.source_info
)?;
// Prints out the context for our error.
self.source_info
.print_context_with_underline()
.map_err(|e| {
_ = writeln!(f, "Print context error: {e}");
std::fmt::Error {}
})?;
Ok(())
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum SymbolError { pub enum SymbolError {
Undefined, Undefined,
+1
View File
@@ -9,6 +9,7 @@ use std::{
use crate::error::AssembleError; use crate::error::AssembleError;
pub mod lines; pub mod lines;
pub mod opcode;
pub mod source_info; pub mod source_info;
pub mod token; pub mod token;
pub mod token_info; pub mod token_info;
+349
View File
@@ -0,0 +1,349 @@
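//! This module defines the assembler's [`Opcode`] set (real instructions,
//! pseudo-instructions and meta instructions) together with its string
//! parsing and display logic.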
use std::{fmt, str::FromStr};
use common::prelude::{ITypeArgs, Instruction, Interrupt, RTypeArgs};
use crate::{
error::{AssembleError, AssembleErrorKind},
source::source_info::SourceInfo,
};
/// Every opcode known to the assembler.
///
/// The variants fall into three groups: real instructions, which have a
/// numeric opcode (see `to_opcode_value`) and a counterpart in `common`'s
/// `Instruction` (see `to_instruction`); pseudo-instructions; and meta
/// instructions. Of the last two groups only `Segment` has an opcode value
/// and an `Instruction` counterpart.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Opcode {
// Real instructions (opcode values 0x00-0x26).
/// `nop` (`0x00`): `Instruction::Nop`.
Nop,
/// `mov` (`0x01`): `Instruction::Mov`.
Mov,
/// `movs` (`0x02`): `Instruction::MovSigned`.
Movs,
/// `ldb` (`0x03`): `Instruction::LoadByte`.
Ldb,
/// `ldbs` (`0x04`): `Instruction::LoadByteSigned`.
Ldbs,
/// `ldh` (`0x05`): `Instruction::LoadHalfword`.
Ldh,
/// `ldhs` (`0x06`): `Instruction::LoadHalfwordSigned`.
Ldhs,
/// `ldw` (`0x07`): `Instruction::LoadWord`.
Ldw,
/// `stb` (`0x08`): `Instruction::StoreByte`.
Stb,
/// `sth` (`0x09`): `Instruction::StoreHalfword`.
Sth,
/// `stw` (`0x0A`): `Instruction::StoreWord`.
Stw,
/// `lli` (`0x0B`): `Instruction::LoadLowerImmediate`.
Lli,
/// `lui` (`0x0C`): `Instruction::LoadUpperImmediate`.
Lui,
/// `jmp` (`0x0D`): `Instruction::Jump`.
Jmp,
/// `jeq` (`0x0E`): `Instruction::JumpEq`.
Jeq,
/// `jne` (`0x0F`): `Instruction::JumpNeq`.
Jne,
/// `jgt` (`0x10`): `Instruction::JumpGt`.
Jgt,
/// `jge` (`0x11`): `Instruction::JumpGe`.
Jge,
/// `jlt` (`0x12`): `Instruction::JumpLt`.
Jlt,
/// `jle` (`0x13`): `Instruction::JumpLe`.
Jle,
/// `cmp` (`0x14`): `Instruction::Compare`.
Cmp,
/// `inc` (`0x15`): `Instruction::Increment`.
Inc,
/// `dec` (`0x16`): `Instruction::Decrement`.
Dec,
/// `shl` (`0x17`): `Instruction::ShiftLeft`.
Shl,
/// `shr` (`0x18`): `Instruction::ShiftRight`.
Shr,
/// `add` (`0x19`): `Instruction::Add`.
Add,
/// `sub` (`0x1A`): `Instruction::Sub`.
Sub,
/// `and` (`0x1B`): `Instruction::And`.
And,
/// `or` (`0x1C`): `Instruction::Or`.
Or,
/// `not` (`0x1D`): `Instruction::Not`.
Not,
/// `xor` (`0x1E`): `Instruction::Xor`.
Xor,
/// `nand` (`0x1F`): `Instruction::Nand`.
Nand,
/// `nor` (`0x20`): `Instruction::Nor`.
Nor,
/// `xnor` (`0x21`): `Instruction::Xnor`.
Xnor,
/// `int` (`0x22`): `Instruction::Interrupt`.
Int,
/// `irt` (`0x23`): `Instruction::IntReturn`.
Irt,
/// `hlt` (`0x24`): `Instruction::Halt`.
Hlt,
/// `addi` (`0x25`): `Instruction::AddImmediate`.
AddI,
/// `subi` (`0x26`): `Instruction::SubImmediate`.
SubI,
// Pseudo-instructions
/// `db` pseudo-instruction.
Db,
/// `dh` pseudo-instruction.
Dh,
/// `dw` pseudo-instruction.
Dw,
/// `resb` pseudo-instruction.
Resb,
/// `resh` pseudo-instruction.
Resh,
/// `resw` pseudo-instruction.
Resw,
/// `push` pseudo-instruction.
Push,
/// `pop` pseudo-instruction.
Pop,
/// `pusha` pseudo-instruction.
Pusha,
/// `popa` pseudo-instruction.
Popa,
/// `lwi` pseudo-instruction.
Lwi,
/// `call` pseudo-instruction.
Call,
/// `return` pseudo-instruction.
Return,
// Meta instructions (these aren't present in the binary as instructions)
/// `include` meta instruction.
Include,
/// Displayed as `data`; not accepted by the `FromStr` implementation.
Data,
/// Displayed as `[SEGMENT]` and not accepted by the `FromStr`
/// implementation; has opcode value `0x27` and maps to
/// `Instruction::Segment`.
Segment,
}
/// Error produced when text cannot be converted into an opcode.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so that parse
/// results can be compared directly (for example with `assert_eq!`) and
/// stored inside other cloneable error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OpcodeFromStrError {
    /// The named register does not exist.
    InvalidRegister(&'static str),
    /// The given mnemonic is not a known instruction.
    InvalidOpcode(String),
}

impl std::fmt::Display for OpcodeFromStrError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidRegister(reg) => write!(f, "register does not exist: {reg}"),
            Self::InvalidOpcode(op) => write!(f, "instruction does not exist: {op}"),
        }
    }
}

impl std::error::Error for OpcodeFromStrError {}
impl Opcode {
/// Lowercase mnemonics, listed as real instructions first, then
/// pseudo-instructions, then meta instructions.
pub const OPCODES: &[&str] = &[
    // Real instructions (0x00-0x26)
    "nop",
    "mov", "movs",
    "ldb", "ldbs", "ldh", "ldhs", "ldw",
    "stb", "sth", "stw",
    "lli", "lui",
    "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle",
    "cmp", "inc", "dec", "shl", "shr",
    "add", "sub", "and", "or", "not", "xor", "nand", "nor", "xnor",
    "int", "irt", "hlt",
    "addi", "subi",
    // Pseudo-instructions
    "db", "dh", "dw",
    "resb", "resh", "resw",
    "push", "pop", "lwi", "call", "return", "pusha", "popa",
    // Meta instructions
    "include",
];
pub fn to_instruction(
&self,
source_info: SourceInfo,
) -> Result<Instruction, AssembleError> {
match self {
Self::Nop => Ok(Instruction::Nop),
Self::Mov => Ok(Instruction::Mov(RTypeArgs::default())),
Self::Movs => Ok(Instruction::MovSigned(RTypeArgs::default())),
Self::Ldb => Ok(Instruction::LoadByte(ITypeArgs::default())),
Self::Ldbs => Ok(Instruction::LoadByteSigned(ITypeArgs::default())),
Self::Ldh => Ok(Instruction::LoadHalfword(ITypeArgs::default())),
Self::Ldhs => Ok(Instruction::LoadHalfwordSigned(ITypeArgs::default())),
Self::Ldw => Ok(Instruction::LoadWord(ITypeArgs::default())),
Self::Stb => Ok(Instruction::StoreByte(ITypeArgs::default())),
Self::Sth => Ok(Instruction::StoreHalfword(ITypeArgs::default())),
Self::Stw => Ok(Instruction::StoreWord(ITypeArgs::default())),
Self::Lli => Ok(Instruction::LoadLowerImmediate(ITypeArgs::default())),
Self::Lui => Ok(Instruction::LoadUpperImmediate(ITypeArgs::default())),
Self::Jmp => Ok(Instruction::Jump(ITypeArgs::default())),
Self::Jeq => Ok(Instruction::JumpEq(ITypeArgs::default())),
Self::Jne => Ok(Instruction::JumpNeq(ITypeArgs::default())),
Self::Jgt => Ok(Instruction::JumpGt(ITypeArgs::default())),
Self::Jge => Ok(Instruction::JumpGe(ITypeArgs::default())),
Self::Jlt => Ok(Instruction::JumpLt(ITypeArgs::default())),
Self::Jle => Ok(Instruction::JumpLe(ITypeArgs::default())),
Self::Cmp => Ok(Instruction::Compare(RTypeArgs::default())),
Self::Inc => Ok(Instruction::Increment(RTypeArgs::default())),
Self::Dec => Ok(Instruction::Decrement(RTypeArgs::default())),
Self::Shl => Ok(Instruction::ShiftLeft(RTypeArgs::default())),
Self::Shr => Ok(Instruction::ShiftRight(RTypeArgs::default())),
Self::Add => Ok(Instruction::Add(RTypeArgs::default())),
Self::Sub => Ok(Instruction::Sub(RTypeArgs::default())),
Self::And => Ok(Instruction::And(RTypeArgs::default())),
Self::Or => Ok(Instruction::Or(RTypeArgs::default())),
Self::Not => Ok(Instruction::Not(RTypeArgs::default())),
Self::Xor => Ok(Instruction::Xor(RTypeArgs::default())),
Self::Nand => Ok(Instruction::Nand(RTypeArgs::default())),
Self::Nor => Ok(Instruction::Nor(RTypeArgs::default())),
Self::Xnor => Ok(Instruction::Xnor(RTypeArgs::default())),
Self::Int => Ok(Instruction::Interrupt(Interrupt::default())),
Self::Irt => Ok(Instruction::IntReturn),
Self::Hlt => Ok(Instruction::Halt),
Self::AddI => Ok(Instruction::AddImmediate(ITypeArgs::default())),
Self::SubI => Ok(Instruction::SubImmediate(ITypeArgs::default())),
Self::Segment => Ok(Instruction::Segment(0)),
_ => Err(AssembleError::new_source_error(
source_info,
AssembleErrorKind::Unimplemented(
"Opcode::to_instruction called on an instruction that does not exist in common.",
),
)),
}
}
#[must_use]
pub const fn to_opcode_value(&self) -> Option<u8> {
match self {
Self::Nop => Some(0x00),
Self::Mov => Some(0x01),
Self::Movs => Some(0x02),
Self::Ldb => Some(0x03),
Self::Ldbs => Some(0x04),
Self::Ldh => Some(0x05),
Self::Ldhs => Some(0x06),
Self::Ldw => Some(0x07),
Self::Stb => Some(0x08),
Self::Sth => Some(0x09),
Self::Stw => Some(0x0A),
Self::Lli => Some(0x0B),
Self::Lui => Some(0x0C),
Self::Jmp => Some(0x0D),
Self::Jeq => Some(0x0E),
Self::Jne => Some(0x0F),
Self::Jgt => Some(0x10),
Self::Jge => Some(0x11),
Self::Jlt => Some(0x12),
Self::Jle => Some(0x13),
Self::Cmp => Some(0x14),
Self::Inc => Some(0x15),
Self::Dec => Some(0x16),
Self::Shl => Some(0x17),
Self::Shr => Some(0x18),
Self::Add => Some(0x19),
Self::Sub => Some(0x1A),
Self::And => Some(0x1B),
Self::Or => Some(0x1C),
Self::Not => Some(0x1D),
Self::Xor => Some(0x1E),
Self::Nand => Some(0x1F),
Self::Nor => Some(0x20),
Self::Xnor => Some(0x21),
Self::Int => Some(0x22),
Self::Irt => Some(0x23),
Self::Hlt => Some(0x24),
Self::AddI => Some(0x25),
Self::SubI => Some(0x26),
// TODO: Maybe recombine pseudos?
Self::Segment => Some(0x27),
// Pseudo-instructions don't have opcode values
_ => None,
}
}
/// Reports whether this opcode is a pseudo-instruction.
///
/// Covers every variant the enum groups under "Pseudo-instructions",
/// including `Pusha`, `Popa`, `Call` and `Return`, which were previously
/// missing from this check. Real instructions and the meta instructions
/// (`Include`, `Data`, `Segment`) return `false`.
#[must_use]
pub const fn is_pseudo_instruction(&self) -> bool {
    matches!(
        self,
        Self::Db
            | Self::Dh
            | Self::Dw
            | Self::Resb
            | Self::Resh
            | Self::Resw
            | Self::Push
            | Self::Pop
            | Self::Pusha
            | Self::Popa
            | Self::Lwi
            | Self::Call
            | Self::Return
    )
}
}
impl FromStr for Opcode {
    type Err = OpcodeFromStrError;

    /// Parses a mnemonic into an [`Opcode`], ignoring letter case.
    ///
    /// # Errors
    ///
    /// Returns [`OpcodeFromStrError::InvalidOpcode`] carrying the input exactly
    /// as it was written when the mnemonic is not recognised.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let lowered = s.to_lowercase();

        let opcode = match lowered.as_str() {
            "nop" => Self::Nop,
            "mov" => Self::Mov,
            "movs" => Self::Movs,
            "ldb" => Self::Ldb,
            "ldbs" => Self::Ldbs,
            "ldh" => Self::Ldh,
            "ldhs" => Self::Ldhs,
            "ldw" => Self::Ldw,
            "stb" => Self::Stb,
            "sth" => Self::Sth,
            "stw" => Self::Stw,
            "lli" => Self::Lli,
            "lui" => Self::Lui,
            "jmp" => Self::Jmp,
            "jeq" => Self::Jeq,
            "jne" => Self::Jne,
            "jgt" => Self::Jgt,
            "jge" => Self::Jge,
            "jlt" => Self::Jlt,
            "jle" => Self::Jle,
            "cmp" => Self::Cmp,
            "inc" => Self::Inc,
            "dec" => Self::Dec,
            "shl" => Self::Shl,
            "shr" => Self::Shr,
            "add" => Self::Add,
            "sub" => Self::Sub,
            "and" => Self::And,
            "or" => Self::Or,
            "not" => Self::Not,
            "xor" => Self::Xor,
            "nand" => Self::Nand,
            "nor" => Self::Nor,
            "xnor" => Self::Xnor,
            "int" => Self::Int,
            "irt" => Self::Irt,
            "hlt" => Self::Hlt,
            "addi" => Self::AddI,
            "subi" => Self::SubI,
            "db" => Self::Db,
            "dh" => Self::Dh,
            "dw" => Self::Dw,
            "resb" => Self::Resb,
            "resh" => Self::Resh,
            "resw" => Self::Resw,
            "push" => Self::Push,
            "pop" => Self::Pop,
            "lwi" => Self::Lwi,
            "include" => Self::Include,
            "call" => Self::Call,
            "return" => Self::Return,
            "pusha" => Self::Pusha,
            "popa" => Self::Popa,
            // The error reports the caller's original spelling, not the
            // lowercased copy used for matching.
            _ => return Err(OpcodeFromStrError::InvalidOpcode(s.to_string())),
        };

        Ok(opcode)
    }
}
impl fmt::Display for Opcode {
    /// Writes the opcode's lowercase mnemonic; `Segment` is written as
    /// `[SEGMENT]`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mnemonic = match self {
            Self::Nop => "nop",
            Self::Mov => "mov",
            Self::Movs => "movs",
            Self::Ldb => "ldb",
            Self::Ldbs => "ldbs",
            Self::Ldh => "ldh",
            Self::Ldhs => "ldhs",
            Self::Ldw => "ldw",
            Self::Stb => "stb",
            Self::Sth => "sth",
            Self::Stw => "stw",
            Self::Lli => "lli",
            Self::Lui => "lui",
            Self::Jmp => "jmp",
            Self::Jeq => "jeq",
            Self::Jne => "jne",
            Self::Jgt => "jgt",
            Self::Jge => "jge",
            Self::Jlt => "jlt",
            Self::Jle => "jle",
            Self::Cmp => "cmp",
            Self::Inc => "inc",
            Self::Dec => "dec",
            Self::Shl => "shl",
            Self::Shr => "shr",
            Self::Add => "add",
            Self::Sub => "sub",
            Self::And => "and",
            Self::Or => "or",
            Self::Not => "not",
            Self::Xor => "xor",
            Self::Nand => "nand",
            Self::Nor => "nor",
            Self::Xnor => "xnor",
            Self::Int => "int",
            Self::Irt => "irt",
            Self::Hlt => "hlt",
            Self::AddI => "addi",
            Self::SubI => "subi",
            Self::Db => "db",
            Self::Dh => "dh",
            Self::Dw => "dw",
            Self::Resb => "resb",
            Self::Resh => "resh",
            Self::Resw => "resw",
            Self::Push => "push",
            Self::Pop => "pop",
            Self::Lwi => "lwi",
            Self::Call => "call",
            Self::Return => "return",
            Self::Pusha => "pusha",
            Self::Popa => "popa",
            // meta instructions
            Self::Include => "include",
            Self::Data => "data",
            Self::Segment => "[SEGMENT]",
        };

        // `write_str` emits the text verbatim, ignoring width and padding
        // flags, exactly like `write!` with a bare literal.
        f.write_str(mnemonic)
    }
}
+4
View File
@@ -0,0 +1,4 @@
//! This module contains code for handling pseudo opcodes.
/// Pseudo instructions that cannot simply be lowered to ISA instructions.
///
/// This enum currently has no variants, so no value of this type can be
/// constructed yet.
pub enum PseudoOpcode {}
+5 -9
View File
@@ -5,10 +5,9 @@
use common::prelude::*; use common::prelude::*;
use crate::source::{ use crate::source::{
opcode::Opcode,
source_info::SourceInfo, source_info::SourceInfo,
token_info::{ token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken,
},
}; };
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -22,7 +21,7 @@ pub enum TokenType {
/// String literal (e.g., `"hello world"`). /// String literal (e.g., `"hello world"`).
String(String), String(String),
/// Assembly instruction (e.g., `add`, `jmp`, `nop`). /// Assembly instruction (e.g., `add`, `jmp`, `nop`).
Instruction(InstructionToken), Instruction(Opcode),
/// Label definition (e.g., `loop_start:`). /// Label definition (e.g., `loop_start:`).
Label(LabelToken), Label(LabelToken),
/// Assembler directive (e.g., `.global`, `.section`, `.dw`, `.resb`). /// Assembler directive (e.g., `.global`, `.section`, `.dw`, `.resb`).
@@ -65,11 +64,8 @@ impl Token {
} }
#[must_use] #[must_use]
pub const fn instruction(mnemonic: String, source_info: SourceInfo) -> Self { pub const fn instruction(op: Opcode, source_info: SourceInfo) -> Self {
Self::new( Self::new(TokenType::Instruction(op), source_info)
TokenType::Instruction(InstructionToken { mnemonic }),
source_info,
)
} }
#[must_use] #[must_use]
+5 -5
View File
@@ -21,14 +21,14 @@ pub struct RegisterToken {
} }
impl RegisterToken { impl RegisterToken {
#[must_use]
pub const fn new(reg: Register) -> Self {
Self { reg }
}
/// Returns the name of a valid [`Register`] /// Returns the name of a valid [`Register`]
#[must_use] #[must_use]
pub fn name(&self) -> String { pub fn name(&self) -> String {
self.reg.to_string() self.reg.to_string()
} }
} }
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InstructionToken {
pub mnemonic: String,
}
+35 -34
View File
@@ -3,6 +3,7 @@
use std::{ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
str::FromStr,
sync::Arc, sync::Arc,
}; };
@@ -15,13 +16,12 @@ use crate::{
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind}, error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
model::module::Module, model::module::Module,
source::{ source::{
lines::lines_with_spans, lines::{LineSpan, lines_with_spans},
load_source_bytes, load_source_bytes,
opcode::Opcode,
source_info::SourceInfo, source_info::SourceInfo,
token::{Token, TokenType}, token::{Token, TokenType},
token_info::{ token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
DirectiveToken, LabelToken, RegisterToken, SymbolToken,
},
}, },
}; };
@@ -38,10 +38,10 @@ pub struct Tokeniser {
// Pre-compiled regex patterns // Pre-compiled regex patterns
label_regex: Regex, label_regex: Regex,
// register_regex: Regex, register_regex: Regex,
immediate_regex: Regex, immediate_regex: Regex,
directive_regex: Regex, directive_regex: Regex,
// instruction_regex: Regex, instruction_regex: Regex,
symbol_regex: Regex, symbol_regex: Regex,
string_regex: Regex, string_regex: Regex,
comment_regex: Regex, comment_regex: Regex,
@@ -56,23 +56,25 @@ impl Tokeniser {
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):") label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
.expect("Failed to compile label regex pattern"), .expect("Failed to compile label regex pattern"),
// register_regex: Regex::new(r"^(r[0-9]+|sp|fp|pc)") register_regex: Regex::new(
// .expect("Failed to compile register regex pattern"), r"^(rg([0-9]|[a-f])|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
)
.expect("Failed to compile register regex pattern"),
immediate_regex: Regex::new( immediate_regex: Regex::new(
r"^(0x[0-9a-fA-F_]+|0b[0-1_]+|0o[0-7_]+|[0-9_]+)", r"^(0x[0-9a-fA-F_]+|0b[0-1_]+|0o[0-7_]+|[0-9_]+)",
) )
.expect("Failed to compile immediate regex pattern"), .expect("Failed to compile immediate regex pattern"),
directive_regex: Regex::new(r"^\.([a-zA-Z]+)") directive_regex: Regex::new(r"^(res[bwh]|d[bwh]|include|section|global|local)\b")
.expect("Failed to compile directive regex pattern"), .expect("Failed to compile directive regex pattern"),
// instruction_regex: Regex::new( instruction_regex: Regex::new(
// r"^(add|sub|mul|div|jmp|call|ret|lli|nop|halt)", r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b",
// ) )
// .expect("Failed to compile instruction regex pattern"), .expect("Failed to compile instruction regex pattern"),
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)") symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)")
.expect("Failed to compile symbol regex pattern"), .expect("Failed to compile symbol regex pattern"),
string_regex: Regex::new(r#"^"([^"]*)"#) string_regex: Regex::new(r#"^"([^"]*)"#)
.expect("Failed to compile string regex pattern"), .expect("Failed to compile string regex pattern"),
comment_regex: Regex::new("//.*") comment_regex: Regex::new("^//.*")
.expect("Failed to compile comment regex pattern"), .expect("Failed to compile comment regex pattern"),
} }
} }
@@ -137,7 +139,7 @@ impl Tokeniser {
fn tokenize_line( fn tokenize_line(
&self, &self,
line_span: &crate::source::lines::LineSpan, line_span: &LineSpan,
module: &Arc<Module>, module: &Arc<Module>,
) -> Result<Vec<Token>, AssembleError> { ) -> Result<Vec<Token>, AssembleError> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
@@ -188,9 +190,11 @@ impl Tokeniser {
} }
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> { fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
_ = self; let caps = self.register_regex.captures(input)?;
let reg = caps.get(1)?.as_str();
let len = caps.get(0)?.len();
let reg = match Register::try_from(input) { let reg = match Register::try_from(reg) {
Ok(reg) => reg, Ok(reg) => reg,
Err(_why) => { Err(_why) => {
// Probably ignore the error. // Probably ignore the error.
@@ -198,8 +202,6 @@ impl Tokeniser {
} }
}; };
let len = input.len();
Some((TokenType::Register(RegisterToken { reg }), len)) Some((TokenType::Register(RegisterToken { reg }), len))
} }
@@ -234,15 +236,14 @@ impl Tokeniser {
Some((TokenType::Directive(DirectiveToken { directive }), len)) Some((TokenType::Directive(DirectiveToken { directive }), len))
} }
const fn try_match_instruction(&self, _input: &str) -> Option<(TokenType, usize)> { fn try_match_instruction(&self, input: &str) -> Option<(TokenType, usize)> {
_ = self; let caps = self.instruction_regex.captures(input)?;
let mnemonic = caps.get(1)?.as_str().to_string();
let len = caps.get(0)?.len();
// let instruction = let op = Opcode::from_str(&mnemonic).ok()?;
// Some((TokenType::Instruction(InstructionToken { mnemonic }), len))
// TODO: fix me. Some((TokenType::Instruction(op), len))
None
} }
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> { fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
@@ -262,6 +263,14 @@ impl Tokeniser {
} }
fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> { fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> {
if let Some(m) = self.try_match_directive(input) {
return Ok(m);
}
if let Some(m) = self.try_match_instruction(input) {
return Ok(m);
}
if let Some(m) = self.try_match_comment(input) { if let Some(m) = self.try_match_comment(input) {
return Ok(m); return Ok(m);
} }
@@ -278,14 +287,6 @@ impl Tokeniser {
return Ok(m); return Ok(m);
} }
if let Some(m) = self.try_match_directive(input) {
return Ok(m);
}
if let Some(m) = self.try_match_instruction(input) {
return Ok(m);
}
if let Some(m) = self.try_match_string(input) { if let Some(m) = self.try_match_string(input) {
return Ok(m); return Ok(m);
} }
+2
View File
@@ -3,6 +3,8 @@
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum TokeniserError {} pub enum TokeniserError {}
impl TokeniserError {}
impl std::fmt::Display for TokeniserError { impl std::fmt::Display for TokeniserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "TODO!!!!!!") write!(f, "TODO!!!!!!")
+60 -10
View File
@@ -1,9 +1,13 @@
//! Unit tests for the tokenizer //! Unit tests for the tokenizer
use common::prelude::Register;
use crate::{ use crate::{
context::AssemblerContext, context::AssemblerContext,
source::{ source::{
opcode::Opcode,
token::{Token, TokenType}, token::{Token, TokenType},
token_info::RegisterToken,
tokeniser::Tokeniser, tokeniser::Tokeniser,
}, },
}; };
@@ -65,7 +69,7 @@ fn test_single_instruction() {
.any(|t| matches!(t, TokenType::Instruction(_))) .any(|t| matches!(t, TokenType::Instruction(_)))
); );
if let TokenType::Instruction(instr) = &tokens[0].token_type { if let TokenType::Instruction(instr) = &tokens[0].token_type {
assert_eq!(instr.mnemonic, "add"); assert_eq!(instr.to_string(), "add");
} else { } else {
panic!("Expected instruction token"); panic!("Expected instruction token");
} }
@@ -73,15 +77,13 @@ fn test_single_instruction() {
#[test] #[test]
fn test_all_instructions() { fn test_all_instructions() {
let instructions = [ let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"];
"add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt",
];
for instr in &instructions { for instr in &instructions {
let tokens = tokenize_source(instr).expect("Failed to tokenize instruction"); let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");
if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type { if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type {
assert_eq!(parsed_instr.mnemonic, *instr); assert_eq!(parsed_instr.to_string(), *instr);
} else { } else {
panic!("Expected instruction token for {instr}"); panic!("Expected instruction token for {instr}");
} }
@@ -90,7 +92,7 @@ fn test_all_instructions() {
#[test] #[test]
fn test_registers() { fn test_registers() {
let test_cases = [("rg0", "r0"), ("rgf", "rgf"), ("pcx", "pcx")]; let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];
for (input, expected) in &test_cases { for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize register"); let tokens = tokenize_source(input).expect("Failed to tokenize register");
@@ -149,10 +151,9 @@ fn test_labels() {
#[test] #[test]
fn test_directives() { fn test_directives() {
let test_cases = [ let test_cases = [
(".global", "global"), ("global", "global"),
(".section", "section"), ("section", "section"),
(".data", "data"), ("local", "local"),
(".text", "text"),
]; ];
for (input, expected) in &test_cases { for (input, expected) in &test_cases {
@@ -185,3 +186,52 @@ fn test_symbols() {
} }
} }
} }
/// A full instruction line should tokenise into its opcode followed by its
/// comma-separated operands.
#[test]
fn test_complex_instruction_line() {
    let tokens = tokenize_source("addi rg1, rg2, 0xFF")
        .expect("Failed to tokenise complex instruction");

    // Expected stream: instruction, register, comma, register, comma,
    // immediate, followed by newline and EOF.
    assert!(tokens.len() >= 6);

    let kind_at = |index: usize| &tokens[index].token_type;
    assert!(matches!(kind_at(0), TokenType::Instruction(_)));
    assert!(matches!(kind_at(1), TokenType::Register(_)));
    assert!(matches!(kind_at(2), TokenType::Comma));
    assert!(matches!(kind_at(3), TokenType::Register(_)));
    assert!(matches!(kind_at(4), TokenType::Comma));
    assert!(matches!(kind_at(5), TokenType::Immediate(_)));
}
/// Two source lines, the first ending in a `//` comment, must produce exactly
/// the expected token stream: the comment contributes no token of its own.
#[test]
fn test_multiline_with_comments() {
    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
        TokenType::Instruction(Opcode::Add),
        TokenType::Register(RegisterToken::new(Register::Rg0)),
        TokenType::Comma,
        TokenType::Register(RegisterToken::new(Register::Rg1)),
        TokenType::Newline,
        TokenType::Instruction(Opcode::SubI),
        TokenType::Register(RegisterToken::new(Register::Rg2)),
        TokenType::Comma,
        TokenType::Immediate(10),
        TokenType::Newline,
        TokenType::Eof,
    ];
    const SOURCE: &str = r"add rg0, rg1 // Another comment
subi rg2, 10";

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
    let token_types = extract_token_types(&tokens);

    // Check the length first: `zip` below stops at the shorter sequence and
    // would otherwise hide missing or extra tokens.
    assert_eq!(
        token_types.len(),
        EXPECTED_TOKEN_TYPES.len(),
        "{token_types:#?}"
    );
    for (expected, got) in EXPECTED_TOKEN_TYPES.iter().zip(token_types.iter()) {
        // Plain equality assertion instead of the double-negated
        // `assert!(!(expected != *got))`.
        assert_eq!(expected, *got, "Expected {expected:?}, got {got:?}");
    }
}
+2 -1
View File
@@ -1,9 +1,10 @@
use crate::{instructions::encode::Encode, prelude::*}; use crate::{instructions::encode::Encode, prelude::*};
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
pub enum Interrupt { pub enum Interrupt {
Software(u8), Software(u8),
Breakpoint, Breakpoint,
#[default]
HardFault, HardFault,
} }
+14 -9
View File
@@ -1,4 +1,5 @@
//! Various types of arguments that instructions can take, alongside encoding and decoding logic. //! Various types of arguments that instructions can take, alongside encoding and decoding
//! logic.
use crate::{ use crate::{
instructions::{RegisterParseError, encode::Encode}, instructions::{RegisterParseError, encode::Encode},
@@ -35,18 +36,20 @@ impl std::fmt::Display for ArgsDecodeError {
impl std::error::Error for ArgsDecodeError {} impl std::error::Error for ArgsDecodeError {}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
/// Used by instructions with 2 registers and an immediate argument. /// Used by instructions with 2 registers and an immediate argument.
pub struct ITypeArgs { pub struct ITypeArgs {
pub immediate: u16, pub immediate: u16,
pub r1: Register, pub r1: Register,
/// May not actually be used by some instructions taking an immediate e.g. LUI. This is solved by making the constructor take Options. /// May not actually be used by some instructions taking an immediate e.g. LUI. This
/// is solved by making the constructor take Options.
pub r2: Register, pub r2: Register,
} }
impl ITypeArgs { impl ITypeArgs {
#[must_use] #[must_use]
/// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with [`Register::NoReg`]. /// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with
/// [`Register::NoReg`].
pub fn new(immediate: u16, r1: Option<Register>, r2: Option<Register>) -> Self { pub fn new(immediate: u16, r1: Option<Register>, r2: Option<Register>) -> Self {
let r1 = r1.unwrap_or_default(); let r1 = r1.unwrap_or_default();
let r2 = r2.unwrap_or_default(); let r2 = r2.unwrap_or_default();
@@ -56,8 +59,8 @@ impl ITypeArgs {
} }
impl Encode for ITypeArgs { impl Encode for ITypeArgs {
/// Encodes an I-type instruction from its fields. These must have some unused high-order /// Encodes an I-type instruction from its fields. These must have some unused
/// bits set to 0 else the bit shifting logic gets fucked. /// high-order bits set to 0 else the bit shifting logic gets fucked.
fn encode(self, opcode: u8) -> u32 { fn encode(self, opcode: u8) -> u32 {
let opcode = u32::from(opcode); let opcode = u32::from(opcode);
let r1 = self.r1 as u32; let r1 = self.r1 as u32;
@@ -84,7 +87,7 @@ impl TryFrom<u32> for ITypeArgs {
} }
/// Used by instructions not using immediates (besides 5 bit shift values). /// Used by instructions not using immediates (besides 5 bit shift values).
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RTypeArgs { pub struct RTypeArgs {
pub sr1: Register, pub sr1: Register,
pub sr2: Register, pub sr2: Register,
@@ -95,7 +98,8 @@ pub struct RTypeArgs {
impl RTypeArgs { impl RTypeArgs {
#[must_use] #[must_use]
/// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0. /// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced
/// with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0.
pub fn new( pub fn new(
sr1: Option<Register>, sr1: Option<Register>,
sr2: Option<Register>, sr2: Option<Register>,
@@ -122,7 +126,8 @@ impl Encode for RTypeArgs {
/// ///
/// # Arguments /// # Arguments
/// ///
/// - `shamt`: The amount to shift value (used only in shift instructions, otherwise 0). /// - `shamt`: The amount to shift value (used only in shift instructions, otherwise
/// 0).
fn encode(self, opcode: u8) -> u32 { fn encode(self, opcode: u8) -> u32 {
let opcode = u32::from(opcode); let opcode = u32::from(opcode);
let sr1 = self.sr1 as u32; let sr1 = self.sr1 as u32;