assembler: enhance error handling and tokenization logic
This commit is contained in:
+37
-30
@@ -34,14 +34,49 @@ impl AssembleError {
|
||||
kind,
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints a parser error to the screen.
|
||||
fn print_parser_error(
|
||||
&self,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
parse_error: &ParserError,
|
||||
) -> std::fmt::Result {
|
||||
let Some(source_info) = &self.source_info else {
|
||||
write!(
|
||||
f,
|
||||
"Parse error thrown with no source information. Error: {parse_error}"
|
||||
)?;
|
||||
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
write!(f, "Parser error, {parse_error} at {source_info}")?;
|
||||
|
||||
// Prints out the context for our error.
|
||||
source_info.print_context_with_underline().map_err(|e| {
|
||||
_ = writeln!(f, "Print context error: {e}");
|
||||
|
||||
std::fmt::Error {}
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for AssembleError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(info) = &self.source_info {
|
||||
write!(f, "at {info}")?;
|
||||
|
||||
match &self.kind {
|
||||
AssembleErrorKind::Parser(err) => self.print_parser_error(f, err)?,
|
||||
_ => write!(f, "{}", self.kind)?,
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Handle errors without SourceInfo.
|
||||
write!(f, "{}", self.kind)?;
|
||||
|
||||
Ok(())
|
||||
@@ -68,13 +103,7 @@ pub enum AssembleErrorKind {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParserError {
|
||||
error_type: ParserErrorType,
|
||||
source_info: SourceInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ParserErrorType {
|
||||
pub enum ParserError {
|
||||
UnexpectedToken,
|
||||
MissingOperand,
|
||||
InvalidInstruction,
|
||||
@@ -82,7 +111,7 @@ pub enum ParserErrorType {
|
||||
DuplicateLabel,
|
||||
}
|
||||
|
||||
impl Display for ParserErrorType {
|
||||
impl Display for ParserError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::UnexpectedToken => write!(f, "unexpected token"),
|
||||
@@ -94,28 +123,6 @@ impl Display for ParserErrorType {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ParserError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// TODO: Print the path/to/filename.dsa:line_no, column col_no.
|
||||
write!(
|
||||
f,
|
||||
"Parser error, {} at {}",
|
||||
self.error_type, self.source_info
|
||||
)?;
|
||||
|
||||
// Prints out the context for our error.
|
||||
self.source_info
|
||||
.print_context_with_underline()
|
||||
.map_err(|e| {
|
||||
_ = writeln!(f, "Print context error: {e}");
|
||||
|
||||
std::fmt::Error {}
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SymbolError {
|
||||
Undefined,
|
||||
|
||||
@@ -9,6 +9,7 @@ use std::{
|
||||
use crate::error::AssembleError;
|
||||
|
||||
pub mod lines;
|
||||
pub mod opcode;
|
||||
pub mod source_info;
|
||||
pub mod token;
|
||||
pub mod token_info;
|
||||
|
||||
@@ -0,0 +1,349 @@
|
||||
//! This module contains instructions for tokenisation.
|
||||
|
||||
use std::{fmt, str::FromStr};
|
||||
|
||||
use common::prelude::{ITypeArgs, Instruction, Interrupt, RTypeArgs};
|
||||
|
||||
use crate::{
|
||||
error::{AssembleError, AssembleErrorKind},
|
||||
source::source_info::SourceInfo,
|
||||
};
|
||||
|
||||
/// Every mnemonic the assembler recognises as an instruction token.
///
/// The variants fall into three groups: real instructions, pseudo-instructions,
/// and meta instructions that never appear in the binary as instructions.
/// Variant order is significant to anything that relies on the implicit
/// discriminants, so new variants should be appended within their group with care.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Opcode {
    // Real instructions
    Nop,
    Mov,
    Movs,
    Ldb,
    Ldbs,
    Ldh,
    Ldhs,
    Ldw,
    Stb,
    Sth,
    Stw,
    Lli,
    Lui,
    Jmp,
    Jeq,
    Jne,
    Jgt,
    Jge,
    Jlt,
    Jle,
    Cmp,
    Inc,
    Dec,
    Shl,
    Shr,
    Add,
    Sub,
    And,
    Or,
    Not,
    Xor,
    Nand,
    Nor,
    Xnor,
    Int,
    Irt,
    Hlt,
    AddI,
    SubI,

    // Pseudo-instructions
    Db,
    Dh,
    Dw,
    Resb,
    Resh,
    Resw,
    Push,
    Pop,
    Pusha,
    Popa,
    Lwi,
    Call,
    Return,

    // Meta instructions (these aren't present in the binary as instructions)
    Include,
    Data,
    Segment,
}
|
||||
|
||||
/// Failure modes reported when converting text into an opcode.
#[derive(Debug)]
pub enum OpcodeFromStrError {
    /// The named register does not exist.
    InvalidRegister(&'static str),
    /// The given text is not a known instruction mnemonic.
    InvalidOpcode(String),
}

impl std::fmt::Display for OpcodeFromStrError {
    /// Renders the error as `<kind> does not exist: <name>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (what, name): (&str, &str) = match self {
            Self::InvalidRegister(reg) => ("register", *reg),
            Self::InvalidOpcode(op) => ("instruction", op.as_str()),
        };

        write!(f, "{what} does not exist: {name}")
    }
}

impl std::error::Error for OpcodeFromStrError {}
|
||||
|
||||
impl Opcode {
|
||||
pub const OPCODES: &[&str] = &[
|
||||
// Real instructions (0x00-0x26)
|
||||
"nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw",
|
||||
"lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc",
|
||||
"dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor",
|
||||
"xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions
|
||||
"db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "call", "return",
|
||||
"pusha", "popa", // meta instructions
|
||||
"include",
|
||||
];
|
||||
|
||||
pub fn to_instruction(
|
||||
&self,
|
||||
source_info: SourceInfo,
|
||||
) -> Result<Instruction, AssembleError> {
|
||||
match self {
|
||||
Self::Nop => Ok(Instruction::Nop),
|
||||
Self::Mov => Ok(Instruction::Mov(RTypeArgs::default())),
|
||||
Self::Movs => Ok(Instruction::MovSigned(RTypeArgs::default())),
|
||||
Self::Ldb => Ok(Instruction::LoadByte(ITypeArgs::default())),
|
||||
Self::Ldbs => Ok(Instruction::LoadByteSigned(ITypeArgs::default())),
|
||||
Self::Ldh => Ok(Instruction::LoadHalfword(ITypeArgs::default())),
|
||||
Self::Ldhs => Ok(Instruction::LoadHalfwordSigned(ITypeArgs::default())),
|
||||
Self::Ldw => Ok(Instruction::LoadWord(ITypeArgs::default())),
|
||||
Self::Stb => Ok(Instruction::StoreByte(ITypeArgs::default())),
|
||||
Self::Sth => Ok(Instruction::StoreHalfword(ITypeArgs::default())),
|
||||
Self::Stw => Ok(Instruction::StoreWord(ITypeArgs::default())),
|
||||
Self::Lli => Ok(Instruction::LoadLowerImmediate(ITypeArgs::default())),
|
||||
Self::Lui => Ok(Instruction::LoadUpperImmediate(ITypeArgs::default())),
|
||||
Self::Jmp => Ok(Instruction::Jump(ITypeArgs::default())),
|
||||
Self::Jeq => Ok(Instruction::JumpEq(ITypeArgs::default())),
|
||||
Self::Jne => Ok(Instruction::JumpNeq(ITypeArgs::default())),
|
||||
Self::Jgt => Ok(Instruction::JumpGt(ITypeArgs::default())),
|
||||
Self::Jge => Ok(Instruction::JumpGe(ITypeArgs::default())),
|
||||
Self::Jlt => Ok(Instruction::JumpLt(ITypeArgs::default())),
|
||||
Self::Jle => Ok(Instruction::JumpLe(ITypeArgs::default())),
|
||||
Self::Cmp => Ok(Instruction::Compare(RTypeArgs::default())),
|
||||
Self::Inc => Ok(Instruction::Increment(RTypeArgs::default())),
|
||||
Self::Dec => Ok(Instruction::Decrement(RTypeArgs::default())),
|
||||
Self::Shl => Ok(Instruction::ShiftLeft(RTypeArgs::default())),
|
||||
Self::Shr => Ok(Instruction::ShiftRight(RTypeArgs::default())),
|
||||
Self::Add => Ok(Instruction::Add(RTypeArgs::default())),
|
||||
Self::Sub => Ok(Instruction::Sub(RTypeArgs::default())),
|
||||
Self::And => Ok(Instruction::And(RTypeArgs::default())),
|
||||
Self::Or => Ok(Instruction::Or(RTypeArgs::default())),
|
||||
Self::Not => Ok(Instruction::Not(RTypeArgs::default())),
|
||||
Self::Xor => Ok(Instruction::Xor(RTypeArgs::default())),
|
||||
Self::Nand => Ok(Instruction::Nand(RTypeArgs::default())),
|
||||
Self::Nor => Ok(Instruction::Nor(RTypeArgs::default())),
|
||||
Self::Xnor => Ok(Instruction::Xnor(RTypeArgs::default())),
|
||||
Self::Int => Ok(Instruction::Interrupt(Interrupt::default())),
|
||||
Self::Irt => Ok(Instruction::IntReturn),
|
||||
Self::Hlt => Ok(Instruction::Halt),
|
||||
Self::AddI => Ok(Instruction::AddImmediate(ITypeArgs::default())),
|
||||
Self::SubI => Ok(Instruction::SubImmediate(ITypeArgs::default())),
|
||||
Self::Segment => Ok(Instruction::Segment(0)),
|
||||
_ => Err(AssembleError::new_source_error(
|
||||
source_info,
|
||||
AssembleErrorKind::Unimplemented(
|
||||
"Opcode::to_instruction called on an instruction that does not exist in common.",
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn to_opcode_value(&self) -> Option<u8> {
|
||||
match self {
|
||||
Self::Nop => Some(0x00),
|
||||
Self::Mov => Some(0x01),
|
||||
Self::Movs => Some(0x02),
|
||||
Self::Ldb => Some(0x03),
|
||||
Self::Ldbs => Some(0x04),
|
||||
Self::Ldh => Some(0x05),
|
||||
Self::Ldhs => Some(0x06),
|
||||
Self::Ldw => Some(0x07),
|
||||
Self::Stb => Some(0x08),
|
||||
Self::Sth => Some(0x09),
|
||||
Self::Stw => Some(0x0A),
|
||||
Self::Lli => Some(0x0B),
|
||||
Self::Lui => Some(0x0C),
|
||||
Self::Jmp => Some(0x0D),
|
||||
Self::Jeq => Some(0x0E),
|
||||
Self::Jne => Some(0x0F),
|
||||
Self::Jgt => Some(0x10),
|
||||
Self::Jge => Some(0x11),
|
||||
Self::Jlt => Some(0x12),
|
||||
Self::Jle => Some(0x13),
|
||||
Self::Cmp => Some(0x14),
|
||||
Self::Inc => Some(0x15),
|
||||
Self::Dec => Some(0x16),
|
||||
Self::Shl => Some(0x17),
|
||||
Self::Shr => Some(0x18),
|
||||
Self::Add => Some(0x19),
|
||||
Self::Sub => Some(0x1A),
|
||||
Self::And => Some(0x1B),
|
||||
Self::Or => Some(0x1C),
|
||||
Self::Not => Some(0x1D),
|
||||
Self::Xor => Some(0x1E),
|
||||
Self::Nand => Some(0x1F),
|
||||
Self::Nor => Some(0x20),
|
||||
Self::Xnor => Some(0x21),
|
||||
Self::Int => Some(0x22),
|
||||
Self::Irt => Some(0x23),
|
||||
Self::Hlt => Some(0x24),
|
||||
Self::AddI => Some(0x25),
|
||||
Self::SubI => Some(0x26),
|
||||
// TODO: Maybe recombine pseudos?
|
||||
Self::Segment => Some(0x27),
|
||||
// Pseudo-instructions don't have opcode values
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn is_pseudo_instruction(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::Db
|
||||
| Self::Dh
|
||||
| Self::Dw
|
||||
| Self::Resb
|
||||
| Self::Resh
|
||||
| Self::Resw
|
||||
| Self::Push
|
||||
| Self::Pop
|
||||
| Self::Lwi
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Opcode {
|
||||
type Err = OpcodeFromStrError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"nop" => Ok(Self::Nop),
|
||||
"mov" => Ok(Self::Mov),
|
||||
"movs" => Ok(Self::Movs),
|
||||
"ldb" => Ok(Self::Ldb),
|
||||
"ldbs" => Ok(Self::Ldbs),
|
||||
"ldh" => Ok(Self::Ldh),
|
||||
"ldhs" => Ok(Self::Ldhs),
|
||||
"ldw" => Ok(Self::Ldw),
|
||||
"stb" => Ok(Self::Stb),
|
||||
"sth" => Ok(Self::Sth),
|
||||
"stw" => Ok(Self::Stw),
|
||||
"lli" => Ok(Self::Lli),
|
||||
"lui" => Ok(Self::Lui),
|
||||
"jmp" => Ok(Self::Jmp),
|
||||
"jeq" => Ok(Self::Jeq),
|
||||
"jne" => Ok(Self::Jne),
|
||||
"jgt" => Ok(Self::Jgt),
|
||||
"jge" => Ok(Self::Jge),
|
||||
"jlt" => Ok(Self::Jlt),
|
||||
"jle" => Ok(Self::Jle),
|
||||
"cmp" => Ok(Self::Cmp),
|
||||
"inc" => Ok(Self::Inc),
|
||||
"dec" => Ok(Self::Dec),
|
||||
"shl" => Ok(Self::Shl),
|
||||
"shr" => Ok(Self::Shr),
|
||||
"add" => Ok(Self::Add),
|
||||
"sub" => Ok(Self::Sub),
|
||||
"and" => Ok(Self::And),
|
||||
"or" => Ok(Self::Or),
|
||||
"not" => Ok(Self::Not),
|
||||
"xor" => Ok(Self::Xor),
|
||||
"nand" => Ok(Self::Nand),
|
||||
"nor" => Ok(Self::Nor),
|
||||
"xnor" => Ok(Self::Xnor),
|
||||
"int" => Ok(Self::Int),
|
||||
"irt" => Ok(Self::Irt),
|
||||
"hlt" => Ok(Self::Hlt),
|
||||
"addi" => Ok(Self::AddI),
|
||||
"subi" => Ok(Self::SubI),
|
||||
"db" => Ok(Self::Db),
|
||||
"dh" => Ok(Self::Dh),
|
||||
"dw" => Ok(Self::Dw),
|
||||
"resb" => Ok(Self::Resb),
|
||||
"resh" => Ok(Self::Resh),
|
||||
"resw" => Ok(Self::Resw),
|
||||
"push" => Ok(Self::Push),
|
||||
"pop" => Ok(Self::Pop),
|
||||
"lwi" => Ok(Self::Lwi),
|
||||
"include" => Ok(Self::Include),
|
||||
"call" => Ok(Self::Call),
|
||||
"return" => Ok(Self::Return),
|
||||
"pusha" => Ok(Self::Pusha),
|
||||
"popa" => Ok(Self::Popa),
|
||||
_ => Err(OpcodeFromStrError::InvalidOpcode(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Opcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Nop => write!(f, "nop"),
|
||||
Self::Mov => write!(f, "mov"),
|
||||
Self::Movs => write!(f, "movs"),
|
||||
Self::Ldb => write!(f, "ldb"),
|
||||
Self::Ldbs => write!(f, "ldbs"),
|
||||
Self::Ldh => write!(f, "ldh"),
|
||||
Self::Ldhs => write!(f, "ldhs"),
|
||||
Self::Ldw => write!(f, "ldw"),
|
||||
Self::Stb => write!(f, "stb"),
|
||||
Self::Sth => write!(f, "sth"),
|
||||
Self::Stw => write!(f, "stw"),
|
||||
Self::Lli => write!(f, "lli"),
|
||||
Self::Lui => write!(f, "lui"),
|
||||
Self::Jmp => write!(f, "jmp"),
|
||||
Self::Jeq => write!(f, "jeq"),
|
||||
Self::Jne => write!(f, "jne"),
|
||||
Self::Jgt => write!(f, "jgt"),
|
||||
Self::Jge => write!(f, "jge"),
|
||||
Self::Jlt => write!(f, "jlt"),
|
||||
Self::Jle => write!(f, "jle"),
|
||||
Self::Cmp => write!(f, "cmp"),
|
||||
Self::Inc => write!(f, "inc"),
|
||||
Self::Dec => write!(f, "dec"),
|
||||
Self::Shl => write!(f, "shl"),
|
||||
Self::Shr => write!(f, "shr"),
|
||||
Self::Add => write!(f, "add"),
|
||||
Self::Sub => write!(f, "sub"),
|
||||
Self::And => write!(f, "and"),
|
||||
Self::Or => write!(f, "or"),
|
||||
Self::Not => write!(f, "not"),
|
||||
Self::Xor => write!(f, "xor"),
|
||||
Self::Nand => write!(f, "nand"),
|
||||
Self::Nor => write!(f, "nor"),
|
||||
Self::Xnor => write!(f, "xnor"),
|
||||
Self::Int => write!(f, "int"),
|
||||
Self::Irt => write!(f, "irt"),
|
||||
Self::Hlt => write!(f, "hlt"),
|
||||
Self::AddI => write!(f, "addi"),
|
||||
Self::SubI => write!(f, "subi"),
|
||||
Self::Db => write!(f, "db"),
|
||||
Self::Dh => write!(f, "dh"),
|
||||
Self::Dw => write!(f, "dw"),
|
||||
Self::Resb => write!(f, "resb"),
|
||||
Self::Resh => write!(f, "resh"),
|
||||
Self::Resw => write!(f, "resw"),
|
||||
Self::Push => write!(f, "push"),
|
||||
Self::Pop => write!(f, "pop"),
|
||||
Self::Lwi => write!(f, "lwi"),
|
||||
Self::Call => write!(f, "call"),
|
||||
Self::Return => write!(f, "return"),
|
||||
Self::Pusha => write!(f, "pusha"),
|
||||
Self::Popa => write!(f, "popa"),
|
||||
|
||||
// meta instructions
|
||||
Self::Include => write!(f, "include"),
|
||||
Self::Data => write!(f, "data"),
|
||||
Self::Segment => write!(f, "[SEGMENT]"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
//! This module contains code for handling pseudo opcodes.
|
||||
|
||||
/// Pseudo instructions that cannot simply be lowered to ISA instructions.
///
/// Currently a placeholder with no variants, so no value of this type can exist.
#[derive(Debug, Clone, Copy)]
pub enum PseudoOpcode {}
|
||||
@@ -5,10 +5,9 @@
|
||||
use common::prelude::*;
|
||||
|
||||
use crate::source::{
|
||||
opcode::Opcode,
|
||||
source_info::SourceInfo,
|
||||
token_info::{
|
||||
DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken,
|
||||
},
|
||||
token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
@@ -22,7 +21,7 @@ pub enum TokenType {
|
||||
/// String literal (e.g., `"hello world"`).
|
||||
String(String),
|
||||
/// Assembly instruction (e.g., `add`, `jmp`, `nop`).
|
||||
Instruction(InstructionToken),
|
||||
Instruction(Opcode),
|
||||
/// Label definition (e.g., `loop_start:`).
|
||||
Label(LabelToken),
|
||||
/// Assembler directive (e.g., `.global`, `.section`, `.dw`, `.resb`).
|
||||
@@ -65,11 +64,8 @@ impl Token {
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn instruction(mnemonic: String, source_info: SourceInfo) -> Self {
|
||||
Self::new(
|
||||
TokenType::Instruction(InstructionToken { mnemonic }),
|
||||
source_info,
|
||||
)
|
||||
pub const fn instruction(op: Opcode, source_info: SourceInfo) -> Self {
|
||||
Self::new(TokenType::Instruction(op), source_info)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
|
||||
@@ -21,14 +21,14 @@ pub struct RegisterToken {
|
||||
}
|
||||
|
||||
impl RegisterToken {
|
||||
#[must_use]
|
||||
pub const fn new(reg: Register) -> Self {
|
||||
Self { reg }
|
||||
}
|
||||
|
||||
/// Returns the name of a valid [`Register`]
|
||||
#[must_use]
|
||||
pub fn name(&self) -> String {
|
||||
self.reg.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct InstructionToken {
|
||||
pub mnemonic: String,
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
@@ -15,13 +16,12 @@ use crate::{
|
||||
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
|
||||
model::module::Module,
|
||||
source::{
|
||||
lines::lines_with_spans,
|
||||
lines::{LineSpan, lines_with_spans},
|
||||
load_source_bytes,
|
||||
opcode::Opcode,
|
||||
source_info::SourceInfo,
|
||||
token::{Token, TokenType},
|
||||
token_info::{
|
||||
DirectiveToken, LabelToken, RegisterToken, SymbolToken,
|
||||
},
|
||||
token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -38,10 +38,10 @@ pub struct Tokeniser {
|
||||
|
||||
// Pre-compiled regex patterns
|
||||
label_regex: Regex,
|
||||
// register_regex: Regex,
|
||||
register_regex: Regex,
|
||||
immediate_regex: Regex,
|
||||
directive_regex: Regex,
|
||||
// instruction_regex: Regex,
|
||||
instruction_regex: Regex,
|
||||
symbol_regex: Regex,
|
||||
string_regex: Regex,
|
||||
comment_regex: Regex,
|
||||
@@ -56,23 +56,25 @@ impl Tokeniser {
|
||||
|
||||
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
|
||||
.expect("Failed to compile label regex pattern"),
|
||||
// register_regex: Regex::new(r"^(r[0-9]+|sp|fp|pc)")
|
||||
// .expect("Failed to compile register regex pattern"),
|
||||
register_regex: Regex::new(
|
||||
r"^(rg([0-9]|[a-f])|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
|
||||
)
|
||||
.expect("Failed to compile register regex pattern"),
|
||||
immediate_regex: Regex::new(
|
||||
r"^(0x[0-9a-fA-F_]+|0b[0-1_]+|0o[0-7_]+|[0-9_]+)",
|
||||
)
|
||||
.expect("Failed to compile immediate regex pattern"),
|
||||
directive_regex: Regex::new(r"^\.([a-zA-Z]+)")
|
||||
directive_regex: Regex::new(r"^(res[bwh]|d[bwh]|include|section|global|local)\b")
|
||||
.expect("Failed to compile directive regex pattern"),
|
||||
// instruction_regex: Regex::new(
|
||||
// r"^(add|sub|mul|div|jmp|call|ret|lli|nop|halt)",
|
||||
// )
|
||||
// .expect("Failed to compile instruction regex pattern"),
|
||||
instruction_regex: Regex::new(
|
||||
r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b",
|
||||
)
|
||||
.expect("Failed to compile instruction regex pattern"),
|
||||
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)")
|
||||
.expect("Failed to compile symbol regex pattern"),
|
||||
string_regex: Regex::new(r#"^"([^"]*)"#)
|
||||
.expect("Failed to compile string regex pattern"),
|
||||
comment_regex: Regex::new("//.*")
|
||||
comment_regex: Regex::new("^//.*")
|
||||
.expect("Failed to compile comment regex pattern"),
|
||||
}
|
||||
}
|
||||
@@ -137,7 +139,7 @@ impl Tokeniser {
|
||||
|
||||
fn tokenize_line(
|
||||
&self,
|
||||
line_span: &crate::source::lines::LineSpan,
|
||||
line_span: &LineSpan,
|
||||
module: &Arc<Module>,
|
||||
) -> Result<Vec<Token>, AssembleError> {
|
||||
let mut tokens = Vec::new();
|
||||
@@ -188,9 +190,11 @@ impl Tokeniser {
|
||||
}
|
||||
|
||||
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
_ = self;
|
||||
let caps = self.register_regex.captures(input)?;
|
||||
let reg = caps.get(1)?.as_str();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
let reg = match Register::try_from(input) {
|
||||
let reg = match Register::try_from(reg) {
|
||||
Ok(reg) => reg,
|
||||
Err(_why) => {
|
||||
// Probably ignore the error.
|
||||
@@ -198,8 +202,6 @@ impl Tokeniser {
|
||||
}
|
||||
};
|
||||
|
||||
let len = input.len();
|
||||
|
||||
Some((TokenType::Register(RegisterToken { reg }), len))
|
||||
}
|
||||
|
||||
@@ -234,15 +236,14 @@ impl Tokeniser {
|
||||
Some((TokenType::Directive(DirectiveToken { directive }), len))
|
||||
}
|
||||
|
||||
const fn try_match_instruction(&self, _input: &str) -> Option<(TokenType, usize)> {
|
||||
_ = self;
|
||||
fn try_match_instruction(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.instruction_regex.captures(input)?;
|
||||
let mnemonic = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
// let instruction =
|
||||
// Some((TokenType::Instruction(InstructionToken { mnemonic }), len))
|
||||
let op = Opcode::from_str(&mnemonic).ok()?;
|
||||
|
||||
// TODO: fix me.
|
||||
|
||||
None
|
||||
Some((TokenType::Instruction(op), len))
|
||||
}
|
||||
|
||||
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
@@ -262,6 +263,14 @@ impl Tokeniser {
|
||||
}
|
||||
|
||||
fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> {
|
||||
if let Some(m) = self.try_match_directive(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_instruction(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_comment(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
@@ -278,14 +287,6 @@ impl Tokeniser {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_directive(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_instruction(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_string(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum TokeniserError {}
|
||||
|
||||
impl TokeniserError {}
|
||||
|
||||
impl std::fmt::Display for TokeniserError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "TODO!!!!!!")
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
//! Unit tests for the tokenizer
|
||||
|
||||
use common::prelude::Register;
|
||||
|
||||
use crate::{
|
||||
context::AssemblerContext,
|
||||
source::{
|
||||
opcode::Opcode,
|
||||
token::{Token, TokenType},
|
||||
token_info::RegisterToken,
|
||||
tokeniser::Tokeniser,
|
||||
},
|
||||
};
|
||||
@@ -65,7 +69,7 @@ fn test_single_instruction() {
|
||||
.any(|t| matches!(t, TokenType::Instruction(_)))
|
||||
);
|
||||
if let TokenType::Instruction(instr) = &tokens[0].token_type {
|
||||
assert_eq!(instr.mnemonic, "add");
|
||||
assert_eq!(instr.to_string(), "add");
|
||||
} else {
|
||||
panic!("Expected instruction token");
|
||||
}
|
||||
@@ -73,15 +77,13 @@ fn test_single_instruction() {
|
||||
|
||||
#[test]
|
||||
fn test_all_instructions() {
|
||||
let instructions = [
|
||||
"add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt",
|
||||
];
|
||||
let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"];
|
||||
|
||||
for instr in &instructions {
|
||||
let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");
|
||||
|
||||
if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type {
|
||||
assert_eq!(parsed_instr.mnemonic, *instr);
|
||||
assert_eq!(parsed_instr.to_string(), *instr);
|
||||
} else {
|
||||
panic!("Expected instruction token for {instr}");
|
||||
}
|
||||
@@ -90,7 +92,7 @@ fn test_all_instructions() {
|
||||
|
||||
#[test]
|
||||
fn test_registers() {
|
||||
let test_cases = [("rg0", "r0"), ("rgf", "rgf"), ("pcx", "pcx")];
|
||||
let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];
|
||||
|
||||
for (input, expected) in &test_cases {
|
||||
let tokens = tokenize_source(input).expect("Failed to tokenize register");
|
||||
@@ -149,10 +151,9 @@ fn test_labels() {
|
||||
#[test]
|
||||
fn test_directives() {
|
||||
let test_cases = [
|
||||
(".global", "global"),
|
||||
(".section", "section"),
|
||||
(".data", "data"),
|
||||
(".text", "text"),
|
||||
("global", "global"),
|
||||
("section", "section"),
|
||||
("local", "local"),
|
||||
];
|
||||
|
||||
for (input, expected) in &test_cases {
|
||||
@@ -185,3 +186,52 @@ fn test_symbols() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_complex_instruction_line() {
    let tokens = tokenize_source("addi rg1, rg2, 0xFF")
        .expect("Failed to tokenise complex instruction");

    // Expected stream: instruction, register, comma, register, comma, immediate,
    // then newline and EOF. Only the first six tokens are checked here.
    assert!(tokens.len() >= 6);

    let kind = |index: usize| &tokens[index].token_type;

    assert!(matches!(kind(0), TokenType::Instruction(_)));
    assert!(matches!(kind(1), TokenType::Register(_)));
    assert!(matches!(kind(2), TokenType::Comma));
    assert!(matches!(kind(3), TokenType::Register(_)));
    assert!(matches!(kind(4), TokenType::Comma));
    assert!(matches!(kind(5), TokenType::Immediate(_)));
}
|
||||
|
||||
/// Tokenises two lines, the first ending in a `//` comment, and checks the exact
/// token stream. The expected stream contains no token for the comment.
#[test]
fn test_multiline_with_comments() {
    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
        TokenType::Instruction(Opcode::Add),
        TokenType::Register(RegisterToken::new(Register::Rg0)),
        TokenType::Comma,
        TokenType::Register(RegisterToken::new(Register::Rg1)),
        TokenType::Newline,
        TokenType::Instruction(Opcode::SubI),
        TokenType::Register(RegisterToken::new(Register::Rg2)),
        TokenType::Comma,
        TokenType::Immediate(10),
        TokenType::Newline,
        TokenType::Eof,
    ];

    const SOURCE: &str = r"add rg0, rg1 // Another comment
subi rg2, 10";

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
    let token_types = extract_token_types(&tokens);

    // Compare lengths first: `zip` below would silently stop at the shorter side.
    assert_eq!(
        token_types.len(),
        EXPECTED_TOKEN_TYPES.len(),
        "{token_types:#?}"
    );

    for (index, (expected, got)) in EXPECTED_TOKEN_TYPES
        .iter()
        .zip(token_types.iter())
        .enumerate()
    {
        assert_eq!(
            expected, *got,
            "Token {index}: expected {expected:?}, got {got:?}"
        );
    }
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
use crate::{instructions::encode::Encode, prelude::*};
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
|
||||
pub enum Interrupt {
|
||||
Software(u8),
|
||||
Breakpoint,
|
||||
#[default]
|
||||
HardFault,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//! Various types of arguments that instructions can take, alongside encoding and decoding logic.
|
||||
//! Various types of arguments that instructions can take, alongside encoding and decoding
|
||||
//! logic.
|
||||
|
||||
use crate::{
|
||||
instructions::{RegisterParseError, encode::Encode},
|
||||
@@ -35,18 +36,20 @@ impl std::fmt::Display for ArgsDecodeError {
|
||||
|
||||
impl std::error::Error for ArgsDecodeError {}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
/// Used by instructions with 2 registers and an immediate argument.
|
||||
pub struct ITypeArgs {
|
||||
pub immediate: u16,
|
||||
pub r1: Register,
|
||||
/// May not actually be used by some instructions taking an immediate e.g. LUI. This is solved by making the constructor take Options.
|
||||
/// May not actually be used by some instructions taking an immediate e.g. LUI. This
|
||||
/// is solved by making the constructor take Options.
|
||||
pub r2: Register,
|
||||
}
|
||||
|
||||
impl ITypeArgs {
|
||||
#[must_use]
|
||||
/// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with [`Register::NoReg`].
|
||||
/// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with
|
||||
/// [`Register::NoReg`].
|
||||
pub fn new(immediate: u16, r1: Option<Register>, r2: Option<Register>) -> Self {
|
||||
let r1 = r1.unwrap_or_default();
|
||||
let r2 = r2.unwrap_or_default();
|
||||
@@ -56,8 +59,8 @@ impl ITypeArgs {
|
||||
}
|
||||
|
||||
impl Encode for ITypeArgs {
|
||||
/// Encodes an I-type instruction from its fields. These must have some unused high-order
|
||||
/// bits set to 0 else the bit shifting logic gets fucked.
|
||||
/// Encodes an I-type instruction from its fields. These must have some unused
|
||||
/// high-order bits set to 0 else the bit shifting logic gets fucked.
|
||||
fn encode(self, opcode: u8) -> u32 {
|
||||
let opcode = u32::from(opcode);
|
||||
let r1 = self.r1 as u32;
|
||||
@@ -84,7 +87,7 @@ impl TryFrom<u32> for ITypeArgs {
|
||||
}
|
||||
|
||||
/// Used by instructions not using immediates (besides 5 bit shift values).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub struct RTypeArgs {
|
||||
pub sr1: Register,
|
||||
pub sr2: Register,
|
||||
@@ -95,7 +98,8 @@ pub struct RTypeArgs {
|
||||
|
||||
impl RTypeArgs {
|
||||
#[must_use]
|
||||
/// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0.
|
||||
/// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced
|
||||
/// with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0.
|
||||
pub fn new(
|
||||
sr1: Option<Register>,
|
||||
sr2: Option<Register>,
|
||||
@@ -122,7 +126,8 @@ impl Encode for RTypeArgs {
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `shamt`: The amount to shift value (used only in shift instructions, otherwise 0).
|
||||
/// - `shamt`: The amount to shift value (used only in shift instructions, otherwise
|
||||
/// 0).
|
||||
fn encode(self, opcode: u8) -> u32 {
|
||||
let opcode = u32::from(opcode);
|
||||
let sr1 = self.sr1 as u32;
|
||||
|
||||
Reference in New Issue
Block a user