Files
damn_simple_architecture/assembler/src/assembler/lexer.rs
T
2026-02-04 01:56:15 +00:00

174 lines
5.0 KiB
Rust

use std::str::FromStr;
use crate::assembler::AssembleError;
use crate::assembler::model::{Module, Opcode, Symbol, Token};
use common::prelude::Register;
pub fn lexer(mut program: String, module: u64) -> Result<Vec<Token>, AssembleError> {
let mut tokens = Vec::new();
let lines = program.lines();
let mut literal = String::new();
for line in lines {
for (i, token) in line.split_whitespace().enumerate() {
if token.starts_with("//") {
break;
}
if let Some(stripped) = token.strip_prefix('"') {
literal.push_str(stripped);
}
if !literal.is_empty() {
if !token.starts_with('"') {
if i > 0 {
literal.push(' ');
}
literal.push_str(token);
}
if token.ends_with('"') {
literal.pop(); // remove the closing quote
tokens.push(Token::StringLit(literal));
literal = String::new();
}
continue;
}
let token = token.trim_end_matches(',');
if token.is_empty() {
continue;
}
if let Some(token) = parse_register(token)? {
tokens.push(token);
} else if let Some(token) = parse_opcode(token)? {
tokens.push(token);
} else if let Some(token) = parse_hex(token)? {
tokens.push(token);
} else if let Some(token) = parse_octal(token)? {
tokens.push(token);
} else if let Some(token) = parse_binary(token)? {
tokens.push(token);
} else if let Some(token) = parse_decimal(token)? {
tokens.push(token);
} else if let Some(token) = parse_label(token, module)? {
tokens.push(token);
} else if let Some(token) = parse_symbol(token, module)? {
tokens.push(token);
} else {
return Err(AssembleError::Generic);
}
}
}
// println!("{:#?}", tokens);
Ok(tokens)
}
/// Tries to read `token` as a register name.
///
/// Returns `Ok(Some(Token::Register(..)))` when `Register::try_from` accepts
/// the text and `Ok(None)` otherwise, so the caller can try other token
/// kinds. This function never returns `Err`.
pub fn parse_register(token: &str) -> Result<Option<Token>, AssembleError> {
    match Register::try_from(token) {
        Ok(register) => Ok(Some(Token::Register(register))),
        // Not a register name; the conversion error carries nothing we need.
        Err(_) => Ok(None),
    }
}
/// Tries to read `token` as an opcode mnemonic.
///
/// Returns `Ok(None)` when `token` is not listed in `Opcode::OPCODES`, so the
/// caller can try other token kinds. This function never returns `Err`.
///
/// # Panics
///
/// Panics if `token` is listed in `Opcode::OPCODES` but `Opcode::from_str`
/// rejects it.
pub fn parse_opcode(token: &str) -> Result<Option<Token>, AssembleError> {
    if !Opcode::OPCODES.contains(&token) {
        return Ok(None);
    }
    let opcode =
        Opcode::from_str(token).expect("Opcode::from_str failed for a valid opcode token");
    Ok(Some(Token::Opcode(opcode)))
}
/// Tries to read `token` as a hexadecimal immediate written as `0x<digits>`.
///
/// Returns `Ok(None)` when `token` does not start with `0x` or has nothing
/// after the prefix, so the caller can try other token kinds.
///
/// # Errors
///
/// Returns [`AssembleError::Generic`] when the text after the prefix is not a
/// valid base-16 number that fits in a `u32`.
pub fn parse_hex(token: &str) -> Result<Option<Token>, AssembleError> {
    parse_prefixed_immediate(token, "0x", 16)
}

/// Tries to read `token` as an octal immediate written as `0o<digits>`.
///
/// Returns `Ok(None)` when `token` does not start with `0o` or has nothing
/// after the prefix, so the caller can try other token kinds.
///
/// # Errors
///
/// Returns [`AssembleError::Generic`] when the text after the prefix is not a
/// valid base-8 number that fits in a `u32`.
pub fn parse_octal(token: &str) -> Result<Option<Token>, AssembleError> {
    parse_prefixed_immediate(token, "0o", 8)
}

/// Tries to read `token` as a binary immediate written as `0b<digits>`.
///
/// Returns `Ok(None)` when `token` does not start with `0b` or has nothing
/// after the prefix, so the caller can try other token kinds.
///
/// # Errors
///
/// Returns [`AssembleError::Generic`] when the text after the prefix is not a
/// valid base-2 number that fits in a `u32`.
pub fn parse_binary(token: &str) -> Result<Option<Token>, AssembleError> {
    parse_prefixed_immediate(token, "0b", 2)
}

/// Shared implementation for the radix-prefixed immediate parsers.
///
/// Strips `prefix` from `token` and parses the remainder as a `u32` in the
/// given `radix`. A missing prefix or an empty remainder yields `Ok(None)`;
/// anything else that fails to parse yields [`AssembleError::Generic`].
fn parse_prefixed_immediate(
    token: &str,
    prefix: &str,
    radix: u32,
) -> Result<Option<Token>, AssembleError> {
    let digits = match token.strip_prefix(prefix) {
        Some(digits) if !digits.is_empty() => digits,
        _ => return Ok(None),
    };
    // `from_str_radix` tolerates a leading '+', which would let something
    // like `0x+1F` through as a literal; reject it explicitly.
    if digits.starts_with('+') {
        return Err(AssembleError::Generic);
    }
    u32::from_str_radix(digits, radix)
        .map(|value| Some(Token::Immediate(value)))
        .map_err(|_| AssembleError::Generic)
}
/// Tries to read `token` as a decimal immediate.
///
/// Returns `Ok(Some(Token::Immediate(..)))` when the whole token parses as a
/// `u32` and `Ok(None)` otherwise (including values that do not fit in a
/// `u32`), so the caller can try other token kinds. This function never
/// returns `Err`.
pub fn parse_decimal(token: &str) -> Result<Option<Token>, AssembleError> {
    let immediate = token.parse::<u32>().ok().map(Token::Immediate);
    Ok(immediate)
}
/// Tries to read `token` as a label, i.e. a word ending in `:`.
///
/// On a match the trailing colon is removed and the remaining text becomes
/// the name of a `Token::Symbol` whose module is `Module::Resolved(module)`.
/// Returns `Ok(None)` when `token` does not end in `:`. This function never
/// returns `Err`.
pub fn parse_label(token: &str, module: u64) -> Result<Option<Token>, AssembleError> {
    let label = token.strip_suffix(':').map(|name| {
        Token::Symbol(Symbol {
            name: name.to_string(),
            module: Module::Resolved(module),
        })
    });
    Ok(label)
}
pub fn parse_symbol(token: &str, module: u64) -> Result<Option<Token>, AssembleError> {
let Some(tokc) = token.chars().next() else {
return Err(AssembleError::Generic); // TODO: What is this error?
};
if tokc.is_numeric() {
return Ok(None);
}
let mut split = token.splitn(2, "::");
let Some(symbol1) = split.next() else {
return Err(AssembleError::InvalidArg);
};
let symbol1 = symbol1.to_string();
if let Some(symbol2) = split.next() {
Ok(Some(Token::Symbol(Symbol {
name: symbol2.to_string(),
module: Module::Unresolved(symbol1),
})))
} else {
Ok(Some(Token::Symbol(Symbol {
name: symbol1,
module: Module::Resolved(module),
})))
}
}