174 lines
5.0 KiB
Rust
174 lines
5.0 KiB
Rust
use std::str::FromStr;
|
|
|
|
use crate::assembler::AssembleError;
|
|
use crate::assembler::model::{Module, Opcode, Symbol, Token};
|
|
use common::prelude::Register;
|
|
|
|
pub fn lexer(mut program: String, module: u64) -> Result<Vec<Token>, AssembleError> {
|
|
let mut tokens = Vec::new();
|
|
|
|
let lines = program.lines();
|
|
let mut literal = String::new();
|
|
|
|
for line in lines {
|
|
for (i, token) in line.split_whitespace().enumerate() {
|
|
if token.starts_with("//") {
|
|
break;
|
|
}
|
|
|
|
if let Some(stripped) = token.strip_prefix('"') {
|
|
literal.push_str(stripped);
|
|
}
|
|
|
|
if !literal.is_empty() {
|
|
if !token.starts_with('"') {
|
|
if i > 0 {
|
|
literal.push(' ');
|
|
}
|
|
literal.push_str(token);
|
|
}
|
|
|
|
if token.ends_with('"') {
|
|
literal.pop(); // remove the closing quote
|
|
|
|
tokens.push(Token::StringLit(literal));
|
|
literal = String::new();
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
let token = token.trim_end_matches(',');
|
|
if token.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
if let Some(token) = parse_register(token)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_opcode(token)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_hex(token)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_octal(token)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_binary(token)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_decimal(token)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_label(token, module)? {
|
|
tokens.push(token);
|
|
} else if let Some(token) = parse_symbol(token, module)? {
|
|
tokens.push(token);
|
|
} else {
|
|
return Err(AssembleError::Generic);
|
|
}
|
|
}
|
|
}
|
|
|
|
// println!("{:#?}", tokens);
|
|
|
|
Ok(tokens)
|
|
}
|
|
pub fn parse_register(token: &str) -> Result<Option<Token>, AssembleError> {
|
|
Ok(Register::try_from(token).map(Token::Register).ok())
|
|
}
|
|
|
|
pub fn parse_opcode(token: &str) -> Result<Option<Token>, AssembleError> {
|
|
if Opcode::OPCODES.contains(&token) {
|
|
Ok(Some(Token::Opcode(Opcode::from_str(token).expect(
|
|
"Opcode::from_str failed for a valid opcode token",
|
|
))))
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
pub fn parse_hex(token: &str) -> Result<Option<Token>, AssembleError> {
|
|
if (token.len() < 3) | !token.starts_with("0x") {
|
|
return Ok(None);
|
|
}
|
|
|
|
let Some(lit) = &token.get(2..) else {
|
|
return Err(AssembleError::InvalidArg);
|
|
};
|
|
|
|
u32::from_str_radix(lit, 16).map_or(Err(AssembleError::Generic), |value| {
|
|
Ok(Some(Token::Immediate(value)))
|
|
})
|
|
}
|
|
|
|
pub fn parse_octal(token: &str) -> Result<Option<Token>, AssembleError> {
|
|
if (token.len() < 3) | !token.starts_with("0o") {
|
|
return Ok(None);
|
|
}
|
|
|
|
let Some(lit) = &token.get(2..) else {
|
|
return Err(AssembleError::InvalidArg);
|
|
};
|
|
|
|
u32::from_str_radix(lit, 8).map_or(Err(AssembleError::Generic), |value| {
|
|
Ok(Some(Token::Immediate(value)))
|
|
})
|
|
}
|
|
|
|
pub fn parse_binary(token: &str) -> Result<Option<Token>, AssembleError> {
|
|
if (token.len() < 3) | !token.starts_with("0b") {
|
|
return Ok(None);
|
|
}
|
|
|
|
let Some(lit) = &token.get(2..) else {
|
|
return Err(AssembleError::InvalidArg);
|
|
};
|
|
|
|
u32::from_str_radix(lit, 2).map_or(Err(AssembleError::Generic), |value| {
|
|
Ok(Some(Token::Immediate(value)))
|
|
})
|
|
}
|
|
|
|
pub fn parse_decimal(token: &str) -> Result<Option<Token>, AssembleError> {
|
|
let Ok(tok) = token.parse::<u32>() else {
|
|
return Ok(None);
|
|
};
|
|
|
|
Ok(Some(Token::Immediate(tok)))
|
|
}
|
|
|
|
pub fn parse_label(token: &str, module: u64) -> Result<Option<Token>, AssembleError> {
|
|
if token.ends_with(':') {
|
|
Ok(Some(Token::Symbol(Symbol {
|
|
name: token[0..token.len() - 1].to_string(),
|
|
module: Module::Resolved(module),
|
|
})))
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
pub fn parse_symbol(token: &str, module: u64) -> Result<Option<Token>, AssembleError> {
|
|
let Some(tokc) = token.chars().next() else {
|
|
return Err(AssembleError::Generic); // TODO: What is this error?
|
|
};
|
|
|
|
if tokc.is_numeric() {
|
|
return Ok(None);
|
|
}
|
|
|
|
let mut split = token.splitn(2, "::");
|
|
let Some(symbol1) = split.next() else {
|
|
return Err(AssembleError::InvalidArg);
|
|
};
|
|
let symbol1 = symbol1.to_string();
|
|
|
|
if let Some(symbol2) = split.next() {
|
|
Ok(Some(Token::Symbol(Symbol {
|
|
name: symbol2.to_string(),
|
|
module: Module::Unresolved(symbol1),
|
|
})))
|
|
} else {
|
|
Ok(Some(Token::Symbol(Symbol {
|
|
name: symbol1,
|
|
module: Module::Resolved(module),
|
|
})))
|
|
}
|
|
}
|