refactoring assembler
This commit is contained in:
+11
-243
@@ -1,253 +1,21 @@
|
||||
use core::fmt;
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
fs,
|
||||
hash::{DefaultHasher, Hash, Hasher},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use assembler::codegen::codegen;
|
||||
use assembler::expand::expand_pseudo_ops;
|
||||
use assembler::model::{Node, Opcode, Symbol, Token, TokenType};
|
||||
use assembler::parser::{Parser, Program};
|
||||
use assembler::resolver::{create_sections, resolve_dependencies, resolve_symbols};
|
||||
use common::prelude::*;
|
||||
use core::fmt;
|
||||
|
||||
use crate::{
|
||||
codegen::codegen,
|
||||
expand::expand_pseudo_ops,
|
||||
model::{Node, Opcode, Symbol, Token, TokenType},
|
||||
parser::{Parser, Program},
|
||||
resolver::{create_sections, resolve_dependencies, resolve_symbols},
|
||||
};
|
||||
pub mod assembler;
|
||||
|
||||
pub mod codegen;
|
||||
pub mod expand;
|
||||
pub mod lexer;
|
||||
pub mod model;
|
||||
pub mod parser;
|
||||
pub mod resolver;
|
||||
use crate::assembler::lexer;
|
||||
|
||||
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let mut modules = HashSet::<u64>::new();
|
||||
let mut program = Program::new();
|
||||
|
||||
let hash = quick_hash(src);
|
||||
modules.insert(hash);
|
||||
|
||||
prepare_dependency(src, &mut modules, &mut program)?;
|
||||
let mut nodes = program.nodes;
|
||||
|
||||
create_sections(&mut nodes)?;
|
||||
resolve_symbols(&mut nodes)?;
|
||||
|
||||
let instructions = codegen(nodes)?;
|
||||
for inst in instructions.iter() {
|
||||
println!("{inst}");
|
||||
}
|
||||
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
fn prepare_dependency(
|
||||
path: &Path,
|
||||
modules: &mut HashSet<u64>,
|
||||
program: &mut Program,
|
||||
) -> Result<(), AssembleError> {
|
||||
let filename = path.file_name().unwrap().to_str().unwrap();
|
||||
if let Ok(path) = path.canonicalize() {
|
||||
log(&format!(
|
||||
"{:20} {:20} [{}]",
|
||||
"Building",
|
||||
filename,
|
||||
path.display()
|
||||
));
|
||||
}
|
||||
|
||||
let src = fs::read_to_string(path)
|
||||
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
|
||||
let file_hash = quick_hash(path);
|
||||
|
||||
log(&format!("{:20} {:20}", "Tokenising", filename));
|
||||
let tokens = lexer::lexer(src, file_hash)?;
|
||||
|
||||
log(&format!("{:20} {:20}", "Parsing", filename));
|
||||
let parsed = Parser::parse_nodes(tokens)?;
|
||||
|
||||
log(&format!("{:20} {:20}", "Resolving Deps", filename));
|
||||
let nodes = resolve_dependencies(parsed)?;
|
||||
|
||||
let deps = Parser::get_dependencies(&nodes)?;
|
||||
|
||||
log(&format!(
|
||||
"{:20} {:20}",
|
||||
"Expanding PseudoInstructions", filename
|
||||
));
|
||||
let mut nodes = expand_pseudo_ops(nodes, file_hash)?;
|
||||
|
||||
// add a section instruction
|
||||
nodes.insert(
|
||||
0,
|
||||
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
|
||||
);
|
||||
|
||||
for n in nodes.iter() {
|
||||
println!("{n}");
|
||||
}
|
||||
|
||||
program.add_module(nodes);
|
||||
|
||||
for dep in deps {
|
||||
log(&format!(
|
||||
"{:20} {:20}",
|
||||
"Including",
|
||||
dep.file_name().unwrap().to_str().unwrap()
|
||||
));
|
||||
|
||||
if !modules.contains(&quick_hash(&dep)) {
|
||||
modules.insert(quick_hash(&dep));
|
||||
prepare_dependency(dep.as_path(), modules, program)?
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Placeholder build step: ignores `_src` and always succeeds with an empty
/// instruction list.
fn _build(_src: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
    let instructions = Vec::new();
    Ok(instructions)
}
|
||||
|
||||
/// TODO: disassembling functionality
|
||||
/// - We probably don't need to implement this for a while yet.
|
||||
/// - This method should recover symbols such as labels and variables from the human
|
||||
/// written assembly, recognising sequences that are expansions of pseudo-instructions
|
||||
/// and reversing this to produce near enough the original source code.
|
||||
pub fn disassemble(_: Vec<Instruction>) -> String {
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum AssembleError {
|
||||
Generic,
|
||||
UnexpectedEof,
|
||||
InvalidFile(PathBuf),
|
||||
UnexpectedToken(Token, TokenType),
|
||||
InvalidArg,
|
||||
UndefinedSymbol(Symbol),
|
||||
}
|
||||
|
||||
impl fmt::Display for AssembleError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
AssembleError::Generic => write!(f, "Generic error"),
|
||||
AssembleError::UnexpectedToken(tok, expected) => {
|
||||
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
|
||||
}
|
||||
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
|
||||
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
|
||||
AssembleError::InvalidArg => write!(f, "Invalid argument"),
|
||||
AssembleError::UndefinedSymbol(symbol) => {
|
||||
write!(f, "Undefined symbol {symbol}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hashes a path so that a source file can be identified by a single `u64`.
///
/// The path is canonicalised first, so different spellings of the same
/// existing file produce the same hash. When canonicalisation fails (for
/// example because the file does not exist) the path is hashed as given
/// instead of panicking.
///
/// For paths that are valid UTF-8 the value is the hash of
/// `Some(canonical_path_str)`. Paths that are not valid UTF-8 are hashed
/// through `Path`'s own `Hash` impl so that they do not all collapse onto the
/// hash of `None`.
fn quick_hash(value: &Path) -> u64 {
    let resolved = value
        .canonicalize()
        .unwrap_or_else(|_| value.to_path_buf());

    let mut hasher = DefaultHasher::new();
    match resolved.to_str() {
        utf8 @ Some(_) => utf8.hash(&mut hasher),
        None => resolved.hash(&mut hasher),
    }
    hasher.finish()
}
|
||||
pub mod prelude {
|
||||
pub use crate::assembler::assemble;
|
||||
pub use crate::assembler::disassemble;
|
||||
}
|
||||
|
||||
// TODO: Switch to a real logging or tracing library for pretty (scoped) output.
/// Prints `message` to stdout, prefixed with an `INFO:` tag wrapped in ANSI
/// colour escape codes (green, then reset).
fn log(message: &str) {
    println!("\x1b[32mINFO:\x1b[0m {message}");
}
|
||||
|
||||
/// Lexes and parses an assembly source string into nodes.
///
/// * `dsa!(hash, text)` converts `text` with `String::from` and feeds it to
///   the lexer.
/// * `dsa!(hash, "template {}", args...)` first renders the template with
///   `format!`.
///
/// In both forms `hash` is passed to the lexer as its second argument.
///
/// The expansion uses `?` on the lexer and the parser results, so the macro
/// can only be invoked inside a function whose return type accepts those
/// errors.
#[macro_export]
macro_rules! dsa {
    // Template plus one or more formatting arguments.
    ($hash:expr, $input:expr, $($args:expr),+) => {{
        let source = format!($input, $($args),+);
        let lexed = $crate::lexer::lexer(source, $hash)?;
        $crate::parser::Parser::parse_nodes(lexed)?
    }};
    // Plain text, used verbatim.
    ($hash:expr, $input:expr) => {{
        let source = String::from($input);
        let lexed = $crate::lexer::lexer(source, $hash)?;
        $crate::parser::Parser::parse_nodes(lexed)?
    }};
}
|
||||
|
||||
/// Extracts the payload of a token that must be of one specific variant.
///
/// `expect_token!(token, Variant)` evaluates to `Ok(payload.clone())` when
/// `token` is `Token::Variant(payload)`. Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::Variant))`.
///
/// `Variant` is any identifier that names both a single-field `Token` variant
/// and a `TokenType` variant (`Symbol`, `Register`, `Immediate`, `StringLit`,
/// `Opcode`). A single arm covers all of them, so adding a token variant does
/// not require touching this macro.
#[macro_export]
macro_rules! expect_token {
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::model::TokenType::$variant,
            )),
        }
    };
}
|
||||
|
||||
/// Checks that a token is one of several accepted variants.
///
/// `expect_type!(token, A, B, ...)` evaluates to `Ok(token.clone())` when
/// `token` is any of the listed `Token` variants. Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::A))`; only
/// the first listed variant is reported as the expected type.
///
/// `token` may be an owned `Token` or a reference to one.
#[macro_export]
macro_rules! expect_type {
    // Internal rules: map the first identifier of the list to its `TokenType`.
    // They are listed first so they are tried before the public rule.
    (@get_first_type Symbol $(, $rest:ident)*) => { $crate::model::TokenType::Symbol };
    (@get_first_type Register $(, $rest:ident)*) => { $crate::model::TokenType::Register };
    (@get_first_type Immediate $(, $rest:ident)*) => { $crate::model::TokenType::Immediate };
    (@get_first_type StringLit $(, $rest:ident)*) => { $crate::model::TokenType::StringLit };
    (@get_first_type Opcode $(, $rest:ident)*) => { $crate::model::TokenType::Opcode };

    ($token:expr, $($variant:ident),+) => {{
        let token = $token;
        match &token {
            $(
                $crate::model::Token::$variant(_) => Ok(token.clone()),
            )+
            other => {
                // Recurse through `$crate` so the macro also resolves when it
                // is invoked by path without being imported at the call site.
                let expected_type = $crate::expect_type!(@get_first_type $($variant),+);
                Err($crate::AssembleError::UnexpectedToken(
                    // Two clones on purpose: when `$token` is a reference,
                    // `other` is a reference to a reference, so the first
                    // `clone` only copies the inner reference and the second
                    // one produces the owned token.
                    other.clone().clone(),
                    expected_type,
                ))
            }
        }
    }};
}
|
||||
|
||||
Reference in New Issue
Block a user