Refactor and fix assembler path handling

This commit is contained in:
2025-06-21 04:05:22 +01:00
parent 42c26d4184
commit 528ceddade
17 changed files with 447 additions and 184 deletions
+3 -1
View File
@@ -13,4 +13,6 @@ name = "assembler"
path = "src/lib.rs"
[dependencies]
common = { path = "../common" }
common = { path = "../common" }
num_cpus = "1.17.0"
threadpool = "1.8.1"
+131
View File
@@ -0,0 +1,131 @@
//! Macros used throughout the assembler
use crate::assembler::model::{Node, Opcode, Symbol, Token};
/// Lexes and parses a snippet of DSA assembly, evaluating to the parsed nodes.
///
/// The first argument is forwarded unchanged as the second argument of
/// `assembler::lexer::lexer`. The second argument is the assembly text; when
/// further arguments follow, the text is used as a `format!` template for them.
///
/// Lexer and parser failures are propagated with `?` *inside* the expansion,
/// so the macro can only be used in a function whose return type accepts those
/// errors. The expansion itself evaluates to the parsed nodes, not to a
/// `Result`, so no trailing `?` is needed at the call site.
///
/// # Examples
/// ```ignore
/// // With formatting:
/// let nodes = dsa!(hash, "mov r1, {}", 42);
///
/// // Without formatting:
/// let nodes = dsa!(hash, "mov r1, 42");
/// ```
#[macro_export]
macro_rules! dsa {
    // Version with formatting arguments
    ($hash:expr, $input:expr, $($args:expr),+) => {{
        let input = format!($input, $($args),+);
        // The lexer and parser live under `assembler`, not at the crate root.
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
    // Version without formatting
    ($hash:expr, $input:expr) => {{
        let input = String::from($input);
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
}
/// Builds an `assembler::model::Node` from a symbol, an opcode and its tokens.
///
/// Two call forms are supported:
///
/// * `node!(symbol, opcode, args: tokens)` — `tokens` is an existing token
///   collection and is cloned as-is.
/// * `node!(symbol, opcode, t1, t2, ...)` — every argument that is a literal is
///   wrapped in `Token::Immediate`; any other expression is cloned and used
///   directly, so it must already evaluate to a token.
///
/// `symbol` and `opcode` are cloned too, so callers keep ownership of whatever
/// they pass in.
#[macro_export]
macro_rules! node {
    // Internal: a literal argument becomes an immediate token.
    (@convert_token $token:literal) => {
        $crate::assembler::model::Token::Immediate($token)
    };
    // Internal: any other argument is taken to be a token already.
    (@convert_token $token:expr) => {
        $token.clone()
    };
    ($symbol:expr, $opcode:expr, args: $tokens:expr) => {
        $crate::assembler::model::Node::new($symbol.clone(), $opcode.clone(), $tokens.clone())
    };
    ($symbol:expr, $opcode:expr, $($tokens:expr),+) => {
        $crate::assembler::model::Node::new(
            $symbol.clone(),
            $opcode.clone(),
            // `$crate::` keeps the recursion working even when the caller has
            // not brought `node!` itself into scope.
            vec![$($crate::node!(@convert_token $tokens)),+]
        )
    };
}
/// Extracts the payload of a token that is expected to be a specific variant.
///
/// `expect_token!(token, Variant)` evaluates to `Ok(payload.clone())` when
/// `token` is `Token::Variant(payload)`. For any other token it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::Variant))`.
///
/// `Variant` must name a single-field tuple variant of `Token` that has an
/// equally named variant in `TokenType` (for example `Symbol`, `Register`,
/// `Immediate`, `StringLit` or `Opcode`).
#[macro_export]
macro_rules! expect_token {
    // One arm, generic over the variant name, replaces the per-variant copies.
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::assembler::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::assembler::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::assembler::model::TokenType::$variant,
            )),
        }
    };
}
/// Checks that a token is one of the listed variants.
///
/// `expect_type!(token, A, B, ...)` evaluates to `Ok(token.clone())` when
/// `token` is any of `Token::A(_)`, `Token::B(_)`, ... Otherwise it evaluates
/// to `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::A))`: only
/// the *first* listed variant is reported as the expected type.
///
/// Every listed name must be a single-field tuple variant of `Token`, and the
/// first one must also exist in `TokenType`.
#[macro_export]
macro_rules! expect_type {
    // Internal: maps the first listed variant to its `TokenType`. Generic over
    // the name, so it accepts every variant the main arm accepts.
    (@get_first_type $first:ident $(, $rest:ident)*) => {
        $crate::assembler::model::TokenType::$first
    };
    ($token:expr, $first:ident $(, $rest:ident)*) => {{
        let token = $token;
        match &token {
            $crate::assembler::model::Token::$first(_)
            $(| $crate::assembler::model::Token::$rest(_))* => Ok(token.clone()),
            // `token.clone()` yields an owned token whether `$token` was passed
            // by value or by reference, the same value the success arm returns.
            _ => Err($crate::assembler::AssembleError::UnexpectedToken(
                token.clone(),
                $crate::assembler::model::TokenType::$first,
            )),
        }
    }};
}
+98 -118
View File
@@ -1,21 +1,23 @@
#![allow(dead_code, unused)]
use std::{
collections::HashSet,
fmt, fs,
hash::{DefaultHasher, Hash, Hasher},
path::{Path, PathBuf},
sync::mpsc,
};
use common::prelude::Instruction;
use crate::{
assembler::{
expand::expand_pseudo_ops,
model::{Node, Opcode, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::{create_sections, resolve_dependencies, resolve_symbols},
},
codegen, log, node,
};
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
/// Prints `message` to stdout after an `INFO:` tag wrapped in the ANSI
/// escapes `\x1b[32m` (green) and `\x1b[0m` (reset).
fn log(message: &str) {
println!("\x1b[32mINFO:\x1b[0m {message}");
}
// Module declarations
#[macro_use]
pub mod macros;
pub mod codegen;
pub mod expand;
@@ -24,25 +26,91 @@ pub mod model;
pub mod parser;
pub mod resolver;
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
let mut modules = HashSet::<u64>::new();
let mut program = Program::new();
// Re-exports
pub use self::{
codegen::codegen,
expand::expand_pseudo_ops,
lexer::lexer,
model::{Module, Node, Opcode, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::{create_sections, resolve_dependencies, resolve_symbols},
};
let hash = quick_hash(src);
modules.insert(hash);
use crate::util::logging::{Entry, Logger};
prepare_dependency(src, &mut modules, &mut program)?;
let mut nodes = program.nodes;
/// Stateful front end for assembling source files into instructions.
pub struct CompilerEngine {
// Source-path hashes; `assemble` returns early for a path whose hash is in here.
modules: HashSet<u64>,
// Program accumulated by `prepare_dependency`; `build` works on a clone of its nodes.
program: Program,
// Sending side of the log channel created in `new`.
logger: Option<Logger>,
// Receiving side of the same log channel.
receiver: Option<mpsc::Receiver<Entry>>,
// Outcome of the most recent build; `None` until `assemble` has run one.
result: Option<Result<Vec<Instruction>, AssembleError>>,
}
create_sections(&mut nodes)?;
resolve_symbols(&mut nodes)?;
let instructions = codegen(nodes)?;
for inst in instructions.iter() {
println!("{inst}");
impl CompilerEngine {
/// Creates an engine with an empty program, no known modules and no result.
///
/// A fresh log channel is opened: its sender is wrapped in the engine's
/// `Logger` and its receiver is kept alongside it.
pub fn new() -> CompilerEngine {
    let (sender, receiver) = mpsc::channel::<Entry>();
    let program = Program::new();
    let modules = HashSet::new();
    let logger = Some(Logger::new(sender));

    CompilerEngine {
        modules,
        program,
        logger,
        receiver: Some(receiver),
        result: None,
    }
}
Ok(instructions)
/// Placeholder: always returns `None`.
///
/// It takes no `self`, so it cannot read the engine's log receiver yet.
pub fn get_log() -> Option<Entry> {
None
}
/// Placeholder: always returns an empty vector.
///
/// It takes no `self`, so it cannot read the engine's log receiver yet.
pub fn get_logs() -> Vec<Entry> {
vec![]
}
/// Returns `true` once a build outcome (success or failure) has been stored.
pub fn is_ready(&self) -> bool {
self.result.is_some()
}
/// Returns a clone of the stored build outcome, or `None` if there is none yet.
pub fn result(&self) -> Option<Result<Vec<Instruction>, AssembleError>> {
self.result.clone()
}
/// Assembles `src` and stores the build outcome for later retrieval.
///
/// If the hash of `src` is already in the engine's module set, nothing is
/// done and the stored outcome is left as it was.
///
/// # Errors
/// Returns the error from `prepare_dependency` if preparing `src` fails. A
/// failure of the build step is *not* returned here; it is captured in the
/// stored outcome, so `Ok(())` does not by itself mean the build succeeded.
pub fn assemble(&mut self, src: &Path) -> Result<(), AssembleError> {
    let already_known = self.modules.contains(&quick_hash(src));

    if !already_known {
        prepare_dependency(src, &mut self.modules, &mut self.program)?;
        let outcome = self.build();
        self.result = Some(outcome);
    }

    Ok(())
}
/// Placeholder that ignores `path` and always returns `Ok(())`.
fn load_module(&mut self, path: &Path) -> Result<(), AssembleError> {
Ok(())
}
/// Turns the accumulated program into instructions.
///
/// Runs `create_sections`, `resolve_symbols` and `codegen`, in that order, on
/// a clone of the program's nodes, prints every generated instruction to
/// stdout, and returns the instructions.
///
/// # Errors
/// Returns the first error raised by any of the three stages.
fn build(&self) -> Result<Vec<Instruction>, AssembleError> {
    // Work on a copy so the engine's own node list is left untouched.
    let mut working = self.program.nodes.clone();

    create_sections(&mut working)?;
    resolve_symbols(&mut working)?;

    let generated = codegen(working)?;
    generated.iter().for_each(|inst| println!("{inst}"));

    Ok(generated)
}
}
impl Default for CompilerEngine {
/// Equivalent to `CompilerEngine::new()`.
fn default() -> Self {
Self::new()
}
}
fn prepare_dependency(
@@ -71,9 +139,13 @@ fn prepare_dependency(
let parsed = Parser::parse_nodes(tokens)?;
log(&format!("{:20} {:20}", "Resolving Deps", filename));
let nodes = resolve_dependencies(parsed)?;
// Get the parent directory of the source file to use as the base directory
let base_dir = path
.parent()
.ok_or_else(|| AssembleError::InvalidFile(path.to_path_buf()))?;
let nodes = resolve_dependencies(parsed, base_dir)?;
let deps = Parser::get_dependencies(&nodes)?;
let deps = Parser::get_dependencies(&nodes, path)?;
log(&format!(
"{:20} {:20}",
@@ -122,7 +194,7 @@ pub fn disassemble(_: Vec<Instruction>) -> String {
todo!()
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum AssembleError {
Generic,
UnexpectedEof,
@@ -154,95 +226,3 @@ fn quick_hash(value: &Path) -> u64 {
value.canonicalize().unwrap().to_str().hash(&mut hasher);
hasher.finish()
}
/// Lexes and parses a snippet of DSA assembly, evaluating to the parsed nodes.
///
/// The first argument is forwarded unchanged as the second argument of
/// `assembler::lexer::lexer`. The second argument is the assembly text; when
/// further arguments follow, the text is used as a `format!` template for them.
///
/// Lexer and parser failures are propagated with `?` *inside* the expansion,
/// so the macro can only be used in a function whose return type accepts those
/// errors. The expansion itself evaluates to the parsed nodes, not to a
/// `Result`.
#[macro_export]
macro_rules! dsa {
    // Version with formatting arguments
    ($hash:expr, $input:expr, $($args:expr),+) => {{
        let input = format!($input, $($args),+);
        // The lexer and parser live under `assembler`, not at the crate root.
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
    // Version without formatting
    ($hash:expr, $input:expr) => {{
        let input = String::from($input);
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
}
/// Extracts the payload of a token that is expected to be a specific variant.
///
/// `expect_token!(token, Variant)` evaluates to `Ok(payload.clone())` when
/// `token` is `Token::Variant(payload)`. For any other token it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::Variant))`.
///
/// `Variant` must name a single-field tuple variant of `Token` that has an
/// equally named variant in `TokenType`.
#[macro_export]
macro_rules! expect_token {
    // One arm, generic over the variant name, replaces the per-variant copies.
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::assembler::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::assembler::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::assembler::model::TokenType::$variant,
            )),
        }
    };
}
/// Checks that a token is one of the listed variants.
///
/// `expect_type!(token, A, B, ...)` evaluates to `Ok(token.clone())` when
/// `token` is any of `Token::A(_)`, `Token::B(_)`, ... Otherwise it evaluates
/// to `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::A))`: only
/// the *first* listed variant is reported as the expected type.
#[macro_export]
macro_rules! expect_type {
    // Internal: maps the first listed variant to its `TokenType`. Generic over
    // the name, so it accepts every variant the main arm accepts.
    (@get_first_type $first:ident $(, $rest:ident)*) => {
        $crate::assembler::model::TokenType::$first
    };
    ($token:expr, $first:ident $(, $rest:ident)*) => {{
        let token = $token;
        match &token {
            $crate::assembler::model::Token::$first(_)
            $(| $crate::assembler::model::Token::$rest(_))* => Ok(token.clone()),
            // `token.clone()` yields an owned token whether `$token` was passed
            // by value or by reference, the same value the success arm returns.
            _ => Err($crate::assembler::AssembleError::UnexpectedToken(
                token.clone(),
                $crate::assembler::model::TokenType::$first,
            )),
        }
    }};
}
-23
View File
@@ -11,29 +11,6 @@ pub struct Node {
pub tokens: Vec<Token>,
}
/// Builds an `assembler::model::Node` from a symbol, an opcode and its tokens.
///
/// * `node!(symbol, opcode, args: tokens)` clones an existing token collection.
/// * `node!(symbol, opcode, t1, t2, ...)` wraps every literal argument in
///   `Token::Immediate` and clones every other argument as a token.
///
/// `symbol` and `opcode` are cloned as well.
#[macro_export]
macro_rules! node {
    // Internal: a literal argument becomes an immediate token.
    (@convert_token $token:literal) => {
        $crate::assembler::model::Token::Immediate($token)
    };
    // Internal: any other argument is taken to be a token already.
    (@convert_token $token:expr) => {
        $token.clone()
    };
    // Paths are spelled out through `$crate` so the expansion does not depend
    // on `Node`, `Token` or `node!` being imported at the call site.
    ($symbol:expr, $opcode:expr, args: $tokens:expr) => {
        $crate::assembler::model::Node::new($symbol.clone(), $opcode.clone(), $tokens.clone())
    };
    ($symbol:expr, $opcode:expr, $($tokens:expr),+) => {
        $crate::assembler::model::Node::new(
            $symbol.clone(),
            $opcode.clone(),
            vec![$($crate::node!(@convert_token $tokens)),+]
        )
    };
}
impl Node {
pub fn new(symbol: Option<Symbol>, opcode: Opcode, tokens: Vec<Token>) -> Node {
Node {
+22 -6
View File
@@ -1,4 +1,4 @@
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use crate::{assembler::AssembleError, expect_token, expect_type, node};
@@ -10,6 +10,7 @@ pub struct Parser {
nodes: Vec<Node>,
}
/// The node list of a program being assembled.
#[derive(Debug)]
pub struct Program {
/// Nodes collected so far.
pub nodes: Vec<Node>,
}
@@ -44,8 +45,6 @@ impl Parser {
nodes: vec![],
};
println!("{:#?}", tokens);
while !self_.tokens.is_empty() {
let ins = self_.parse_instruction()?;
self_.nodes.push(ins);
@@ -54,12 +53,29 @@ impl Parser {
Ok(self_.nodes.clone())
}
pub fn get_dependencies(nodes: &Vec<Node>) -> Result<Vec<PathBuf>, AssembleError> {
pub fn get_dependencies(
nodes: &Vec<Node>,
source_path: &Path,
) -> Result<Vec<PathBuf>, AssembleError> {
let mut dependencies = Vec::new();
// Get the parent directory of the source file to use as the base directory
let base_dir = source_path
.parent()
.ok_or_else(|| AssembleError::InvalidFile(source_path.to_path_buf()))?;
for node in nodes {
if let Opcode::Include = node.opcode() {
let path = expect_token!(node.args().get(1).unwrap(), StringLit)?;
dependencies.push(PathBuf::from(path));
let path_str = expect_token!(node.args().get(1).unwrap(), StringLit)?;
let path = PathBuf::from(path_str);
// If the path is not absolute, make it relative to the base directory
let full_path = if path.is_absolute() {
path
} else {
base_dir.join(path)
};
dependencies.push(full_path);
}
}
Ok(dependencies)
+20 -8
View File
@@ -1,9 +1,16 @@
use std::{collections::HashMap, path::PathBuf};
use std::{
collections::HashMap,
fs::canonicalize,
path::{Path, PathBuf},
};
use common::prelude::Register;
use crate::assembler::model::{Module, Node, Opcode, Symbol, Token};
use crate::assembler::quick_hash;
use crate::assembler::{
log,
model::{Module, Node, Opcode, Symbol, Token},
};
use crate::{assembler::AssembleError, node};
pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> {
@@ -63,7 +70,10 @@ fn generate_symbol_table(nodes: &[Node]) -> Result<HashMap<Symbol, u32>, Assembl
Ok(table)
}
pub fn resolve_dependencies(mut nodes: Vec<Node>) -> Result<Vec<Node>, AssembleError> {
pub fn resolve_dependencies(
mut nodes: Vec<Node>,
base_dir: &Path,
) -> Result<Vec<Node>, AssembleError> {
// First we get a list of imports.
let mut dependencies = Vec::new();
for node in &nodes {
@@ -79,11 +89,13 @@ pub fn resolve_dependencies(mut nodes: Vec<Node>) -> Result<Vec<Node>, AssembleE
} else {
unreachable!()
};
let hash = quick_hash(
&PathBuf::from(path)
.canonicalize()
.expect("ERROR: Invalid import path."),
);
let full_path = base_dir.join(path);
let canonical_path = full_path
.canonicalize()
.map_err(|_| AssembleError::InvalidFile(full_path.to_path_buf()))?;
let hash = quick_hash(&canonical_path);
dependencies.push((name, hash));
}
+1 -8
View File
@@ -1,15 +1,8 @@
use assembler::codegen::codegen;
pub mod assembler;
pub mod tooling;
mod util;
pub mod prelude {
pub use crate::assembler::assemble;
pub use crate::assembler::CompilerEngine;
pub use crate::assembler::disassemble;
}
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
/// Prints `message` to stdout after an `INFO:` tag wrapped in the ANSI
/// escapes `\x1b[32m` (green) and `\x1b[0m` (reset).
fn log(message: &str) {
println!("\x1b[32mINFO:\x1b[0m {message}");
}
+38 -11
View File
@@ -1,7 +1,8 @@
use assembler::prelude::*;
use std::{fs, io::Write, path::PathBuf};
fn main() {
// parse args:
// Parse command line arguments
let args: Vec<String> = std::env::args().collect();
if args.len() == 2 && args[1] == "init" {
@@ -10,23 +11,49 @@ fn main() {
}
if args.len() != 5 || args[1] != "-i" || args[3] != "-o" {
eprintln!("Usage: binary_name -i input_path -o output_path");
eprintln!("Usage: {} -i input_path -o output_path", args[0]);
std::process::exit(1);
}
let input_path = &args[2];
let output_path = &args[4];
let src = PathBuf::from(input_path);
let mut output_file = fs::File::create(output_path).unwrap();
match assembler::assembler::assemble(&src) {
Ok(res) => {
res.iter().map(|i| i.encode()).for_each(|i| {
output_file.write_all(&i.to_le_bytes()).unwrap();
});
}
// Create the output file
let mut output_file = match fs::File::create(output_path) {
Ok(file) => file,
Err(e) => {
eprintln!("{e}");
eprintln!("Failed to create output file: {}", e);
std::process::exit(1);
}
};
// Initialize the compiler engine
let mut engine = CompilerEngine::new();
// Assemble the source file
if let Err(e) = engine.assemble(&src) {
eprintln!("Assembly error: {}", e);
std::process::exit(1);
}
// Build and write the output
match engine.result() {
Some(Ok(instructions)) => {
for instruction in instructions {
if let Err(e) = output_file.write_all(&instruction.encode().to_le_bytes())
{
eprintln!("Failed to write to output file: {}", e);
std::process::exit(1);
}
}
}
Some(Err(e)) => {
eprintln!("Build error: {}", e);
std::process::exit(1);
}
None => {
eprintln!("Build error: No result available");
std::process::exit(1);
}
}
+94
View File
@@ -0,0 +1,94 @@
use std::{fmt, sync::mpsc::Sender};
/// Handle for emitting log entries; every logging method sends one `Entry`
/// through `sender`.
#[allow(dead_code)]
#[derive(Debug)]
pub struct Logger {
/// Sending half of the channel that carries the entries.
pub sender: Sender<Entry>,
}
impl Logger {
pub fn new(sender: Sender<Entry>) -> Self {
Self { sender }
}
pub fn debug<T: fmt::Display>(&self, message: T) {
self.sender
.send(Entry {
etype: EntryType::Debug,
message: message.to_string(),
})
.unwrap();
}
pub fn info<T: fmt::Display>(&self, message: T) {
self.sender
.send(Entry {
etype: EntryType::Info,
message: message.to_string(),
})
.unwrap();
}
pub fn warn<T: fmt::Display>(&self, message: T) {
self.sender
.send(Entry {
etype: EntryType::Warn,
message: message.to_string(),
})
.unwrap();
}
pub fn error<T: fmt::Display>(&self, message: T) {
self.sender
.send(Entry {
etype: EntryType::Error,
message: message.to_string(),
})
.unwrap();
}
pub fn fatal<T: fmt::Display>(&self, message: T) {
self.sender
.send(Entry {
etype: EntryType::Fatal,
message: message.to_string(),
})
.unwrap();
}
}
/// A single log record: a severity plus the already-formatted message text.
pub struct Entry {
// Severity of the record; shown as the prefix when the entry is displayed.
etype: EntryType,
/// The message text.
pub message: String,
}
/// Severity of a log entry; there is one variant per `Logger` method.
#[derive(Copy, Clone, Eq, PartialEq)]
enum EntryType {
Debug,
Info,
Warn,
Error,
Fatal,
}
impl fmt::Display for EntryType {
    /// Writes the upper-case level name, left-aligned and space-padded to a
    /// width of five characters.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            EntryType::Debug => "DEBUG",
            EntryType::Info => "INFO",
            EntryType::Warn => "WARN",
            EntryType::Error => "ERROR",
            EntryType::Fatal => "FATAL",
        };
        write!(f, "{label:<5}")
    }
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}: {}", self.etype, self.message)
}
}
+2
View File
@@ -1,3 +1,5 @@
pub mod logging;
use std::io::Write;
pub fn input(prompt: &str) -> String {