assembler: broke everything, currently modularising
This commit is contained in:
Generated
+1
@@ -270,6 +270,7 @@ dependencies = [
|
||||
"common",
|
||||
"num_cpus",
|
||||
"threadpool",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -17,3 +17,4 @@ clap = { version = "4.5.40", features = ["derive"] }
|
||||
common = { path = "../common" }
|
||||
num_cpus = "1.17.0"
|
||||
threadpool = "1.8.1"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
|
||||
@@ -6,8 +6,10 @@ use std::{
|
||||
thread::{self, JoinHandle},
|
||||
};
|
||||
|
||||
use crate::assembler::{AssembleError, Token, expand_pseudo_ops, lexer, quick_hash};
|
||||
use crate::assembler::{Node, Parser, resolve_dependencies};
|
||||
use crate::assembler::{Node, Parser, ProgramRef, Task, resolve_dependencies};
|
||||
use crate::assembler::{
|
||||
Token, error::AssembleError, expand_pseudo_ops, lexer, quick_hash,
|
||||
};
|
||||
use crate::util::logging::Logger;
|
||||
|
||||
// pub fn new_assemble(path: &Path) {
|
||||
@@ -55,71 +57,6 @@ impl Default for Program {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ProgramRef {
|
||||
program: Arc<Mutex<Program>>,
|
||||
}
|
||||
|
||||
impl ProgramRef {
|
||||
#[must_use]
|
||||
pub fn new(program: Program) -> Self {
|
||||
Self {
|
||||
program: Arc::new(Mutex::new(program)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(&self, path: &Path) {
|
||||
self.program
|
||||
.lock()
|
||||
.expect("Failed to acquire program lock")
|
||||
.registry
|
||||
.insert(quick_hash(path));
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn is_registered(&self, path: &Path) -> bool {
|
||||
self.program
|
||||
.lock()
|
||||
.expect("Failed to acquire program lock")
|
||||
.registry
|
||||
.contains(&quick_hash(path))
|
||||
}
|
||||
|
||||
// pub fn get_tasks(&self) -> Vec<&Task> {
|
||||
// self.program.lock().unwrap().tasks.iter().collect()
|
||||
// }
|
||||
|
||||
pub fn add_task(&self, task: Task) {
|
||||
self.program
|
||||
.lock()
|
||||
.expect("Failed to acquire program lock")
|
||||
.add_task(task);
|
||||
}
|
||||
|
||||
pub fn add_module(&self, module: Module) {
|
||||
self.program
|
||||
.lock()
|
||||
.expect("Failed to acquire program lock")
|
||||
.modules
|
||||
.push(module);
|
||||
}
|
||||
|
||||
pub fn log(&self, message: &str) {
|
||||
self.program
|
||||
.lock()
|
||||
.expect("Failed to acquire program lock")
|
||||
.logger
|
||||
.log(message);
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for ProgramRef {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
program: self.program.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Module {
|
||||
pub path: PathBuf,
|
||||
pub hash: u64,
|
||||
@@ -144,7 +81,8 @@ impl Module {
|
||||
}
|
||||
|
||||
pub fn build(path: PathBuf, program: ProgramRef) -> Result<Task, AssembleError> {
|
||||
// Spawn a thread that creates the main function and executes the lexer and parser.
|
||||
// Spawn a thread that creates the main function and executes the lexer and
|
||||
// parser.
|
||||
let handle = thread::spawn(move || {
|
||||
let mut module =
|
||||
Self::new(path.clone(), quick_hash(&path), Vec::new(), program.clone());
|
||||
@@ -154,7 +92,8 @@ impl Module {
|
||||
module.parse(tokens);
|
||||
module.expand();
|
||||
module.prepare_dependencies();
|
||||
module
|
||||
|
||||
Ok(module)
|
||||
}
|
||||
Err(why) => {
|
||||
eprintln!(
|
||||
@@ -162,13 +101,12 @@ impl Module {
|
||||
path.display()
|
||||
);
|
||||
|
||||
// TODO: Find a way to make this work without panicking.
|
||||
unreachable!()
|
||||
Err(why)
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Task { module: handle })
|
||||
Ok(Task::new(path, program)?)
|
||||
}
|
||||
|
||||
fn lex(&self) -> Result<Vec<Token>, AssembleError> {
|
||||
@@ -181,8 +119,13 @@ impl Module {
|
||||
));
|
||||
}
|
||||
|
||||
let src = fs::read_to_string(&self.path)
|
||||
.map_err(|_| AssembleError::InvalidFile(self.path.clone()))?;
|
||||
let src = fs::read_to_string(&self.path).map_err(|e| {
|
||||
AssembleError::Io(format!(
|
||||
"Failed to read file '{}': {}",
|
||||
self.path.display(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
let file_hash = quick_hash(&self.path);
|
||||
|
||||
@@ -258,7 +201,3 @@ impl Module {
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Task {
|
||||
module: JoinHandle<Module>,
|
||||
}
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
//! Compiler engine for orchestrating the assembly process.
|
||||
|
||||
use crate::assembler::{AssembleError, Program, Task};
|
||||
use common::prelude::Instruction;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Output formats the assembler can be configured to target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Flat binary executable.
    Binary,
    /// ELF relocatable object file.
    ElfObject,
    /// ELF executable.
    ElfExecutable,
}
|
||||
|
||||
/// Main compilation orchestrator that manages the assembly process.
|
||||
pub struct CompilerEngine {
|
||||
/// Configuration options for compilation
|
||||
pub output_format: OutputFormat,
|
||||
pub include_debug_info: bool,
|
||||
pub optimization_level: u8,
|
||||
}
|
||||
|
||||
impl CompilerEngine {
|
||||
/// Creates a new compiler engine with default settings.
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
output_format: OutputFormat::Binary,
|
||||
include_debug_info: false,
|
||||
optimization_level: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new compiler engine with specified output format.
|
||||
#[must_use]
|
||||
pub fn with_output_format(output_format: OutputFormat) -> Self {
|
||||
Self {
|
||||
output_format,
|
||||
include_debug_info: false,
|
||||
optimization_level: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the output format for compilation.
|
||||
pub fn set_output_format(&mut self, format: OutputFormat) {
|
||||
self.output_format = format;
|
||||
}
|
||||
|
||||
/// Enables or disables debug information generation.
|
||||
pub fn set_debug_info(&mut self, enabled: bool) {
|
||||
self.include_debug_info = enabled;
|
||||
}
|
||||
|
||||
/// Sets the optimization level (0-3).
|
||||
pub fn set_optimization_level(&mut self, level: u8) {
|
||||
self.optimization_level = level.min(3);
|
||||
}
|
||||
|
||||
/// Main assembly function that orchestrates the entire compilation process.
|
||||
pub fn assemble(
|
||||
&self,
|
||||
main_path: &Path,
|
||||
output_path: Option<&Path>,
|
||||
) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let program = Program::new();
|
||||
|
||||
// Set the main path in the program
|
||||
program.set_main_path(main_path.to_path_buf())?;
|
||||
|
||||
// Create and execute the main compilation task
|
||||
let main_task = Task::new(main_path.to_path_buf(), program.clone())?;
|
||||
let module = main_task.join()?;
|
||||
|
||||
program.add_module(module)?;
|
||||
|
||||
// Wait for all dependency compilation tasks to complete
|
||||
self.wait_for_completion(&program)?;
|
||||
|
||||
// Generate final instructions
|
||||
let instructions = self.generate_instructions(&program)?;
|
||||
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
/// Waits for all compilation tasks to complete.
|
||||
fn wait_for_completion(&self, program: &Program) -> Result<(), AssembleError> {
|
||||
let tasks = program.get_tasks()?;
|
||||
|
||||
for task in tasks {
|
||||
let module = task.join()?;
|
||||
program.add_module(module)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generates the final instruction stream from all compiled modules.
|
||||
fn generate_instructions(
|
||||
&self,
|
||||
program: &Program,
|
||||
) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let mut all_nodes = Vec::new();
|
||||
|
||||
// Collect all nodes from all modules
|
||||
for module in program.get_modules()? {
|
||||
all_nodes.extend(module.nodes.clone());
|
||||
}
|
||||
|
||||
// Apply resolution and code generation
|
||||
crate::assembler::create_sections(&mut all_nodes)?;
|
||||
crate::assembler::resolve_symbols(&mut all_nodes)?;
|
||||
crate::assembler::codegen(all_nodes)
|
||||
}
|
||||
|
||||
/// Determines the default output path based on input path and output format.
|
||||
fn default_output_path(&self, input_path: &Path) -> PathBuf {
|
||||
let stem = input_path.file_stem().unwrap_or_default();
|
||||
let parent = input_path.parent().unwrap_or(Path::new("."));
|
||||
|
||||
let extension = match self.output_format {
|
||||
OutputFormat::Binary => "bin",
|
||||
OutputFormat::ElfObject => "o",
|
||||
OutputFormat::ElfExecutable => "elf",
|
||||
};
|
||||
|
||||
parent.join(format!("{}.{}", stem.to_string_lossy(), extension))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CompilerEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience function for simple assembly with default settings.
|
||||
pub fn assemble(input_path: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let engine = CompilerEngine::new();
|
||||
engine.assemble(input_path, None)
|
||||
}
|
||||
|
||||
/// Convenience function for assembling to ELF object format.
|
||||
pub fn assemble_to_object(
|
||||
input_path: &Path,
|
||||
output_path: Option<&Path>,
|
||||
) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let engine = CompilerEngine::with_output_format(OutputFormat::ElfObject);
|
||||
engine.assemble(input_path, output_path)
|
||||
}
|
||||
|
||||
/// Convenience function for assembling to ELF executable format.
|
||||
pub fn assemble_to_executable(
|
||||
input_path: &Path,
|
||||
output_path: Option<&Path>,
|
||||
) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let engine = CompilerEngine::with_output_format(OutputFormat::ElfExecutable);
|
||||
engine.assemble(input_path, output_path)
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
//! Error types for the DSA assembler.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// Error type shared by every stage of the assembler.
#[derive(Debug)]
pub enum AssembleError {
    /// IO-related errors (file not found, permission denied, etc.).
    Io(std::io::Error),

    /// Lexical analysis errors, located by line and column.
    Lexer {
        message: String,
        line: usize,
        column: usize,
    },

    /// Parsing errors, located by line and offending token.
    Parser {
        message: String,
        line: usize,
        token: String,
    },

    /// Symbol resolution errors.
    Symbol {
        message: String,
        symbol_name: String,
    },

    /// Code generation errors.
    Codegen {
        message: String,
        instruction: String,
    },

    /// Dependency resolution errors.
    Dependency {
        message: String,
        module_path: String,
    },

    /// Threading and synchronization errors.
    Threading(String),

    /// Output generation errors.
    Output { message: String, format: String },

    /// Generic assembly error.
    Generic(String),
}

impl fmt::Display for AssembleError {
    /// Renders a one-line, human-readable description prefixed with the
    /// error category.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Io(cause) => write!(f, "IO Error: {}", cause),
            Self::Lexer {
                message: what,
                line: row,
                column: col,
            } => write!(f, "Lexer Error at {}:{}: {}", row, col, what),
            Self::Parser {
                message: what,
                line: row,
                token: tok,
            } => write!(
                f,
                "Parser Error at line {}, token '{}': {}",
                row, tok, what
            ),
            Self::Symbol {
                message: what,
                symbol_name: name,
            } => write!(f, "Symbol Error '{}': {}", name, what),
            Self::Codegen {
                message: what,
                instruction: instr,
            } => write!(f, "Codegen Error in '{}': {}", instr, what),
            Self::Dependency {
                message: what,
                module_path: module,
            } => write!(f, "Dependency Error in '{}': {}", module, what),
            Self::Threading(what) => write!(f, "Threading Error: {}", what),
            Self::Output {
                message: what,
                format: kind,
            } => write!(f, "Output Error ({}): {}", kind, what),
            Self::Generic(what) => write!(f, "Assembly Error: {}", what),
        }
    }
}

impl std::error::Error for AssembleError {}

/// Lets `?` lift I/O failures into [`AssembleError::Io`].
impl From<std::io::Error> for AssembleError {
    fn from(cause: std::io::Error) -> Self {
        Self::Io(cause)
    }
}

/// Lets `?` on a poisoned lock produce [`AssembleError::Threading`].
impl<T> From<std::sync::PoisonError<T>> for AssembleError {
    fn from(poisoned: std::sync::PoisonError<T>) -> Self {
        Self::Threading(format!("Mutex poisoned: {}", poisoned))
    }
}
|
||||
+20
-243
@@ -1,266 +1,43 @@
|
||||
#![allow(dead_code, unused)]
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
fmt, fs,
|
||||
hash::{DefaultHasher, Hash, Hasher},
|
||||
path::{Path, PathBuf},
|
||||
sync::{Arc, Mutex, mpsc},
|
||||
thread,
|
||||
};
|
||||
//! DSA Assembler module - converts assembly source code into executable instructions.
|
||||
|
||||
use common::prelude::Instruction;
|
||||
|
||||
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
|
||||
fn log(message: &str) {
|
||||
println!("\x1b[32mINFO:\x1b[0m {message}");
|
||||
}
|
||||
use std::path::Path;
|
||||
|
||||
// Module declarations
|
||||
#[macro_use]
|
||||
pub mod macros;
|
||||
|
||||
#[allow(clippy::module_inception)]
|
||||
pub mod assembler;
|
||||
pub mod codegen;
|
||||
pub mod engine;
|
||||
pub mod error;
|
||||
pub mod expand;
|
||||
pub mod lexer;
|
||||
pub mod model;
|
||||
pub mod parser;
|
||||
pub mod program;
|
||||
pub mod resolver;
|
||||
pub mod task;
|
||||
pub mod util;
|
||||
|
||||
// Re-exports
|
||||
// Re-exports for backward compatibility and convenience
|
||||
pub use self::{
|
||||
codegen::codegen,
|
||||
engine::{
|
||||
CompilerEngine, OutputFormat, assemble, assemble_to_executable,
|
||||
assemble_to_object,
|
||||
},
|
||||
error::AssembleError,
|
||||
expand::expand_pseudo_ops,
|
||||
lexer::lexer,
|
||||
model::{Module, Node, Opcode, Symbol, Token, TokenType},
|
||||
parser::{Parser, Program},
|
||||
parser::Parser,
|
||||
program::Program,
|
||||
resolver::{create_sections, resolve_dependencies, resolve_symbols},
|
||||
task::Task,
|
||||
util::{log, quick_hash},
|
||||
};
|
||||
|
||||
use crate::util::logging::{Entry, Logger};
|
||||
|
||||
pub struct CompilerEngine {
|
||||
result_tx: mpsc::Sender<Result<Vec<Instruction>, AssembleError>>,
|
||||
result_rx: Option<mpsc::Receiver<Result<Vec<Instruction>, AssembleError>>>,
|
||||
is_running: bool,
|
||||
}
|
||||
|
||||
impl CompilerEngine {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
let (tx, rx) = mpsc::channel();
|
||||
Self {
|
||||
result_tx: tx,
|
||||
result_rx: Some(rx),
|
||||
is_running: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the compilation process in a separate thread
|
||||
pub fn start_compilation(&mut self, src: &Path) {
|
||||
if self.is_running {
|
||||
return;
|
||||
}
|
||||
|
||||
let src = src.to_path_buf();
|
||||
let tx = self.result_tx.clone();
|
||||
|
||||
thread::spawn(move || {
|
||||
let result = assemble(&src);
|
||||
tx.send(result)
|
||||
.expect("Failed to send compilation result from worker thread");
|
||||
});
|
||||
|
||||
self.is_running = true;
|
||||
}
|
||||
|
||||
/// Check if compilation is complete and get the result
|
||||
pub fn try_get_result(&mut self) -> Option<Result<Vec<Instruction>, AssembleError>> {
|
||||
if !self.is_running {
|
||||
return None;
|
||||
}
|
||||
|
||||
match self
|
||||
.result_rx
|
||||
.as_ref()
|
||||
.expect("result_rx should be Some while compilation is running")
|
||||
.try_recv()
|
||||
{
|
||||
Ok(result) => {
|
||||
self.is_running = false;
|
||||
Some(result)
|
||||
}
|
||||
Err(mpsc::TryRecvError::Empty) => None,
|
||||
Err(mpsc::TryRecvError::Disconnected) => {
|
||||
self.is_running = false;
|
||||
Some(Err(AssembleError::Generic))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Block until compilation is complete and return the result
|
||||
pub fn wait_for_result(&mut self) -> Result<Vec<Instruction>, AssembleError> {
|
||||
if !self.is_running {
|
||||
return Err(AssembleError::Generic);
|
||||
}
|
||||
|
||||
if let Ok(result) = self
|
||||
.result_rx
|
||||
.take()
|
||||
.expect("result_rx should be Some while waiting for compilation result")
|
||||
.recv()
|
||||
{
|
||||
self.is_running = false;
|
||||
result
|
||||
} else {
|
||||
self.is_running = false;
|
||||
Err(AssembleError::Generic)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||
let mut modules = HashSet::new();
|
||||
let mut program = Program::new();
|
||||
|
||||
let hash = quick_hash(src);
|
||||
|
||||
if modules.contains(&hash) {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
prepare_dependency(src, &mut modules, &mut program)?;
|
||||
|
||||
let mut nodes = program.nodes.clone();
|
||||
|
||||
create_sections(&mut nodes)?;
|
||||
resolve_symbols(&mut nodes)?;
|
||||
|
||||
let instructions = codegen(nodes)?;
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
impl Default for CompilerEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_dependency(
|
||||
path: &Path,
|
||||
modules: &mut HashSet<u64>,
|
||||
program: &mut Program,
|
||||
) -> Result<(), AssembleError> {
|
||||
let filename = path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.expect("Failed to get file name from path");
|
||||
|
||||
if let Ok(path) = path.canonicalize() {
|
||||
log(&format!(
|
||||
"{:20} {:20} [{}]",
|
||||
"Building",
|
||||
filename,
|
||||
path.display()
|
||||
));
|
||||
}
|
||||
|
||||
let src = fs::read_to_string(path)
|
||||
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
|
||||
let file_hash = quick_hash(path);
|
||||
|
||||
log(&format!("{:20} {:20}", "Tokenising", filename));
|
||||
let tokens = lexer::lexer(src, file_hash)?;
|
||||
|
||||
log(&format!("{:20} {:20}", "Parsing", filename));
|
||||
let parsed = Parser::parse_nodes(tokens)?;
|
||||
|
||||
log(&format!("{:20} {:20}", "Resolving Deps", filename));
|
||||
// Get the parent directory of the source file to use as the base directory
|
||||
let base_dir = path
|
||||
.parent()
|
||||
.ok_or_else(|| AssembleError::InvalidFile(path.to_path_buf()))?;
|
||||
let mut nodes = expand_pseudo_ops(parsed, file_hash)?;
|
||||
nodes = resolve_dependencies(nodes, base_dir)?;
|
||||
|
||||
let deps = Parser::get_dependencies(&nodes, path)?;
|
||||
|
||||
log(&format!(
|
||||
"{:20} {:20}",
|
||||
"Expanding PseudoInstructions", filename
|
||||
));
|
||||
|
||||
// add a section instruction
|
||||
nodes.insert(
|
||||
0,
|
||||
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
|
||||
);
|
||||
|
||||
for n in &nodes {
|
||||
println!("{n}");
|
||||
}
|
||||
|
||||
program.add_module(nodes);
|
||||
|
||||
for dep in deps {
|
||||
log(&format!(
|
||||
"{:20} {:20}",
|
||||
"Including",
|
||||
dep.file_name()
|
||||
.and_then(|f| f.to_str())
|
||||
.expect("Dependency path has no file name or is not valid UTF-8")
|
||||
));
|
||||
|
||||
let dep_hash = quick_hash(&dep);
|
||||
if modules.insert(dep_hash) {
|
||||
prepare_dependency(dep.as_path(), modules, program)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum AssembleError {
|
||||
Generic,
|
||||
UnexpectedEof,
|
||||
InvalidFile(PathBuf),
|
||||
UnexpectedToken(Token, TokenType),
|
||||
InvalidArg,
|
||||
UndefinedSymbol(Symbol),
|
||||
/// Contains the nth element missing from the instruction.
|
||||
MissingArgument(u8),
|
||||
}
|
||||
|
||||
impl fmt::Display for AssembleError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Generic => write!(f, "Generic error"),
|
||||
Self::UnexpectedToken(tok, expected) => {
|
||||
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
|
||||
}
|
||||
Self::UnexpectedEof => write!(f, "Unexpected end of file"),
|
||||
Self::InvalidFile(path) => write!(f, "Invalid file `{}`", path.display()),
|
||||
Self::InvalidArg => write!(f, "Invalid argument"),
|
||||
Self::UndefinedSymbol(symbol) => {
|
||||
write!(f, "Undefined symbol {symbol}")
|
||||
}
|
||||
Self::MissingArgument(n) => {
|
||||
write!(f, "Missing argument #{n} from instruction arguments.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn quick_hash(value: &Path) -> u64 {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
value
|
||||
.canonicalize()
|
||||
.expect("Failed to canonicalize path for quick_hash")
|
||||
.to_str()
|
||||
.hash(&mut hasher);
|
||||
|
||||
hasher.finish()
|
||||
/// The old assemble function for compatibility reasons.
|
||||
pub fn legacy_assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||
engine::assemble(src)
|
||||
}
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
//! Data models for the DSA assembler.
|
||||
|
||||
use crate::assembler::{AssembleError, Parser, Program, expand_pseudo_ops, lexer};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use std::{fmt, str::FromStr};
|
||||
|
||||
use common::prelude::Register;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::assembler::AssembleError;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct Node {
|
||||
pub symbol: Option<Symbol>,
|
||||
pub opcode: Opcode,
|
||||
@@ -40,7 +44,9 @@ impl Node {
|
||||
self.args()
|
||||
.get(index)
|
||||
.cloned()
|
||||
.ok_or(AssembleError::InvalidArg)
|
||||
// TODO: This is a bad place to throw an error unless we write code to attach
|
||||
// context.
|
||||
.ok_or(AssembleError::Generic("Invalid argument index".to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,15 +73,6 @@ impl fmt::Display for Symbol {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Module {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Unresolved(name) => write!(f, "{name}"),
|
||||
Self::Resolved(name) => write!(f, "{name}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Opcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
@@ -160,12 +157,6 @@ impl PartialEq for Symbol {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Module {
|
||||
Resolved(u64),
|
||||
Unresolved(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Token {
|
||||
Symbol(Symbol),
|
||||
Register(Register),
|
||||
@@ -196,7 +187,7 @@ impl TokenType {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum Opcode {
|
||||
// Real instructions (0x00-0x26)
|
||||
Nop,
|
||||
@@ -417,3 +408,125 @@ impl Opcode {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a single source module and its compilation state.
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct Module {
|
||||
pub id: Uuid,
|
||||
pub path: PathBuf,
|
||||
pub hash: u64,
|
||||
pub nodes: Vec<Node>,
|
||||
program: Program,
|
||||
}
|
||||
|
||||
impl PartialEq for Module {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.id == other.id
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Module {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Module {{ id: {}, path: {}, nodes: {} }}",
|
||||
self.id,
|
||||
self.path.display(),
|
||||
self.nodes.len()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for Module {}
|
||||
|
||||
impl Module {
|
||||
#[must_use]
|
||||
pub fn new(path: PathBuf, hash: u64, nodes: Vec<Node>, program: Program) -> Self {
|
||||
Self {
|
||||
id: Uuid::new_v4(),
|
||||
path,
|
||||
hash,
|
||||
nodes,
|
||||
program,
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes the full compilation pipeline for this module.
|
||||
pub fn compile(&mut self) -> Result<(), AssembleError> {
|
||||
self.lex()?;
|
||||
self.parse()?;
|
||||
self.expand()?;
|
||||
self.prepare_dependencies()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Lexical analysis stage.
|
||||
pub fn lex(&mut self) -> Result<Vec<crate::assembler::Token>, AssembleError> {
|
||||
// Log the build
|
||||
if let Ok(path) = self.path.canonicalize() {
|
||||
let _ = self.program.log(&format!(
|
||||
"{:20} {:20} [{}]",
|
||||
"Building",
|
||||
self.get_filename(),
|
||||
path.display()
|
||||
));
|
||||
}
|
||||
|
||||
// Read and lex the file
|
||||
let source = std::fs::read_to_string(&self.path)?;
|
||||
lexer(source, self.hash)
|
||||
}
|
||||
|
||||
/// Parsing stage.
|
||||
pub fn parse(&mut self) -> Result<(), AssembleError> {
|
||||
let source = std::fs::read_to_string(&self.path)?;
|
||||
let tokens = lexer(source, self.hash)?;
|
||||
let nodes = Parser::parse_nodes(tokens)?;
|
||||
self.nodes = nodes;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pseudo-instruction expansion stage.
|
||||
pub fn expand(&mut self) -> Result<(), AssembleError> {
|
||||
self.nodes = expand_pseudo_ops(self.nodes.clone(), self.hash)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Dependency resolution stage.
|
||||
pub fn prepare_dependencies(&self) -> Result<(), AssembleError> {
|
||||
// let base_dir = self.path.parent();
|
||||
|
||||
let dependencies = Parser::get_dependencies(&self.nodes, &self.path)?;
|
||||
|
||||
for dep in dependencies {
|
||||
if self.program.is_registered(&dep)? {
|
||||
// we have already built this module!
|
||||
continue;
|
||||
}
|
||||
self.program.register(&dep)?;
|
||||
|
||||
// create new module task
|
||||
match Task::new(dep, self.program.clone()) {
|
||||
Ok(task) => {
|
||||
if let Err(e) = self.program.add_task(task) {
|
||||
eprintln!("Error adding task: {e}");
|
||||
}
|
||||
}
|
||||
Err(why) => {
|
||||
eprintln!("Error building program: {why}");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the filename from a [`PathBuf`].
|
||||
fn get_filename(&self) -> &str {
|
||||
self.path
|
||||
.file_name()
|
||||
.and_then(|f| f.to_str())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
use crate::assembler::Task;
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
//! Program state management for multi-module compilation.
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
path::PathBuf,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::assembler::{AssembleError, Module, Task, quick_hash};
|
||||
use crate::util::logging::Logger;
|
||||
|
||||
/// Main program state containing all modules and compilation metadata.
|
||||
#[derive(Debug)]
|
||||
pub struct Program {
|
||||
/// A field to be passed into a hasher.
|
||||
hash_me: Uuid,
|
||||
inner: Arc<Mutex<ProgramInner>>,
|
||||
}
|
||||
|
||||
impl std::hash::Hash for Program {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.hash_me.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Program {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
*self.inner.lock().unwrap() == *other.inner.lock().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
struct ProgramInner {
|
||||
pub main_path: PathBuf,
|
||||
pub registry: HashSet<u64>,
|
||||
pub modules: Vec<Module>,
|
||||
pub tasks: Vec<Task>,
|
||||
pub logger: Logger,
|
||||
}
|
||||
|
||||
impl Program {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
hash_me: Uuid::new_v4(),
|
||||
inner: Arc::new(Mutex::new(ProgramInner {
|
||||
registry: HashSet::new(),
|
||||
modules: Vec::new(),
|
||||
tasks: Vec::new(),
|
||||
main_path: PathBuf::new(),
|
||||
logger: Logger::new(),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers a module path to prevent duplicate compilation.
|
||||
pub fn register(&self, path: &std::path::Path) -> Result<(), AssembleError> {
|
||||
self.inner.lock()?.registry.insert(quick_hash(path));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks if a module path is already registered.
|
||||
pub fn is_registered(&self, path: &std::path::Path) -> Result<bool, AssembleError> {
|
||||
Ok(self.inner.lock()?.registry.contains(&quick_hash(path)))
|
||||
}
|
||||
|
||||
/// Gets all compilation tasks.
|
||||
pub fn get_tasks(&self) -> Result<Vec<Task>, AssembleError> {
|
||||
Ok(self.inner.lock()?.tasks.clone())
|
||||
}
|
||||
|
||||
/// Adds a new compilation task.
|
||||
pub fn add_task(&self, task: Task) -> Result<(), AssembleError> {
|
||||
self.inner.lock()?.tasks.push(task);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Adds a compiled module to the program.
|
||||
pub fn add_module(&self, module: Module) -> Result<(), AssembleError> {
|
||||
self.inner.lock()?.modules.push(module);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets all compiled modules.
|
||||
pub fn get_modules(&self) -> Result<Vec<Module>, AssembleError> {
|
||||
Ok(self.inner.lock()?.modules.clone())
|
||||
}
|
||||
|
||||
/// Logs a message using the program's logger.
|
||||
pub fn log(&self, message: &str) -> Result<(), AssembleError> {
|
||||
self.inner.lock()?.logger.log(message);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Sets the main path for the program.
|
||||
pub fn set_main_path(&self, path: PathBuf) -> Result<(), AssembleError> {
|
||||
self.inner.lock()?.main_path = path;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the main path for the program.
|
||||
pub fn get_main_path(&self) -> Result<PathBuf, AssembleError> {
|
||||
Ok(self.inner.lock()?.main_path.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Program {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
hash_me: self.hash_me.clone(),
|
||||
inner: Arc::clone(&self.inner),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Program {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
//! Threading utilities for parallel module compilation.
|
||||
|
||||
use std::{
|
||||
path::PathBuf,
|
||||
sync::Arc,
|
||||
thread::{self, JoinHandle},
|
||||
};
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::assembler::{AssembleError, Module, Program, quick_hash};
|
||||
|
||||
/// Represents a threaded compilation task for a single module.
|
||||
#[derive(Debug)]
|
||||
pub struct Task {
|
||||
id: Uuid,
|
||||
module_handle: Arc<JoinHandle<Result<Module, AssembleError>>>,
|
||||
}
|
||||
|
||||
impl PartialEq for Task {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.id == other.id
|
||||
}
|
||||
}
|
||||
|
||||
impl Task {
|
||||
/// Creates a new compilation task for the given module path.
|
||||
pub fn new(path: PathBuf, program: Program) -> Result<Self, AssembleError> {
|
||||
let handle = thread::spawn(move || {
|
||||
let mut module =
|
||||
Module::new(path.clone(), quick_hash(&path), Vec::new(), program.clone());
|
||||
|
||||
// Execute the compilation pipeline
|
||||
match module.compile() {
|
||||
Ok(()) => Ok(module),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Error building program at path `{}`: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Self {
|
||||
module_handle: Arc::new(handle),
|
||||
id: Uuid::new_v4(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a task from an existing join handle (for compatibility).
|
||||
pub fn from_handle(handle: JoinHandle<Result<Module, AssembleError>>) -> Self {
|
||||
Self {
|
||||
module_handle: Arc::new(handle),
|
||||
id: Uuid::new_v4(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits for the compilation task to complete and returns the compiled module.
|
||||
pub fn join(self) -> Result<Module, AssembleError> {
|
||||
let Some(join_handle) = Arc::try_unwrap(self.module_handle).ok() else {
|
||||
let err_msg = String::from(
|
||||
"Cannot take ownership of reference counted task join_handle, multiple references exist.",
|
||||
);
|
||||
eprintln!("{err_msg}");
|
||||
return Err(AssembleError::Threading(err_msg));
|
||||
};
|
||||
|
||||
match join_handle.join() {
|
||||
Ok(result) => result,
|
||||
Err(panic_payload) => {
|
||||
let err_msg = format!(
|
||||
"Task thread panicked: {:?}",
|
||||
panic_payload
|
||||
.downcast_ref::<String>()
|
||||
.map(|s| s.as_str())
|
||||
.or_else(|| panic_payload.downcast_ref::<&str>().copied())
|
||||
.unwrap_or("Unknown panic")
|
||||
);
|
||||
eprintln!("{err_msg}");
|
||||
Err(AssembleError::Threading(err_msg))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Task {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
id: self.id.clone(),
|
||||
module_handle: Arc::clone(&self.module_handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
//! Utility functions for the assembler.
|
||||
|
||||
use std::{
|
||||
hash::{DefaultHasher, Hash, Hasher},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
/// Quick hash function for file paths.
///
/// The path is canonicalized first so that different spellings of the same
/// existing file produce the same hash. If canonicalization fails (for
/// example because the file does not exist), the path is hashed as given
/// instead of panicking, so the caller can report the problem when it
/// actually tries to read the file.
///
/// Paths that are valid UTF-8 are hashed through their string form; other
/// paths are hashed through their raw `OsStr` so they do not all collapse
/// onto a single value.
pub fn quick_hash(value: &Path) -> u64 {
    let resolved = value.canonicalize();
    let path = resolved.as_deref().unwrap_or(value);

    let mut hasher = DefaultHasher::new();
    match path.to_str() {
        // Hash the `Option<&str>` itself, as before, so hashes of ordinary
        // paths keep their previous values.
        utf8 @ Some(_) => utf8.hash(&mut hasher),
        None => path.as_os_str().hash(&mut hasher),
    }

    hasher.finish()
}
|
||||
|
||||
/// Prints `message` to stdout, prefixed with a green `INFO:` tag.
pub fn log(message: &str) {
    // TODO: Use an actual logging or tracing library for pretty (scoped) output.
    println!("\x1b[32mINFO:\x1b[0m {message}");
}
|
||||
@@ -2,6 +2,7 @@
|
||||
#![allow(unused)]
|
||||
use std::{fmt, sync::mpsc::Sender};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Logger {}
|
||||
|
||||
impl Logger {
|
||||
|
||||
@@ -38,7 +38,7 @@ pub enum InstructionType {
|
||||
Immediate,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
#[non_exhaustive]
|
||||
pub enum Register {
|
||||
// general purpose registers
|
||||
|
||||
Reference in New Issue
Block a user