Compare commits
6 Commits
11a57eab51
...
7cb7525484
| Author | SHA1 | Date | |
|---|---|---|---|
| 7cb7525484 | |||
| 7565374d5b | |||
| 9b9e153500 | |||
| 27267e3daa | |||
| fb84a6d3c3 | |||
| 4e5db58a84 |
Generated
+39
@@ -129,6 +129,15 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android-activity"
|
||||
version = "0.6.0"
|
||||
@@ -269,6 +278,7 @@ dependencies = [
|
||||
"clap",
|
||||
"common",
|
||||
"num_cpus",
|
||||
"regex",
|
||||
"threadpool",
|
||||
"uuid",
|
||||
]
|
||||
@@ -2691,6 +2701,35 @@ dependencies = [
|
||||
"thiserror 2.0.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "renderdoc-sys"
|
||||
version = "1.1.0"
|
||||
|
||||
@@ -16,5 +16,6 @@ path = "src/lib.rs"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
common = { path = "../common" }
|
||||
num_cpus = "1.17.0"
|
||||
regex = "1.11.1"
|
||||
threadpool = "1.8.1"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
|
||||
@@ -0,0 +1,375 @@
|
||||
//! Simple compiler engine that orchestrates the entire compilation process.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
|
||||
use crate::{
|
||||
context::AssemblerContext,
|
||||
error::AssembleError,
|
||||
model::module::ModuleId,
|
||||
source::{token::Token, tokeniser::Tokeniser},
|
||||
};
|
||||
|
||||
use common::instructions::Instruction;
|
||||
|
||||
/// Error type for the `CompilerEngine`
|
||||
#[derive(Debug)]
|
||||
pub enum EngineError {
|
||||
/// Assembly error during compilation
|
||||
Assembly(AssembleError),
|
||||
/// Channel communication error
|
||||
Channel(String),
|
||||
/// Other generic error
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for EngineError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Assembly(e) => write!(f, "Assembly error: {e}"),
|
||||
Self::Channel(msg) => write!(f, "Channel error: {msg}"),
|
||||
Self::Other(msg) => write!(f, "Engine error: {msg}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for EngineError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::Assembly(e) => Some(e),
|
||||
Self::Channel(_) | Self::Other(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from AssembleError
|
||||
impl From<AssembleError> for EngineError {
|
||||
fn from(error: AssembleError) -> Self {
|
||||
Self::Assembly(error)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from mpsc::SendError
|
||||
impl<T> From<mpsc::SendError<T>> for EngineError {
|
||||
fn from(error: mpsc::SendError<T>) -> Self {
|
||||
Self::Channel(format!("Send error: {error}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from mpsc::RecvError
|
||||
impl From<mpsc::RecvError> for EngineError {
|
||||
fn from(error: mpsc::RecvError) -> Self {
|
||||
Self::Channel(format!("Receive error: {error}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from mpsc::TryRecvError
|
||||
impl From<mpsc::TryRecvError> for EngineError {
|
||||
fn from(error: mpsc::TryRecvError) -> Self {
|
||||
Self::Channel(format!("Try receive error: {error}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from String for generic errors
|
||||
impl From<String> for EngineError {
|
||||
fn from(error: String) -> Self {
|
||||
Self::Other(error)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from &str for convenience
|
||||
impl From<&str> for EngineError {
|
||||
fn from(error: &str) -> Self {
|
||||
Self::Other(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple compiler engine that orchestrates the entire compilation process.
|
||||
pub struct CompilerEngine {
|
||||
result_tx: mpsc::Sender<Result<Vec<Instruction>, EngineError>>,
|
||||
result_rx: Option<mpsc::Receiver<Result<Vec<Instruction>, EngineError>>>,
|
||||
is_running: bool,
|
||||
}
|
||||
|
||||
impl CompilerEngine {
|
||||
/// Create a new compiler engine
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
let (tx, rx) = mpsc::channel();
|
||||
Self {
|
||||
result_tx: tx,
|
||||
result_rx: Some(rx),
|
||||
is_running: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the compilation process in a separate thread
|
||||
pub fn start_compilation<P: AsRef<Path>>(&mut self, src: P) {
|
||||
if self.is_running {
|
||||
return;
|
||||
}
|
||||
|
||||
let src = src.as_ref().to_path_buf();
|
||||
let tx = self.result_tx.clone();
|
||||
|
||||
thread::spawn(move || {
|
||||
let result = assemble(&src).map_err(EngineError::from);
|
||||
let _ = tx.send(result); // Ignore send errors if receiver is dropped
|
||||
});
|
||||
|
||||
self.is_running = true;
|
||||
}
|
||||
|
||||
/// Check if compilation is complete and get the result
|
||||
pub fn try_get_result(&mut self) -> Option<Result<Vec<Instruction>, EngineError>> {
|
||||
if !self.is_running {
|
||||
return None;
|
||||
}
|
||||
|
||||
match self
|
||||
.result_rx
|
||||
.as_ref()
|
||||
.expect("result_rx should be Some while compilation is running")
|
||||
.try_recv()
|
||||
{
|
||||
Ok(result) => {
|
||||
self.is_running = false;
|
||||
Some(result)
|
||||
}
|
||||
Err(mpsc::TryRecvError::Empty) => None,
|
||||
Err(mpsc::TryRecvError::Disconnected) => {
|
||||
self.is_running = false;
|
||||
Some(Err(EngineError::Channel(
|
||||
"Compilation thread disconnected".to_string(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Block until compilation is complete and return the result
|
||||
pub fn wait_for_result(&mut self) -> Result<Vec<Instruction>, EngineError> {
|
||||
if !self.is_running {
|
||||
return Err(EngineError::Other("No compilation in progress".to_string()));
|
||||
}
|
||||
|
||||
let result = self
|
||||
.result_rx
|
||||
.take()
|
||||
.expect("result_rx should be Some while waiting for compilation result")
|
||||
.recv()
|
||||
.map_err(EngineError::from)?;
|
||||
|
||||
self.is_running = false;
|
||||
result
|
||||
}
|
||||
|
||||
/// Add a source file to be compiled (for compatibility with old interface)
|
||||
pub fn add_source_file<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
path: P,
|
||||
) -> Result<(), EngineError> {
|
||||
let path = path.as_ref().to_path_buf();
|
||||
|
||||
// Verify file exists
|
||||
if !path.exists() {
|
||||
return Err(EngineError::Assembly(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::Io(crate::error::IoError::new(
|
||||
crate::error::IoErrorKind::NotFound,
|
||||
Some(format!("Source file not found: {}", path.display())),
|
||||
)),
|
||||
)));
|
||||
}
|
||||
|
||||
// For now, just validate the file exists
|
||||
// TODO: Could store multiple files for batch compilation
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compile all added source files (synchronous version)
|
||||
pub fn compile(&mut self) -> Result<CompileResult, EngineError> {
|
||||
// This is a placeholder that matches the old interface
|
||||
// For now, return empty result since we don't have a specific file to compile
|
||||
Ok(CompileResult {
|
||||
modules: Vec::new(),
|
||||
tokens: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get access to the assembler context (placeholder)
|
||||
pub fn context(&self) -> Result<&AssemblerContext, EngineError> {
|
||||
// For now, return an error since we're using the threaded approach
|
||||
// TODO: Integrate context properly when we have more compilation phases
|
||||
Err(EngineError::Other(
|
||||
"Context not available in threaded mode".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CompilerEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Main assembly function that orchestrates the compilation process
|
||||
fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||
// Verify the file exists
|
||||
if !src.exists() {
|
||||
return Err(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::Io(crate::error::IoError::new(
|
||||
crate::error::IoErrorKind::NotFound,
|
||||
Some(format!("Source file not found: {}", src.display())),
|
||||
)),
|
||||
));
|
||||
}
|
||||
|
||||
let mut modules = HashSet::new();
|
||||
let mut all_tokens = HashMap::new();
|
||||
let mut module_ids = Vec::new();
|
||||
|
||||
// Create a new assembler context for this compilation
|
||||
let context = AssemblerContext::new();
|
||||
|
||||
// Process the main file and its dependencies
|
||||
prepare_dependency(
|
||||
src,
|
||||
&mut modules,
|
||||
&mut all_tokens,
|
||||
&mut module_ids,
|
||||
&context,
|
||||
)?;
|
||||
|
||||
// Phase 2: Parse tokens into AST (placeholder for now)
|
||||
// TODO: Add parser here when implemented
|
||||
println!("Phase 2: Parsing {} modules...", module_ids.len());
|
||||
|
||||
// Phase 3: Symbol resolution (placeholder for now)
|
||||
// TODO: Add symbol resolution here when implemented
|
||||
println!("Phase 3: Resolving symbols...");
|
||||
|
||||
// Phase 4: Code generation (placeholder for now)
|
||||
// TODO: Add code generation here when implemented
|
||||
println!("Phase 4: Generating code...");
|
||||
|
||||
// For now, return empty instructions since we don't have the full pipeline yet
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
/// Prepare a dependency (file) for compilation
|
||||
fn prepare_dependency(
|
||||
path: &Path,
|
||||
modules: &mut HashSet<u64>,
|
||||
all_tokens: &mut HashMap<ModuleId, Vec<Token>>,
|
||||
module_ids: &mut Vec<ModuleId>,
|
||||
context: &AssemblerContext,
|
||||
) -> Result<(), AssembleError> {
|
||||
let filename = path.file_name().and_then(|n| n.to_str()).ok_or_else(|| {
|
||||
AssembleError::new_other_error(crate::error::AssembleErrorKind::Io(
|
||||
crate::error::IoError::new(
|
||||
crate::error::IoErrorKind::InvalidData,
|
||||
Some("Failed to get file name from path".to_string()),
|
||||
),
|
||||
))
|
||||
})?;
|
||||
|
||||
// Calculate a simple hash for the file (similar to quick_hash)
|
||||
let file_hash = calculate_file_hash(path);
|
||||
|
||||
// Skip if we've already processed this module
|
||||
if modules.contains(&file_hash) {
|
||||
return Ok(());
|
||||
}
|
||||
modules.insert(file_hash);
|
||||
|
||||
if let Ok(canonical_path) = path.canonicalize() {
|
||||
println!("Building {} [{}]", filename, canonical_path.display());
|
||||
}
|
||||
|
||||
// Phase 1: Tokenize the file
|
||||
println!("Tokenising {filename}");
|
||||
let tokeniser = Tokeniser::new(path)?;
|
||||
let tokens = tokeniser.tokenise(context)?;
|
||||
|
||||
// Get the module ID that was registered during tokenization
|
||||
let module_id = get_module_id_for_file(path, context)?;
|
||||
|
||||
all_tokens.insert(module_id, tokens);
|
||||
module_ids.push(module_id);
|
||||
|
||||
// TODO: Parse tokens to find dependencies (.include directives, etc.)
|
||||
// For now, we'll just process the single file
|
||||
println!("Resolving dependencies for {filename}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Hashes a file path so that the same file is recognised when it is seen again.
///
/// The canonicalised path is hashed when it can be resolved; otherwise the path is
/// hashed exactly as given.
fn calculate_file_hash(path: &Path) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    match path.canonicalize() {
        Ok(resolved) => resolved.hash(&mut state),
        Err(_) => path.hash(&mut state),
    }
    state.finish()
}
|
||||
|
||||
/// Get the module ID for a given source file
|
||||
fn get_module_id_for_file(
|
||||
file_path: &Path,
|
||||
context: &AssemblerContext,
|
||||
) -> Result<ModuleId, AssembleError> {
|
||||
{
|
||||
let registry = context.module_registry.read()?;
|
||||
|
||||
// Find module by path
|
||||
for module in registry.modules() {
|
||||
if module.path == file_path {
|
||||
return Ok(module.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::Io(crate::error::IoError::new(
|
||||
crate::error::IoErrorKind::NotFound,
|
||||
Some(format!(
|
||||
"Module not found for file: {}",
|
||||
file_path.display()
|
||||
)),
|
||||
)),
|
||||
))
|
||||
}
|
||||
|
||||
/// Result of compilation. This is useless at present but compiles.
|
||||
#[derive(Debug)]
|
||||
pub struct CompileResult {
|
||||
pub modules: Vec<ModuleId>,
|
||||
pub tokens: HashMap<ModuleId, Vec<Token>>,
|
||||
}
|
||||
|
||||
impl CompileResult {
|
||||
/// Get tokens for a specific module
|
||||
#[must_use]
|
||||
pub fn get_tokens(&self, module_id: &ModuleId) -> Option<&Vec<Token>> {
|
||||
self.tokens.get(module_id)
|
||||
}
|
||||
|
||||
/// Get all module IDs
|
||||
#[must_use]
|
||||
pub fn module_ids(&self) -> &[ModuleId] {
|
||||
&self.modules
|
||||
}
|
||||
|
||||
/// Get total number of tokens across all modules
|
||||
#[must_use]
|
||||
pub fn total_tokens(&self) -> usize {
|
||||
self.tokens.values().map(std::vec::Vec::len).sum()
|
||||
}
|
||||
}
|
||||
+158
-11
@@ -51,20 +51,173 @@ impl Display for AssembleError {
|
||||
/// Marker trait.
|
||||
impl std::error::Error for AssembleError {}
|
||||
|
||||
/// Different types of errors that may occur when assembling a set of input source files.
|
||||
#[derive(Debug, Clone)]
|
||||
#[non_exhaustive]
|
||||
#[derive(Debug)]
|
||||
pub enum AssembleErrorKind {
|
||||
/// Usually unexpected I/O errors. Not normally recoverable.
|
||||
IO(std::io::Error),
|
||||
Io(IoError),
|
||||
/// Errors emitted from the [`Tokeniser`].
|
||||
Tokenise(TokeniserError),
|
||||
Tokeniser(TokeniserError),
|
||||
Parser(ParserError),
|
||||
Symbol(SymbolError),
|
||||
Codegen(CodegenError),
|
||||
Threading(ThreadingError),
|
||||
/// Returned for code where the functionality has not yet been implemented but we
|
||||
/// don't want the program to panic.
|
||||
Unimplemented(&'static str),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParserError {
|
||||
error_type: ParserErrorType,
|
||||
source_info: SourceInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ParserErrorType {
|
||||
UnexpectedToken,
|
||||
MissingOperand,
|
||||
InvalidInstruction,
|
||||
MissingLabel,
|
||||
DuplicateLabel,
|
||||
}
|
||||
|
||||
impl Display for ParserErrorType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::UnexpectedToken => write!(f, "unexpected token"),
|
||||
Self::MissingOperand => write!(f, "missing operand"),
|
||||
Self::InvalidInstruction => write!(f, "invalid instruction"),
|
||||
Self::MissingLabel => write!(f, "missing label"),
|
||||
Self::DuplicateLabel => write!(f, "duplicate label"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ParserError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// TODO: Print the path/to/filename.dsa:line_no, column col_no.
|
||||
write!(
|
||||
f,
|
||||
"Parser error, {} at {}",
|
||||
self.error_type, self.source_info
|
||||
)?;
|
||||
|
||||
// Prints out the context for our error.
|
||||
self.source_info
|
||||
.print_context_with_underline()
|
||||
.map_err(|e| {
|
||||
_ = writeln!(f, "Print context error: {e}");
|
||||
|
||||
std::fmt::Error {}
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The categories of symbol failure; `Display` gives the user-facing wording.
#[derive(Debug, Clone)]
pub enum SymbolError {
    Undefined,
    Duplicate,
    CircularDependency,
    InvalidReference,
}

impl Display for SymbolError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::Undefined => "undefined symbol",
            Self::Duplicate => "duplicate symbol",
            Self::CircularDependency => "circular dependency",
            Self::InvalidReference => "invalid reference",
        };
        f.write_str(description)
    }
}
|
||||
|
||||
/// The categories of code generation failure; `Display` gives the user-facing wording.
#[derive(Debug, Clone)]
pub enum CodegenError {
    InvalidOperand,
    OutOfRange,
    UnsupportedInstruction,
}

impl Display for CodegenError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::InvalidOperand => "invalid operand",
            Self::OutOfRange => "out of range",
            Self::UnsupportedInstruction => "unsupported instruction",
        };
        f.write_str(description)
    }
}
|
||||
|
||||
/// The categories of threading failure; `Display` gives the user-facing wording.
#[derive(Debug, Clone)]
pub enum ThreadingError {
    LockFailed,
    ThreadPanic,
}

impl Display for ThreadingError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::LockFailed => "lock failed",
            Self::ThreadPanic => "thread panic",
        };
        f.write_str(description)
    }
}
|
||||
|
||||
/// An I/O failure: a coarse category plus an optional free-form message.
#[derive(Debug, Clone)]
pub struct IoError {
    msg: Option<String>,
    kind: IoErrorKind,
}

impl IoError {
    /// Builds an I/O error of the given `kind`, optionally annotated with `msg`.
    #[must_use]
    pub const fn new(kind: IoErrorKind, msg: Option<String>) -> Self {
        Self { msg, kind }
    }
}

/// The categories of I/O failure; `Display` gives the user-facing wording.
#[derive(Debug, Clone)]
pub enum IoErrorKind {
    NotFound,
    PermissionDenied,
    InvalidData,
    Other,
}

impl std::fmt::Display for IoErrorKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::NotFound => "file not found",
            Self::PermissionDenied => "permission denied",
            Self::InvalidData => "invalid data",
            Self::Other => "other I/O error",
        };
        f.write_str(description)
    }
}

impl std::fmt::Display for IoError {
    /// Writes the kind, followed by the quoted message when there is one.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.msg {
            Some(msg) => write!(f, "{}, \"{msg}\"", self.kind),
            None => write!(f, "{}", self.kind),
        }
    }
}
|
||||
|
||||
impl Display for AssembleErrorKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Tokenise(why) => write!(f, "tokeniser error: {why}"),
|
||||
Self::Tokeniser(why) => write!(f, "tokeniser error: {why}"),
|
||||
Self::Unimplemented(why) => write!(f, "used unimplemented feature: {why}"),
|
||||
Self::Io(why) => write!(f, "problem occurred with I/O: {why}"),
|
||||
#[allow(unreachable_patterns)]
|
||||
_ => write!(
|
||||
f,
|
||||
"unhandled error type in Display implementation! See error.rs!"
|
||||
@@ -73,10 +226,4 @@ impl Display for AssembleErrorKind {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for AssembleErrorKind {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
Self::IO(err)
|
||||
}
|
||||
}
|
||||
|
||||
pub mod conversions;
|
||||
|
||||
@@ -1,7 +1,67 @@
|
||||
use crate::error::AssembleError;
|
||||
use std::{
|
||||
io::ErrorKind,
|
||||
sync::{PoisonError, RwLockReadGuard, RwLockWriteGuard},
|
||||
};
|
||||
|
||||
use crate::error::{AssembleError, IoError, IoErrorKind};
|
||||
|
||||
use super::{AssembleErrorKind, ThreadingError};
|
||||
|
||||
impl From<std::io::Error> for IoError {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
let kind = match err.kind() {
|
||||
ErrorKind::NotFound => IoErrorKind::NotFound,
|
||||
ErrorKind::PermissionDenied => IoErrorKind::PermissionDenied,
|
||||
ErrorKind::InvalidData => IoErrorKind::InvalidData,
|
||||
_ => IoErrorKind::Other,
|
||||
};
|
||||
|
||||
let msg = err.to_string();
|
||||
|
||||
Self::new(kind, Some(msg))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for AssembleError {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
Self::new_other_error(err.into())
|
||||
Self::new_other_error(AssembleErrorKind::Io(err.into()))
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe attempt recovery? To be honest we don't want any threads to panic at all,
|
||||
// or we want them all to panic spectacularly.
|
||||
impl<T> From<PoisonError<RwLockReadGuard<'_, T>>> for AssembleError {
|
||||
fn from(err: PoisonError<RwLockReadGuard<'_, T>>) -> Self {
|
||||
Self::new_other_error(AssembleErrorKind::Threading(err.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<RwLockReadGuard<'_, T>>> for ThreadingError {
|
||||
fn from(_err: PoisonError<RwLockReadGuard<'_, T>>) -> Self {
|
||||
Self::LockFailed
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<RwLockWriteGuard<'_, T>>> for AssembleError {
|
||||
fn from(err: PoisonError<RwLockWriteGuard<'_, T>>) -> Self {
|
||||
Self::new_other_error(AssembleErrorKind::Threading(err.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<RwLockWriteGuard<'_, T>>> for ThreadingError {
|
||||
fn from(_err: PoisonError<RwLockWriteGuard<'_, T>>) -> Self {
|
||||
Self::LockFailed
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::fmt::Error> for AssembleError {
|
||||
fn from(err: std::fmt::Error) -> Self {
|
||||
IoError::new(IoErrorKind::Other, Some(err.to_string())).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<IoError> for AssembleError {
|
||||
fn from(err: IoError) -> Self {
|
||||
Self::new_other_error(AssembleErrorKind::Io(err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
)]
|
||||
|
||||
pub mod args;
|
||||
pub mod image_builder;
|
||||
// pub mod tooling;
|
||||
pub mod compiler_engine;
|
||||
pub mod context;
|
||||
pub mod error;
|
||||
pub mod model;
|
||||
@@ -23,11 +23,7 @@ pub mod symtab;
|
||||
|
||||
mod util;
|
||||
|
||||
pub mod prelude {
|
||||
pub use crate::image_builder;
|
||||
// pub use crate::tooling::brainf;
|
||||
// pub use crate::tooling::project;
|
||||
}
|
||||
// pub mod prelude {}
|
||||
|
||||
use num_cpus as _;
|
||||
use threadpool as _;
|
||||
|
||||
@@ -4,7 +4,10 @@
|
||||
//!
|
||||
//! They have unique identifiers in the form of UUIDs.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -22,7 +25,7 @@ impl ModuleId {
|
||||
|
||||
/// Convenience method to get the [`Module`] from a [`ModuleId`].
|
||||
#[must_use]
|
||||
pub fn to_module<'m>(&self, registry: &'m ModuleRegistry) -> Option<&'m Module> {
|
||||
pub fn to_module<'m>(&self, registry: &'m ModuleRegistry) -> Option<&'m Arc<Module>> {
|
||||
registry.get(self)
|
||||
}
|
||||
|
||||
@@ -40,7 +43,7 @@ impl std::fmt::Display for ModuleId {
|
||||
}
|
||||
|
||||
/// A single source file or compilation unit. Stores its own symbol table.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Module {
|
||||
/// The name of the module. This is typically the name of the file, less the `.dsa`
|
||||
/// extension.
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
//! This module contains the code for the module registry. This is a singleton storing all
|
||||
//! the modules being assembled.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use super::module::{Module, ModuleId};
|
||||
|
||||
/// Stores all the [`Module`]'s to be assembled.
|
||||
pub struct ModuleRegistry {
|
||||
modules: HashMap<ModuleId, Module>,
|
||||
modules: HashMap<ModuleId, Arc<Module>>,
|
||||
}
|
||||
|
||||
impl Default for ModuleRegistry {
|
||||
@@ -17,26 +17,28 @@ impl Default for ModuleRegistry {
|
||||
}
|
||||
|
||||
impl ModuleRegistry {
|
||||
#[must_use] pub fn new() -> Self {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
modules: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets a [`Module`] by ID.
|
||||
#[must_use] pub fn get(&self, module_id: &ModuleId) -> Option<&Module> {
|
||||
#[must_use]
|
||||
pub fn get(&self, module_id: &ModuleId) -> Option<&Arc<Module>> {
|
||||
self.modules.get(module_id)
|
||||
}
|
||||
|
||||
/// Adds a [`Module`] and returns its [`ModuleId`].
|
||||
pub fn add(&mut self, module: Module) -> ModuleId {
|
||||
pub fn add(&mut self, module: Arc<Module>) -> ModuleId {
|
||||
let id = module.id;
|
||||
self.modules.insert(id, module);
|
||||
id
|
||||
}
|
||||
|
||||
/// Returns an iterator of modules.
|
||||
pub fn modules(&self) -> impl Iterator<Item = &Module> {
|
||||
pub fn modules(&self) -> impl Iterator<Item = &Arc<Module>> {
|
||||
self.modules.values()
|
||||
}
|
||||
}
|
||||
|
||||
+11
-1
@@ -1,12 +1,17 @@
|
||||
//! This module contains anything within the first stage of assembly, i.e. the
|
||||
//! tokenisation stage, or utility functions for reading input files.
|
||||
|
||||
use std::path::Path;
|
||||
use std::{
|
||||
io::{BufRead, Lines},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::error::AssembleError;
|
||||
|
||||
pub mod lines;
|
||||
pub mod source_info;
|
||||
pub mod token;
|
||||
pub mod token_info;
|
||||
pub mod tokeniser;
|
||||
|
||||
/// Attempts to load and open a source file, returning a [`Vec<u8>`] or an
|
||||
@@ -16,3 +21,8 @@ pub fn load_source_bytes<P: AsRef<Path>>(p: P) -> Result<Vec<u8>, AssembleError>
|
||||
|
||||
Ok(std::fs::read(path)?)
|
||||
}
|
||||
|
||||
/// Returns an iterator over the lines of `rdr`, exactly as [`BufRead::lines`] does.
pub fn reader_lines<R: BufRead>(rdr: R) -> Lines<R> {
    BufRead::lines(rdr)
}
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
//! Enhanced lines iterator that tracks line numbers and character positions.
|
||||
|
||||
use std::io::{BufRead, BufReader, Cursor};
|
||||
|
||||
use crate::error::AssembleError;
|
||||
|
||||
/// Iterator that yields lines with their line numbers and character spans.
|
||||
pub struct LinesWithSpans<R: BufRead> {
|
||||
reader: R,
|
||||
line_number: usize,
|
||||
total_chars: usize,
|
||||
buffer: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LineSpan {
|
||||
/// The line number.
|
||||
pub line_number: usize,
|
||||
/// The contents of the line.
|
||||
pub content: String,
|
||||
/// Character offset from start of file.
|
||||
pub start_char: usize,
|
||||
/// End character offset (exclusive).
|
||||
pub end_char: usize,
|
||||
}
|
||||
|
||||
impl<R: BufRead> LinesWithSpans<R> {
|
||||
pub const fn new(reader: R) -> Self {
|
||||
Self {
|
||||
reader,
|
||||
line_number: 0,
|
||||
total_chars: 0,
|
||||
buffer: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: BufRead> Iterator for LinesWithSpans<R> {
|
||||
type Item = Result<LineSpan, AssembleError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.buffer.clear();
|
||||
|
||||
match self.reader.read_line(&mut self.buffer) {
|
||||
Ok(0) => None, // EOF
|
||||
Ok(bytes_read) => {
|
||||
self.line_number += 1;
|
||||
let start_char = self.total_chars;
|
||||
self.total_chars += bytes_read;
|
||||
|
||||
// Remove trailing newline for cleaner processing
|
||||
let content = if self.buffer.ends_with('\n') {
|
||||
self.buffer[..self.buffer.len() - 1].to_string()
|
||||
} else {
|
||||
self.buffer.clone()
|
||||
};
|
||||
|
||||
Some(Ok(LineSpan {
|
||||
line_number: self.line_number,
|
||||
content,
|
||||
start_char,
|
||||
end_char: self.total_chars,
|
||||
}))
|
||||
}
|
||||
Err(e) => Some(Err(e.into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create lines iterator from data.
|
||||
#[must_use]
|
||||
pub fn lines_with_spans(data: &[u8]) -> LinesWithSpans<BufReader<Cursor<&[u8]>>> {
|
||||
let cursor = Cursor::new(data);
|
||||
let reader = BufReader::new(cursor);
|
||||
LinesWithSpans::new(reader)
|
||||
}
|
||||
@@ -4,22 +4,98 @@
|
||||
//! This will likely be attached to a [`Token`] which will in turn be attached to an AST
|
||||
//! [`Node`].
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::{
|
||||
fmt::{Display, Write},
|
||||
fs::File,
|
||||
io::BufReader,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use crate::model::module::Module;
|
||||
use crate::{
|
||||
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
|
||||
model::module::Module,
|
||||
source::lines::LinesWithSpans,
|
||||
};
|
||||
|
||||
/// Information on where the token is within the source.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SourceInfo {
|
||||
/// The line number within the source file underpinned by `module_id`.
|
||||
pub line_no: usize,
|
||||
pub module: Module,
|
||||
pub module: Arc<Module>,
|
||||
/// The indexes where this token may be found (line-local).
|
||||
pub span: std::ops::Range<usize>,
|
||||
}
|
||||
|
||||
impl Display for SourceInfo {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.module.name)
|
||||
write!(
|
||||
f,
|
||||
"{}:{}, column {}",
|
||||
self.module.path.display(),
|
||||
self.line_no,
|
||||
self.span.start
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceInfo {
|
||||
#[must_use]
|
||||
pub const fn new(
|
||||
line_no: usize,
|
||||
module: Arc<Module>,
|
||||
span: std::ops::Range<usize>,
|
||||
) -> Self {
|
||||
Self {
|
||||
line_no,
|
||||
module,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints out where in the source code the error originated with an underline similar
|
||||
/// to what rustc does.
|
||||
pub fn print_context_with_underline(&self) -> Result<(), AssembleError> {
|
||||
let f = File::open(&self.module.path)?;
|
||||
let rdr = BufReader::new(f);
|
||||
|
||||
let mut lines = LinesWithSpans::new(rdr);
|
||||
|
||||
let Some(line_result) = lines.nth(self.line_no - 1) else {
|
||||
// Handle a line not existing.
|
||||
return Err(AssembleError::new_source_error(
|
||||
self.clone(),
|
||||
AssembleErrorKind::Io(IoError::new(
|
||||
IoErrorKind::Other,
|
||||
Some(format!(
|
||||
"the line {} does not exist in input file `{}` but source info suggested otherwise!.",
|
||||
self.line_no,
|
||||
self.module.path.display()
|
||||
)),
|
||||
)),
|
||||
));
|
||||
};
|
||||
|
||||
let line_span = line_result?;
|
||||
|
||||
// Print the line number and line content.
|
||||
println!("{:>4} | {}", self.line_no, line_span.content);
|
||||
|
||||
let mut underline = String::new();
|
||||
write!(underline, "{:>4} | ", "")?;
|
||||
|
||||
for _ in 0..self.span.start {
|
||||
underline.push(' ');
|
||||
}
|
||||
|
||||
for _ in self.span.start..self.span.end.min(line_span.content.len()) {
|
||||
underline.push('^');
|
||||
}
|
||||
|
||||
// Print the underline in red and bold.
|
||||
// TODO: Use a crate to make this extra portable.
|
||||
println!("\x1b[1;31m{underline}\x1b[0m");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,12 @@
|
||||
//! easier to build from scratch and edit his code than it would be to try and wrangle it
|
||||
//! into shape.
|
||||
|
||||
use crate::source::source_info::SourceInfo;
|
||||
use crate::source::{
|
||||
source_info::SourceInfo,
|
||||
token_info::{
|
||||
DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum TokenType {
|
||||
@@ -18,45 +23,24 @@ pub enum TokenType {
|
||||
Instruction(InstructionToken),
|
||||
/// Label definition (e.g., `loop_start:`).
|
||||
Label(LabelToken),
|
||||
/// Assembler directive (e.g., `.global`, `.section`, `.dw`).
|
||||
/// Assembler directive (e.g., `.global`, `.section`, `.dw`, `.resb`).
|
||||
Directive(DirectiveToken),
|
||||
/// Comma separator.
|
||||
Comma,
|
||||
/// End of line.
|
||||
Newline,
|
||||
/// End of file.
|
||||
Eof,
|
||||
/// A line comment. This is to be filtered out of the token stream.
|
||||
Comment,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Token {
|
||||
/// The type of the token.
|
||||
token_type: TokenType,
|
||||
pub token_type: TokenType,
|
||||
/// Where in the source code is this [`Token`]?
|
||||
source_info: SourceInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SymbolToken {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct LabelToken {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct DirectiveToken {
|
||||
pub directive: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct RegisterToken {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct InstructionToken {
|
||||
pub mnemonic: String,
|
||||
pub source_info: SourceInfo,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/// Payload of a symbol token: a bare identifier that was not recognised as anything
/// more specific.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SymbolToken {
|
||||
/// The identifier text of the symbol.
pub name: String,
|
||||
}
|
||||
|
||||
/// Payload of a label-definition token (e.g. `loop_start:`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct LabelToken {
|
||||
/// The label's name. The tokeniser stores it without the trailing `:`.
pub name: String,
|
||||
}
|
||||
|
||||
/// Payload of an assembler-directive token (e.g. `.global`, `.section`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct DirectiveToken {
|
||||
/// The directive's name. The tokeniser stores it without the leading `.`.
pub directive: String,
|
||||
}
|
||||
|
||||
/// Payload of a register token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct RegisterToken {
|
||||
/// The register's name exactly as written in the source (e.g. `r3`, `sp`).
pub name: String,
|
||||
}
|
||||
|
||||
/// Payload of an instruction token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct InstructionToken {
|
||||
/// The instruction mnemonic exactly as written in the source (e.g. `add`, `jmp`).
pub mnemonic: String,
|
||||
}
|
||||
@@ -1,7 +1,301 @@
|
||||
//! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and
|
||||
//! outputs a [`Vec<Token>`].
|
||||
|
||||
/// Consumes a [`Vec<u8>`] and outputs a [`Vec`] of [Token]'s.
|
||||
pub struct Tokeniser {}
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{
|
||||
context::AssemblerContext,
|
||||
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
|
||||
model::module::Module,
|
||||
source::{
|
||||
lines::lines_with_spans,
|
||||
load_source_bytes,
|
||||
source_info::SourceInfo,
|
||||
token::{Token, TokenType},
|
||||
token_info::{
|
||||
DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
pub mod error;
|
||||
|
||||
/// Consumes a [`Vec<u8>`] of source bytes and outputs a [`Vec`] of [`Token`]s.
///
/// All token patterns are compiled once when the tokeniser is constructed and are
/// reused for every line.
|
||||
pub struct Tokeniser {
|
||||
/// The data in the file.
|
||||
pub data: Vec<u8>,
|
||||
/// The path to the file.
|
||||
pub path: PathBuf,
|
||||
|
||||
// Pre-compiled regex patterns
|
||||
/// Recognises a label definition.
label_regex: Regex,
|
||||
/// Recognises a register name.
register_regex: Regex,
|
||||
/// Recognises a numeric immediate.
immediate_regex: Regex,
|
||||
/// Recognises an assembler directive.
directive_regex: Regex,
|
||||
/// Recognises an instruction mnemonic.
instruction_regex: Regex,
|
||||
/// Recognises a bare identifier.
symbol_regex: Regex,
|
||||
/// Recognises a double-quoted string.
string_regex: Regex,
|
||||
/// Recognises a `//` line comment.
comment_regex: Regex,
|
||||
}
|
||||
|
||||
impl Tokeniser {
|
||||
#[must_use]
|
||||
pub fn from_data(data: Vec<u8>, path: PathBuf) -> Self {
|
||||
Self {
|
||||
data,
|
||||
path,
|
||||
|
||||
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
|
||||
.expect("Failed to compile label regex pattern"),
|
||||
register_regex: Regex::new(r"^(r[0-9]+|sp|fp|pc)")
|
||||
.expect("Failed to compile register regex pattern"),
|
||||
immediate_regex: Regex::new(r"^(0x[0-9a-fA-F]+|[0-9]+)")
|
||||
.expect("Failed to compile immediate regex pattern"),
|
||||
directive_regex: Regex::new(r"^\.([a-zA-Z]+)")
|
||||
.expect("Failed to compile directive regex pattern"),
|
||||
instruction_regex: Regex::new(
|
||||
r"^(add|sub|mul|div|jmp|call|ret|lli|nop|halt)",
|
||||
)
|
||||
.expect("Failed to compile instruction regex pattern"),
|
||||
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)")
|
||||
.expect("Failed to compile symbol regex pattern"),
|
||||
string_regex: Regex::new(r#"^"([^"]*)"#)
|
||||
.expect("Failed to compile string regex pattern"),
|
||||
comment_regex: Regex::new("//.*")
|
||||
.expect("Failed to compile comment regex pattern"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a [`Tokeniser`] from a file path.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, AssembleError> {
|
||||
let path = path.as_ref().to_path_buf();
|
||||
let data = load_source_bytes(&path)?;
|
||||
|
||||
Ok(Self::from_data(data, path))
|
||||
}
|
||||
|
||||
// Note that modules are tokenised in their own threads, possibly in parallel.
|
||||
pub fn tokenise(self, ctx: &AssemblerContext) -> Result<Vec<Token>, AssembleError> {
|
||||
let module_name = self.extract_module_name()?;
|
||||
|
||||
// Create a module for the source file being processed.
|
||||
let module = Arc::new(Module::new(module_name, &self.path));
|
||||
|
||||
{
|
||||
let mut module_registry = ctx.module_registry.write()?;
|
||||
module_registry.add(module.clone());
|
||||
}
|
||||
|
||||
let mut token_stream = Vec::new();
|
||||
let lines = lines_with_spans(&self.data);
|
||||
|
||||
// Process each line
|
||||
for line_result in lines {
|
||||
let line_span = line_result?;
|
||||
let trimmed = line_span.content.trim();
|
||||
|
||||
// Skip empty lines and add newline tokens
|
||||
if trimmed.is_empty() {
|
||||
token_stream.push(Token::new(
|
||||
TokenType::Newline,
|
||||
SourceInfo::new(line_span.line_number, module.clone(), 0..1),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Actually tokenize the line content
|
||||
let line_tokens = self.tokenize_line(&line_span, &module)?;
|
||||
token_stream.extend(line_tokens);
|
||||
|
||||
// Add newline token at end of line
|
||||
token_stream.push(Token::new(
|
||||
TokenType::Newline,
|
||||
SourceInfo::new(
|
||||
line_span.line_number,
|
||||
module.clone(),
|
||||
line_span.content.len()..line_span.content.len(),
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// Add EOF token
|
||||
token_stream.push(Token::new(TokenType::Eof, SourceInfo::new(0, module, 0..0)));
|
||||
|
||||
Ok(token_stream)
|
||||
}
|
||||
|
||||
fn tokenize_line(
|
||||
&self,
|
||||
line_span: &crate::source::lines::LineSpan,
|
||||
module: &Arc<Module>,
|
||||
) -> Result<Vec<Token>, AssembleError> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut remaining = line_span.content.trim();
|
||||
let start_column = line_span.start_char;
|
||||
|
||||
while !remaining.is_empty() {
|
||||
// Try to match a token.
|
||||
let (token_type, consumed) = self.match_token(remaining)?;
|
||||
|
||||
tokens.push(Token::new(
|
||||
token_type,
|
||||
SourceInfo::new(
|
||||
line_span.line_number,
|
||||
module.clone(),
|
||||
start_column..start_column + consumed,
|
||||
),
|
||||
));
|
||||
|
||||
// Advance position.
|
||||
remaining = remaining[consumed..].trim_start();
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
fn try_match_comment(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.comment_regex.captures(input)?;
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Comment, len))
|
||||
}
|
||||
|
||||
fn try_match_label(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.label_regex.captures(input)?;
|
||||
let name = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Label(LabelToken { name }), len))
|
||||
}
|
||||
|
||||
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.register_regex.captures(input)?;
|
||||
let name = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Register(RegisterToken { name }), len))
|
||||
}
|
||||
|
||||
fn try_match_immediate(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.immediate_regex.captures(input)?;
|
||||
let value_str = caps.get(1)?.as_str();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
let value = if let Some(hex_part) = value_str.strip_prefix("0x") {
|
||||
u32::from_str_radix(hex_part, 16).ok()?
|
||||
} else if let Some(bin_part) = value_str.strip_prefix("0b") {
|
||||
u32::from_str_radix(bin_part, 2).ok()?
|
||||
} else if let Some(oct_part) = value_str.strip_prefix("0o") {
|
||||
u32::from_str_radix(oct_part, 8).ok()?
|
||||
} else {
|
||||
value_str.parse::<u32>().ok()?
|
||||
};
|
||||
|
||||
Some((TokenType::Immediate(value), len))
|
||||
}
|
||||
|
||||
fn try_match_directive(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.directive_regex.captures(input)?;
|
||||
let directive = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Directive(DirectiveToken { directive }), len))
|
||||
}
|
||||
|
||||
fn try_match_instruction(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.instruction_regex.captures(input)?;
|
||||
let mnemonic = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Instruction(InstructionToken { mnemonic }), len))
|
||||
}
|
||||
|
||||
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.symbol_regex.captures(input)?;
|
||||
let name = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Symbol(SymbolToken { name }), len))
|
||||
}
|
||||
|
||||
fn try_match_string(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.string_regex.captures(input)?;
|
||||
let content = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::String(content), len))
|
||||
}
|
||||
|
||||
fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> {
|
||||
if let Some(m) = self.try_match_comment(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_label(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_register(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_immediate(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_directive(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_instruction(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_string(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_symbol(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
// Handle miscellaneous characters.
|
||||
match input.chars().next() {
|
||||
Some(',') => Ok((TokenType::Comma, 1)),
|
||||
Some(c) => Err(AssembleError::new_other_error(AssembleErrorKind::Io(
|
||||
IoError::new(
|
||||
IoErrorKind::InvalidData,
|
||||
Some(format!("Unexpected character: '{c}'")),
|
||||
),
|
||||
))),
|
||||
None => Err(AssembleError::new_other_error(AssembleErrorKind::Io(
|
||||
IoError::new(
|
||||
IoErrorKind::InvalidData,
|
||||
Some("Unexpected end of input".to_string()),
|
||||
),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_module_name(&self) -> Result<String, AssembleError> {
|
||||
let module_name = self
|
||||
.path
|
||||
.file_name()
|
||||
.map(|f| f.to_string_lossy().to_string())
|
||||
.ok_or_else(|| {
|
||||
AssembleError::new_other_error(AssembleErrorKind::Io(IoError::new(
|
||||
IoErrorKind::InvalidData,
|
||||
Some(
|
||||
"filename couldn't be extracted, is it valid UTF-8?".to_string(),
|
||||
),
|
||||
)))
|
||||
})?;
|
||||
|
||||
Ok(module_name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! This module contains the error types for the tokeniser.
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum TokeniserError {}
|
||||
|
||||
impl std::fmt::Display for TokeniserError {
|
||||
|
||||
+17
-16
@@ -46,12 +46,11 @@ impl SymbolTable {
|
||||
&& let Some(existing) = self.symbols.get(&existing_id)
|
||||
&& existing.module_id == module_id
|
||||
{
|
||||
return Err(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::AlreadyExists,
|
||||
format!("Symbol '{name}' already defined in module"),
|
||||
)),
|
||||
));
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::AlreadyExists,
|
||||
format!("Symbol '{name}' already defined in module"),
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
// Add to all mappings
|
||||
@@ -63,19 +62,22 @@ impl SymbolTable {
|
||||
}
|
||||
|
||||
/// Gets the [`Symbol`] by its [`SymbolId`].
|
||||
#[must_use] pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
|
||||
self.symbols.get(id)
|
||||
}
|
||||
|
||||
/// Gets the [`Symbol`] by its name.
|
||||
#[must_use] pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
|
||||
self.name_to_id
|
||||
.get(name)
|
||||
.and_then(|id| self.symbols.get(id))
|
||||
}
|
||||
|
||||
/// Gets all [`Symbol`]s in a module.
|
||||
#[must_use] pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
|
||||
self.module_symbols
|
||||
.get(module_id)
|
||||
.map(|ids| ids.iter().filter_map(|id| self.symbols.get(id)).collect())
|
||||
@@ -83,7 +85,8 @@ impl SymbolTable {
|
||||
}
|
||||
|
||||
/// Gets all the public symbols.
|
||||
#[must_use] pub fn get_public_symbols(&self) -> Vec<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get_public_symbols(&self) -> Vec<&Symbol> {
|
||||
self.symbols
|
||||
.values()
|
||||
.filter(|sym| matches!(sym.visibility, Visibility::Public))
|
||||
@@ -104,12 +107,10 @@ impl SymbolTable {
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Err(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"Symbol not found",
|
||||
)),
|
||||
))
|
||||
Err(
|
||||
std::io::Error::new(std::io::ErrorKind::NotFound, "Symbol not found")
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ pub mod logging;
|
||||
|
||||
use std::io::Write;
|
||||
|
||||
pub fn input(prompt: &str) -> String {
|
||||
pub fn _input(prompt: &str) -> String {
|
||||
print!("{prompt}\n > ");
|
||||
std::io::stdout().flush().expect("Failed to flush stdout");
|
||||
let mut input = String::new();
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use assembler::compiler_engine::CompilerEngine;
|
||||
use common::prelude::Instruction;
|
||||
use egui::{Align, Context, Key, Layout, Ui};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user