Compare commits
23 Commits
11a57eab51
...
elf
| Author | SHA1 | Date | |
|---|---|---|---|
| b91207bfde | |||
| 4ac630ba02 | |||
| 85e3d443cc | |||
| 0528768947 | |||
| 21582f1297 | |||
| 6ceb35d439 | |||
| 8bb252e941 | |||
| 5317988fdd | |||
| d15e00c272 | |||
| a65dca6c5c | |||
| b8be1bd95f | |||
| f42c6d4095 | |||
| eebea82c4a | |||
| ed4fcc8495 | |||
| 40f8b1d57b | |||
| 68e459f32b | |||
| d9807b5b36 | |||
| 7cb7525484 | |||
| 7565374d5b | |||
| 9b9e153500 | |||
| 27267e3daa | |||
| fb84a6d3c3 | |||
| 4e5db58a84 |
@@ -5,3 +5,7 @@ rustc-wrapper = "sccache"
|
||||
|
||||
[future-incompat-report]
|
||||
frequency = "always"
|
||||
|
||||
[profile.profiling]
|
||||
inherits = "release"
|
||||
debug = true
|
||||
|
||||
Generated
+39
@@ -129,6 +129,15 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android-activity"
|
||||
version = "0.6.0"
|
||||
@@ -269,6 +278,7 @@ dependencies = [
|
||||
"clap",
|
||||
"common",
|
||||
"num_cpus",
|
||||
"regex",
|
||||
"threadpool",
|
||||
"uuid",
|
||||
]
|
||||
@@ -2691,6 +2701,35 @@ dependencies = [
|
||||
"thiserror 2.0.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "renderdoc-sys"
|
||||
version = "1.1.0"
|
||||
|
||||
@@ -15,3 +15,7 @@ panic = "abort" # Cranelift does not support stack unwinds.
|
||||
lto = false
|
||||
debug = true
|
||||
incremental = false # sccache does not support caching incremental crates.
|
||||
|
||||
[profile.release]
|
||||
incremental = true
|
||||
lto = "fat"
|
||||
|
||||
@@ -16,5 +16,6 @@ path = "src/lib.rs"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
common = { path = "../common" }
|
||||
num_cpus = "1.17.0"
|
||||
regex = "1.11.1"
|
||||
threadpool = "1.8.1"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
|
||||
@@ -5,7 +5,8 @@ pub struct Args {
|
||||
/// The output format to assemble to. Currently just ELF or a flat binary.
|
||||
#[arg(value_enum)]
|
||||
output_format: Option<OutputFormat>,
|
||||
/// Whether the relocatable object files should be statically linked into a single executable or library.
|
||||
/// Whether the relocatable object files should be statically linked into a single
|
||||
/// executable or library.
|
||||
link: bool,
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,374 @@
|
||||
//! Simple compiler engine that orchestrates the entire compilation process.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
|
||||
use crate::error::{AssembleErrorKind, IoErrorKind};
|
||||
use crate::{
|
||||
context::AssemblerContext,
|
||||
error::AssembleError,
|
||||
model::module::ModuleId,
|
||||
source::{token::Token, tokeniser::Tokeniser},
|
||||
};
|
||||
|
||||
use common::instructions::Instruction;
|
||||
|
||||
/// Error type for the `CompilerEngine`
|
||||
#[derive(Debug)]
|
||||
pub enum EngineError {
|
||||
/// Assembly error during compilation
|
||||
Assembly(AssembleError),
|
||||
/// Channel communication error
|
||||
Channel(String),
|
||||
/// Other generic error
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for EngineError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Assembly(e) => write!(f, "Assembly error: {e}"),
|
||||
Self::Channel(msg) => write!(f, "Channel error: {msg}"),
|
||||
Self::Other(msg) => write!(f, "Engine error: {msg}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for EngineError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::Assembly(e) => Some(e),
|
||||
Self::Channel(_) | Self::Other(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from AssembleError
|
||||
impl From<AssembleError> for EngineError {
|
||||
fn from(error: AssembleError) -> Self {
|
||||
Self::Assembly(error)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from mpsc::SendError
|
||||
impl<T> From<mpsc::SendError<T>> for EngineError {
|
||||
fn from(error: mpsc::SendError<T>) -> Self {
|
||||
Self::Channel(format!("Send error: {error}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from mpsc::RecvError
|
||||
impl From<mpsc::RecvError> for EngineError {
|
||||
fn from(error: mpsc::RecvError) -> Self {
|
||||
Self::Channel(format!("Receive error: {error}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from mpsc::TryRecvError
|
||||
impl From<mpsc::TryRecvError> for EngineError {
|
||||
fn from(error: mpsc::TryRecvError) -> Self {
|
||||
Self::Channel(format!("Try receive error: {error}"))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from String for generic errors
|
||||
impl From<String> for EngineError {
|
||||
fn from(error: String) -> Self {
|
||||
Self::Other(error)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from &str for convenience
|
||||
impl From<&str> for EngineError {
|
||||
fn from(error: &str) -> Self {
|
||||
Self::Other(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple compiler engine that orchestrates the entire compilation process.
|
||||
pub struct CompilerEngine {
|
||||
result_tx: mpsc::Sender<Result<Vec<Instruction>, EngineError>>,
|
||||
result_rx: Option<mpsc::Receiver<Result<Vec<Instruction>, EngineError>>>,
|
||||
is_running: bool,
|
||||
}
|
||||
|
||||
impl CompilerEngine {
|
||||
/// Create a new compiler engine
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
let (tx, rx) = mpsc::channel();
|
||||
Self {
|
||||
result_tx: tx,
|
||||
result_rx: Some(rx),
|
||||
is_running: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the compilation process in a separate thread
|
||||
pub fn start_compilation<P: AsRef<Path>>(&mut self, src: P) {
|
||||
if self.is_running {
|
||||
return;
|
||||
}
|
||||
|
||||
let src = src.as_ref().to_path_buf();
|
||||
let tx = self.result_tx.clone();
|
||||
|
||||
thread::spawn(move || {
|
||||
let result = assemble(&src).map_err(EngineError::from);
|
||||
let _ = tx.send(result); // Ignore send errors if receiver is dropped
|
||||
});
|
||||
|
||||
self.is_running = true;
|
||||
}
|
||||
|
||||
/// Check if compilation is complete and get the result
|
||||
pub fn try_get_result(&mut self) -> Option<Result<Vec<Instruction>, EngineError>> {
|
||||
if !self.is_running {
|
||||
return None;
|
||||
}
|
||||
|
||||
match self
|
||||
.result_rx
|
||||
.as_ref()
|
||||
.expect("result_rx should be Some while compilation is running")
|
||||
.try_recv()
|
||||
{
|
||||
Ok(result) => {
|
||||
self.is_running = false;
|
||||
Some(result)
|
||||
}
|
||||
Err(mpsc::TryRecvError::Empty) => None,
|
||||
Err(mpsc::TryRecvError::Disconnected) => {
|
||||
self.is_running = false;
|
||||
Some(Err(EngineError::Channel(
|
||||
"Compilation thread disconnected".to_string(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Block until compilation is complete and return the result
|
||||
pub fn wait_for_result(&mut self) -> Result<Vec<Instruction>, EngineError> {
|
||||
if !self.is_running {
|
||||
return Err(EngineError::Other("No compilation in progress".to_string()));
|
||||
}
|
||||
|
||||
let result = self
|
||||
.result_rx
|
||||
.take()
|
||||
.expect("result_rx should be Some while waiting for compilation result")
|
||||
.recv()
|
||||
.map_err(EngineError::from)?;
|
||||
|
||||
self.is_running = false;
|
||||
result
|
||||
}
|
||||
|
||||
/// Add a source file to be compiled (for compatibility with old interface)
|
||||
pub fn add_source_file<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
path: P,
|
||||
) -> Result<(), EngineError> {
|
||||
let path = path.as_ref().to_path_buf();
|
||||
|
||||
// Verify file exists
|
||||
if !path.exists() {
|
||||
return Err(EngineError::Assembly(AssembleError::new_other_error(
|
||||
AssembleErrorKind::Io(crate::error::IoError::new(
|
||||
IoErrorKind::NotFound,
|
||||
Some(format!("Source file not found: {}", path.display())),
|
||||
)),
|
||||
)));
|
||||
}
|
||||
|
||||
// For now, just validate the file exists
|
||||
// TODO: Could store multiple files for batch compilation
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compile all added source files (synchronous version)
|
||||
pub fn compile(&mut self) -> Result<CompileResult, EngineError> {
|
||||
// This is a placeholder that matches the old interface
|
||||
// For now, return empty result since we don't have a specific file to compile
|
||||
Ok(CompileResult {
|
||||
modules: Vec::new(),
|
||||
tokens: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get access to the assembler context (placeholder)
|
||||
pub fn context(&self) -> Result<&AssemblerContext, EngineError> {
|
||||
// For now, return an error since we're using the threaded approach
|
||||
// TODO: Integrate context properly when we have more compilation phases
|
||||
Err(EngineError::Other(
|
||||
"Context not available in threaded mode".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CompilerEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Main assembly function that orchestrates the compilation process
|
||||
fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||
// Verify the file exists
|
||||
if !src.exists() {
|
||||
return Err(AssembleError::new_other_error(AssembleErrorKind::Io(
|
||||
crate::error::IoError::new(
|
||||
IoErrorKind::NotFound,
|
||||
Some(format!("Source file not found: {}", src.display())),
|
||||
),
|
||||
)));
|
||||
}
|
||||
|
||||
let mut modules = HashSet::new();
|
||||
let mut all_tokens = HashMap::new();
|
||||
let mut module_ids = Vec::new();
|
||||
|
||||
// Create a new assembler context for this compilation
|
||||
let context = AssemblerContext::new();
|
||||
|
||||
// Process the main file and its dependencies
|
||||
prepare_dependency(
|
||||
src,
|
||||
&mut modules,
|
||||
&mut all_tokens,
|
||||
&mut module_ids,
|
||||
&context,
|
||||
)?;
|
||||
|
||||
// Phase 2: Parse tokens into AST (placeholder for now)
|
||||
// TODO: Add parser here when implemented
|
||||
println!("Phase 2: Parsing {} modules...", module_ids.len());
|
||||
|
||||
// Phase 3: Symbol resolution (placeholder for now)
|
||||
// TODO: Add symbol resolution here when implemented
|
||||
println!("Phase 3: Resolving symbols...");
|
||||
|
||||
// Phase 4: Code generation (placeholder for now)
|
||||
// TODO: Add code generation here when implemented
|
||||
println!("Phase 4: Generating code...");
|
||||
|
||||
// For now, return empty instructions since we don't have the full pipeline yet
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
/// Prepare a dependency (file) for compilation
|
||||
fn prepare_dependency(
|
||||
path: &Path,
|
||||
modules: &mut HashSet<u64>,
|
||||
all_tokens: &mut HashMap<ModuleId, Vec<Token>>,
|
||||
module_ids: &mut Vec<ModuleId>,
|
||||
context: &AssemblerContext,
|
||||
) -> Result<(), AssembleError> {
|
||||
let filename = path.file_name().and_then(|n| n.to_str()).ok_or_else(|| {
|
||||
AssembleError::new_other_error(AssembleErrorKind::Io(crate::error::IoError::new(
|
||||
IoErrorKind::InvalidData,
|
||||
Some("Failed to get file name from path".to_string()),
|
||||
)))
|
||||
})?;
|
||||
|
||||
// Calculate a simple hash for the file (similar to quick_hash)
|
||||
let file_hash = calculate_file_hash(path);
|
||||
|
||||
// Skip if we've already processed this module
|
||||
if modules.contains(&file_hash) {
|
||||
return Ok(());
|
||||
}
|
||||
modules.insert(file_hash);
|
||||
|
||||
if let Ok(canonical_path) = path.canonicalize() {
|
||||
println!("Building {} [{}]", filename, canonical_path.display());
|
||||
}
|
||||
|
||||
// Phase 1: Tokenize the file
|
||||
println!("Tokenising {filename}");
|
||||
let tokeniser = Tokeniser::new(path, context)?;
|
||||
let tokens = tokeniser.tokenise()?;
|
||||
|
||||
// Get the module ID that was registered during tokenization
|
||||
let module_id = get_module_id_for_file(path, context)?;
|
||||
|
||||
all_tokens.insert(module_id, tokens);
|
||||
module_ids.push(module_id);
|
||||
|
||||
// TODO: Parse tokens to find dependencies (.include directives, etc.)
|
||||
// For now, we'll just process the single file
|
||||
println!("Resolving dependencies for {filename}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Hashes a file path (similar to the old `quick_hash`).
///
/// The canonical form of the path is hashed when it can be resolved; otherwise the path
/// is hashed exactly as given.
fn calculate_file_hash(path: &Path) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();

    match path.canonicalize() {
        Ok(resolved) => resolved.hash(&mut state),
        Err(_) => path.hash(&mut state),
    }

    state.finish()
}
|
||||
|
||||
/// Get the module ID for a given source file
|
||||
fn get_module_id_for_file(
|
||||
file_path: &Path,
|
||||
context: &AssemblerContext,
|
||||
) -> Result<ModuleId, AssembleError> {
|
||||
{
|
||||
let registry = context.module_registry.read()?;
|
||||
|
||||
// Find module by path.
|
||||
for module in registry.modules() {
|
||||
if module.path == file_path {
|
||||
return Ok(module.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(AssembleError::new_other_error(AssembleErrorKind::Io(
|
||||
crate::error::IoError::new(
|
||||
IoErrorKind::NotFound,
|
||||
Some(format!(
|
||||
"Module not found for file: {}",
|
||||
file_path.display()
|
||||
)),
|
||||
),
|
||||
)))
|
||||
}
|
||||
|
||||
/// Result of compilation. This is useless at present but compiles.
|
||||
#[derive(Debug)]
|
||||
pub struct CompileResult {
|
||||
pub modules: Vec<ModuleId>,
|
||||
pub tokens: HashMap<ModuleId, Vec<Token>>,
|
||||
}
|
||||
|
||||
impl CompileResult {
|
||||
/// Get tokens for a specific module
|
||||
#[must_use]
|
||||
pub fn get_tokens(&self, module_id: &ModuleId) -> Option<&Vec<Token>> {
|
||||
self.tokens.get(module_id)
|
||||
}
|
||||
|
||||
/// Get all module IDs
|
||||
#[must_use]
|
||||
pub fn module_ids(&self) -> &[ModuleId] {
|
||||
&self.modules
|
||||
}
|
||||
|
||||
/// Get total number of tokens across all modules
|
||||
#[must_use]
|
||||
pub fn total_tokens(&self) -> usize {
|
||||
self.tokens.values().map(std::vec::Vec::len).sum()
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,8 @@ impl Default for AssemblerContext {
|
||||
}
|
||||
|
||||
impl AssemblerContext {
|
||||
#[must_use] pub fn new() -> Self {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
symbol_table: RwLock::new(SymbolTable::new()),
|
||||
module_registry: RwLock::new(ModuleRegistry::new()),
|
||||
|
||||
+205
-12
@@ -13,6 +13,9 @@ pub struct AssembleError {
|
||||
source_info: Option<SourceInfo>,
|
||||
/// The type of assembly error that occurred.
|
||||
kind: AssembleErrorKind,
|
||||
/// Whether context should be added to errors being printed. This might get changed
|
||||
/// to Verbosity in the future.
|
||||
display_quietly: bool,
|
||||
}
|
||||
|
||||
impl AssembleError {
|
||||
@@ -24,6 +27,7 @@ impl AssembleError {
|
||||
Self {
|
||||
source_info: Some(source_info),
|
||||
kind,
|
||||
display_quietly: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,16 +36,86 @@ impl AssembleError {
|
||||
Self {
|
||||
source_info: None,
|
||||
kind,
|
||||
display_quietly: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints a parser error to the screen.
|
||||
fn print_parser_error(
|
||||
&self,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
parse_error: &ParserError,
|
||||
) -> std::fmt::Result {
|
||||
let Some(source_info) = &self.source_info else {
|
||||
write!(
|
||||
f,
|
||||
"parser error thrown with no source information. Error: {parse_error}"
|
||||
)?;
|
||||
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
writeln!(f, "parser error of type `{parse_error}`.\n")?;
|
||||
|
||||
// Prints out the context for our error.
|
||||
if !self.display_quietly {
|
||||
source_info.print_context_with_underline().map_err(|e| {
|
||||
_ = writeln!(f, "print context error: {e}");
|
||||
|
||||
std::fmt::Error {}
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Prints a tokeniser error to the screen.
|
||||
fn print_tokeniser_error(
|
||||
&self,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
err: &TokeniserError,
|
||||
) -> std::fmt::Result {
|
||||
let Some(source_info) = &self.source_info else {
|
||||
write!(
|
||||
f,
|
||||
"Tokeniser error thrown with no source information. Error: {err}"
|
||||
)?;
|
||||
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
writeln!(f, "tokeniser error of type `{err}`.\n")?;
|
||||
|
||||
// Prints out the context for our error.
|
||||
source_info.print_context_with_underline().map_err(|e| {
|
||||
_ = writeln!(f, "Print context error: {e}");
|
||||
|
||||
std::fmt::Error {}
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for AssembleError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(info) = &self.source_info {
|
||||
write!(f, "at {info}")?;
|
||||
write!(f, "At {info}, got ")?;
|
||||
|
||||
match &self.kind {
|
||||
AssembleErrorKind::Parser(err) => self.print_parser_error(f, err)?,
|
||||
AssembleErrorKind::Tokeniser(err) => {
|
||||
self.print_tokeniser_error(f, err)?;
|
||||
}
|
||||
_ => write!(f, "{}", self.kind)?,
|
||||
}
|
||||
|
||||
writeln!(f)?;
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Handle errors without SourceInfo.
|
||||
write!(f, "{}", self.kind)?;
|
||||
|
||||
Ok(())
|
||||
@@ -51,20 +125,145 @@ impl Display for AssembleError {
|
||||
/// Marker trait.
|
||||
impl std::error::Error for AssembleError {}
|
||||
|
||||
/// Different types of errors that may occur when assembling a set of input source files.
|
||||
#[derive(Debug, Clone)]
|
||||
#[non_exhaustive]
|
||||
#[derive(Debug)]
|
||||
pub enum AssembleErrorKind {
|
||||
/// Usually unexpected I/O errors. Not normally recoverable.
|
||||
IO(std::io::Error),
|
||||
Io(IoError),
|
||||
/// Errors emitted from the [`Tokeniser`].
|
||||
Tokenise(TokeniserError),
|
||||
Tokeniser(TokeniserError),
|
||||
Parser(ParserError),
|
||||
Symbol(SymbolError),
|
||||
Codegen(CodegenError),
|
||||
Threading(ThreadingError),
|
||||
/// Returned for code where the functionality has not yet been implemented but we
|
||||
/// don't want the program to panic.
|
||||
Unimplemented(&'static str),
|
||||
}
|
||||
|
||||
/// Kinds of error carried by `AssembleErrorKind::Parser`.
#[derive(Debug, Clone)]
pub enum ParserError {
    UnexpectedToken,
    MissingOperand,
    InvalidInstruction,
    MissingLabel,
    DuplicateLabel,
}

impl Display for ParserError {
    /// Writes a short lowercase description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::UnexpectedToken => "unexpected token",
            Self::MissingOperand => "missing operand",
            Self::InvalidInstruction => "invalid instruction",
            Self::MissingLabel => "missing label",
            Self::DuplicateLabel => "duplicate label",
        };

        f.write_str(description)
    }
}
|
||||
|
||||
/// Kinds of error carried by `AssembleErrorKind::Symbol`.
#[derive(Debug, Clone)]
pub enum SymbolError {
    Undefined,
    Duplicate,
    CircularDependency,
    InvalidReference,
}

impl Display for SymbolError {
    /// Writes a short lowercase description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::Undefined => "undefined symbol",
            Self::Duplicate => "duplicate symbol",
            Self::CircularDependency => "circular dependency",
            Self::InvalidReference => "invalid reference",
        };

        f.write_str(description)
    }
}
|
||||
|
||||
/// Kinds of error carried by `AssembleErrorKind::Codegen`.
#[derive(Debug, Clone)]
pub enum CodegenError {
    InvalidOperand,
    OutOfRange,
    UnsupportedInstruction,
}

impl Display for CodegenError {
    /// Writes a short lowercase description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::InvalidOperand => "invalid operand",
            Self::OutOfRange => "out of range",
            Self::UnsupportedInstruction => "unsupported instruction",
        };

        f.write_str(description)
    }
}
|
||||
|
||||
/// Kinds of error carried by `AssembleErrorKind::Threading`.
#[derive(Debug, Clone)]
pub enum ThreadingError {
    LockFailed,
    ThreadPanic,
}

impl Display for ThreadingError {
    /// Writes a short lowercase description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::LockFailed => "lock failed",
            Self::ThreadPanic => "thread panic",
        };

        f.write_str(description)
    }
}
|
||||
|
||||
/// An I/O failure: a coarse category plus an optional human-readable message.
#[derive(Debug, Clone)]
pub struct IoError {
    /// Extra detail to show after the category, if any.
    msg: Option<String>,
    /// The category of the failure.
    kind: IoErrorKind,
}

impl IoError {
    /// Builds an I/O error from its category and optional message.
    #[must_use]
    pub const fn new(kind: IoErrorKind, msg: Option<String>) -> Self {
        Self { msg, kind }
    }
}

/// Categories of I/O failure.
#[derive(Debug, Clone)]
pub enum IoErrorKind {
    NotFound,
    PermissionDenied,
    InvalidData,
    Other,
}

impl std::fmt::Display for IoErrorKind {
    /// Writes a short lowercase description of the category.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::NotFound => "file not found",
            Self::PermissionDenied => "permission denied",
            Self::InvalidData => "invalid data",
            Self::Other => "other I/O error",
        };

        f.write_str(description)
    }
}

impl std::fmt::Display for IoError {
    /// Writes the category, followed by the quoted message when one is present.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.msg {
            Some(detail) => write!(f, "{}, \"{detail}\"", self.kind),
            None => write!(f, "{}", self.kind),
        }
    }
}
|
||||
|
||||
impl Display for AssembleErrorKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Tokenise(why) => write!(f, "tokeniser error: {why}"),
|
||||
Self::Tokeniser(why) => write!(f, "tokeniser error: {why}"),
|
||||
Self::Unimplemented(why) => write!(f, "used unimplemented feature: {why}"),
|
||||
Self::Io(why) => write!(f, "problem occurred with I/O: {why}"),
|
||||
#[allow(unreachable_patterns)]
|
||||
_ => write!(
|
||||
f,
|
||||
"unhandled error type in Display implementation! See error.rs!"
|
||||
@@ -73,10 +272,4 @@ impl Display for AssembleErrorKind {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for AssembleErrorKind {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
Self::IO(err)
|
||||
}
|
||||
}
|
||||
|
||||
pub mod conversions;
|
||||
|
||||
@@ -1,7 +1,67 @@
|
||||
use crate::error::AssembleError;
|
||||
use std::{
|
||||
io::ErrorKind,
|
||||
sync::{PoisonError, RwLockReadGuard, RwLockWriteGuard},
|
||||
};
|
||||
|
||||
use crate::error::{AssembleError, IoError, IoErrorKind};
|
||||
|
||||
use super::{AssembleErrorKind, ThreadingError};
|
||||
|
||||
impl From<std::io::Error> for IoError {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
let kind = match err.kind() {
|
||||
ErrorKind::NotFound => IoErrorKind::NotFound,
|
||||
ErrorKind::PermissionDenied => IoErrorKind::PermissionDenied,
|
||||
ErrorKind::InvalidData => IoErrorKind::InvalidData,
|
||||
_ => IoErrorKind::Other,
|
||||
};
|
||||
|
||||
let msg = err.to_string();
|
||||
|
||||
Self::new(kind, Some(msg))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for AssembleError {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
Self::new_other_error(err.into())
|
||||
Self::new_other_error(AssembleErrorKind::Io(err.into()))
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe attempt recovery? To be honest we don't want any threads to panic at all,
|
||||
// or we want them all to panic spectacularly.
|
||||
impl<T> From<PoisonError<RwLockReadGuard<'_, T>>> for AssembleError {
|
||||
fn from(err: PoisonError<RwLockReadGuard<'_, T>>) -> Self {
|
||||
Self::new_other_error(AssembleErrorKind::Threading(err.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<RwLockReadGuard<'_, T>>> for ThreadingError {
|
||||
fn from(_err: PoisonError<RwLockReadGuard<'_, T>>) -> Self {
|
||||
Self::LockFailed
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<RwLockWriteGuard<'_, T>>> for AssembleError {
|
||||
fn from(err: PoisonError<RwLockWriteGuard<'_, T>>) -> Self {
|
||||
Self::new_other_error(AssembleErrorKind::Threading(err.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<RwLockWriteGuard<'_, T>>> for ThreadingError {
|
||||
fn from(_err: PoisonError<RwLockWriteGuard<'_, T>>) -> Self {
|
||||
Self::LockFailed
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::fmt::Error> for AssembleError {
|
||||
fn from(err: std::fmt::Error) -> Self {
|
||||
IoError::new(IoErrorKind::Other, Some(err.to_string())).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<IoError> for AssembleError {
|
||||
fn from(err: IoError) -> Self {
|
||||
Self::new_other_error(AssembleErrorKind::Io(err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
)]
|
||||
|
||||
pub mod args;
|
||||
pub mod image_builder;
|
||||
// pub mod tooling;
|
||||
pub mod compiler_engine;
|
||||
pub mod context;
|
||||
pub mod error;
|
||||
pub mod model;
|
||||
@@ -23,11 +23,7 @@ pub mod symtab;
|
||||
|
||||
mod util;
|
||||
|
||||
pub mod prelude {
|
||||
pub use crate::image_builder;
|
||||
// pub use crate::tooling::brainf;
|
||||
// pub use crate::tooling::project;
|
||||
}
|
||||
// pub mod prelude {}
|
||||
|
||||
use num_cpus as _;
|
||||
use threadpool as _;
|
||||
|
||||
+30
-1
@@ -1,3 +1,10 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use assembler::{
|
||||
error::{AssembleError, AssembleErrorKind, ParserError},
|
||||
model::module::Module,
|
||||
source::{source_info::SourceInfo, token::TokenType, tokeniser::Tokeniser},
|
||||
};
|
||||
use common as _;
|
||||
use num_cpus as _;
|
||||
use threadpool as _;
|
||||
@@ -5,9 +12,31 @@ use threadpool as _;
|
||||
// use clap::Parser;
|
||||
// use std::{fs, io::Write, path::PathBuf};
|
||||
|
||||
fn main() {
|
||||
fn main() -> Result<(), AssembleError> {
|
||||
// // Parse command line arguments
|
||||
// let args: Vec<String> = std::env::args().collect();
|
||||
let contents = include_bytes!("../../resources/dsa/bf.dsa").to_vec();
|
||||
|
||||
let module = Arc::new(Module::new("resources/dsa/bf.dsa")?);
|
||||
let tok = Tokeniser::from_data(contents, module.clone());
|
||||
|
||||
let ts = tok
|
||||
.tokenise()?
|
||||
.into_iter()
|
||||
.filter(|t| !matches!(t.token_type, TokenType::Eof | TokenType::Newline));
|
||||
|
||||
for t in ts {
|
||||
t.source_info.print_context_with_underline()?;
|
||||
}
|
||||
|
||||
let test_error: AssembleError = AssembleError::new_source_error(
|
||||
SourceInfo::new(45, module.clone(), 4..7),
|
||||
AssembleErrorKind::Parser(ParserError::InvalidInstruction),
|
||||
);
|
||||
|
||||
eprintln!("\n\n{test_error}");
|
||||
|
||||
Ok(())
|
||||
|
||||
// let _clap_args = assembler::args::Args::parse();
|
||||
|
||||
|
||||
@@ -4,17 +4,29 @@
|
||||
//!
|
||||
//! They have unique identifiers in the form of UUIDs.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::model::module_registry::ModuleRegistry;
|
||||
use crate::{
|
||||
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
|
||||
model::module_registry::ModuleRegistry,
|
||||
};
|
||||
|
||||
/// The ID for a module. A tuple struct for type safety.
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct ModuleId(Uuid);
|
||||
|
||||
impl ModuleId {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self(Uuid::new_v4())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn from_module(module: &Module) -> Self {
|
||||
module.id
|
||||
@@ -22,7 +34,7 @@ impl ModuleId {
|
||||
|
||||
/// Convenience method to get the [`Module`] from a [`ModuleId`].
|
||||
#[must_use]
|
||||
pub fn to_module<'m>(&self, registry: &'m ModuleRegistry) -> Option<&'m Module> {
|
||||
pub fn to_module<'m>(&self, registry: &'m ModuleRegistry) -> Option<&'m Arc<Module>> {
|
||||
registry.get(self)
|
||||
}
|
||||
|
||||
@@ -33,6 +45,12 @@ impl ModuleId {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ModuleId {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ModuleId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
@@ -40,7 +58,7 @@ impl std::fmt::Display for ModuleId {
|
||||
}
|
||||
|
||||
/// A single source file or compilation unit. Stores its own symbol table.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Module {
|
||||
/// The name of the module. This is typically the name of the file, less the `.dsa`
|
||||
/// extension.
|
||||
@@ -58,11 +76,35 @@ impl std::hash::Hash for Module {
|
||||
}
|
||||
|
||||
impl Module {
|
||||
pub fn new<P: AsRef<Path>>(name: String, path: P) -> Self {
|
||||
Self {
|
||||
name,
|
||||
path: path.as_ref().to_path_buf(),
|
||||
id: ModuleId(Uuid::new_v4()),
|
||||
}
|
||||
pub fn new<P: AsRef<Path>>(p: P) -> Result<Self, AssembleError> {
|
||||
let path = p.as_ref().to_path_buf();
|
||||
let name = Self::extract_module_name(&path)?;
|
||||
let id = ModuleId::new();
|
||||
|
||||
Ok(Self { name, path, id })
|
||||
}
|
||||
|
||||
/// Gets the name for a module from the path.
|
||||
fn extract_module_name<P: AsRef<Path>>(path: P) -> Result<String, AssembleError> {
|
||||
let extensions_regex = Regex::new(".(dsa|S|asm)$")
|
||||
.expect("For some reason the regular expression failed to compile!");
|
||||
let module_name = path
|
||||
.as_ref()
|
||||
.file_name()
|
||||
.map(|f| f.to_string_lossy())
|
||||
.ok_or_else(|| {
|
||||
AssembleError::new_other_error(AssembleErrorKind::Io(IoError::new(
|
||||
IoErrorKind::InvalidData,
|
||||
Some(
|
||||
"the filename couldn't be extracted, is it valid UTF-8?"
|
||||
.to_string(),
|
||||
),
|
||||
)))
|
||||
})?;
|
||||
|
||||
// Strip any file extensions given. We don't care for now.
|
||||
let out = extensions_regex.replace(&module_name, "");
|
||||
|
||||
Ok(out.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
//! This module contains the code for the module registry. This is a singleton storing all
|
||||
//! the modules being assembled.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use super::module::{Module, ModuleId};
|
||||
|
||||
/// Stores all the [`Module`]'s to be assembled.
|
||||
pub struct ModuleRegistry {
|
||||
modules: HashMap<ModuleId, Module>,
|
||||
modules: HashMap<ModuleId, Arc<Module>>,
|
||||
}
|
||||
|
||||
impl Default for ModuleRegistry {
|
||||
@@ -17,26 +17,28 @@ impl Default for ModuleRegistry {
|
||||
}
|
||||
|
||||
impl ModuleRegistry {
|
||||
#[must_use] pub fn new() -> Self {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
modules: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets a [`Module`] by ID.
|
||||
#[must_use] pub fn get(&self, module_id: &ModuleId) -> Option<&Module> {
|
||||
#[must_use]
|
||||
pub fn get(&self, module_id: &ModuleId) -> Option<&Arc<Module>> {
|
||||
self.modules.get(module_id)
|
||||
}
|
||||
|
||||
/// Adds a [`Module`] and returns its [`ModuleId`].
|
||||
pub fn add(&mut self, module: Module) -> ModuleId {
|
||||
pub fn add(&mut self, module: Arc<Module>) -> ModuleId {
|
||||
let id = module.id;
|
||||
self.modules.insert(id, module);
|
||||
id
|
||||
}
|
||||
|
||||
/// Returns an iterator of modules.
|
||||
pub fn modules(&self) -> impl Iterator<Item = &Module> {
|
||||
pub fn modules(&self) -> impl Iterator<Item = &Arc<Module>> {
|
||||
self.modules.values()
|
||||
}
|
||||
}
|
||||
|
||||
+12
-1
@@ -1,12 +1,18 @@
|
||||
//! This module contains anything within the first stage of assembly, i.e. the
|
||||
//! tokenisation stage, or utility functions for reading input files.
|
||||
|
||||
use std::path::Path;
|
||||
use std::{
|
||||
io::{BufRead, Lines},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::error::AssembleError;
|
||||
|
||||
pub mod lines;
|
||||
pub mod opcode;
|
||||
pub mod source_info;
|
||||
pub mod token;
|
||||
pub mod token_info;
|
||||
pub mod tokeniser;
|
||||
|
||||
/// Attempts to load and open a source file, returning a [`Vec<u8>`] or an
|
||||
@@ -16,3 +22,8 @@ pub fn load_source_bytes<P: AsRef<Path>>(p: P) -> Result<Vec<u8>, AssembleError>
|
||||
|
||||
Ok(std::fs::read(path)?)
|
||||
}
|
||||
|
||||
/// Returns the standard line iterator over a buffered reader.
///
/// Accepts any [`BufRead`] implementor and hands back the [`Lines`] iterator produced
/// by [`BufRead::lines`].
pub fn reader_lines<R: BufRead>(rdr: R) -> Lines<R> {
    BufRead::lines(rdr)
}
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
//! Enhanced lines iterator that tracks line numbers and character positions.
|
||||
|
||||
use std::io::{BufRead, BufReader, Cursor};
|
||||
|
||||
use crate::error::AssembleError;
|
||||
|
||||
/// Iterator that yields lines with their line numbers and character spans.
|
||||
pub struct LinesWithSpans<R: BufRead> {
|
||||
reader: R,
|
||||
line_number: usize,
|
||||
total_chars: usize,
|
||||
buffer: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LineSpan {
|
||||
/// The line number.
|
||||
pub line_number: usize,
|
||||
/// The contents of the line.
|
||||
pub content: String,
|
||||
/// Character offset from start of file.
|
||||
pub start_char: usize,
|
||||
/// End character offset (exclusive).
|
||||
pub end_char: usize,
|
||||
}
|
||||
|
||||
impl<R: BufRead> LinesWithSpans<R> {
|
||||
pub const fn new(reader: R) -> Self {
|
||||
Self {
|
||||
reader,
|
||||
line_number: 0,
|
||||
total_chars: 0,
|
||||
buffer: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: BufRead> Iterator for LinesWithSpans<R> {
|
||||
type Item = Result<LineSpan, AssembleError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.buffer.clear();
|
||||
|
||||
match self.reader.read_line(&mut self.buffer) {
|
||||
Ok(0) => None, // EOF
|
||||
Ok(bytes_read) => {
|
||||
self.line_number += 1;
|
||||
let start_char = self.total_chars;
|
||||
self.total_chars += bytes_read;
|
||||
|
||||
// Remove trailing newline for cleaner processing
|
||||
let content = if self.buffer.ends_with('\n') {
|
||||
self.buffer[..self.buffer.len() - 1].to_string()
|
||||
} else {
|
||||
self.buffer.clone()
|
||||
};
|
||||
|
||||
Some(Ok(LineSpan {
|
||||
line_number: self.line_number,
|
||||
content,
|
||||
start_char,
|
||||
end_char: self.total_chars,
|
||||
}))
|
||||
}
|
||||
Err(e) => Some(Err(e.into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create lines iterator from data.
|
||||
#[must_use]
|
||||
pub fn lines_with_spans(data: &[u8]) -> LinesWithSpans<BufReader<Cursor<&[u8]>>> {
|
||||
let cursor = Cursor::new(data);
|
||||
let reader = BufReader::new(cursor);
|
||||
LinesWithSpans::new(reader)
|
||||
}
|
||||
@@ -0,0 +1,349 @@
|
||||
//! This module contains instructions for tokenisation.
|
||||
|
||||
use std::{fmt, str::FromStr};
|
||||
|
||||
use common::prelude::{ITypeArgs, Instruction, Interrupt, RTypeArgs};
|
||||
|
||||
use crate::{
|
||||
error::{AssembleError, AssembleErrorKind},
|
||||
source::source_info::SourceInfo,
|
||||
};
|
||||
|
||||
/// Every mnemonic known to the assembler: real instructions, pseudo-instructions and
/// meta instructions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum Opcode {
|
||||
// Real instructions: `to_opcode_value` maps these to 0x00 (`Nop`) through 0x26 (`SubI`).
Nop,
|
||||
Mov,
|
||||
Movs,
|
||||
Ldb,
|
||||
Ldbs,
|
||||
Ldh,
|
||||
Ldhs,
|
||||
Ldw,
|
||||
Stb,
|
||||
Sth,
|
||||
Stw,
|
||||
Lli,
|
||||
Lui,
|
||||
Jmp,
|
||||
Jeq,
|
||||
Jne,
|
||||
Jgt,
|
||||
Jge,
|
||||
Jlt,
|
||||
Jle,
|
||||
Cmp,
|
||||
Inc,
|
||||
Dec,
|
||||
Shl,
|
||||
Shr,
|
||||
Add,
|
||||
Sub,
|
||||
And,
|
||||
Or,
|
||||
Not,
|
||||
Xor,
|
||||
Nand,
|
||||
Nor,
|
||||
Xnor,
|
||||
Int,
|
||||
Irt,
|
||||
Hlt,
|
||||
AddI,
|
||||
SubI,
|
||||
|
||||
// Pseudo-instructions: `to_opcode_value` returns `None` for every variant in this group.
|
||||
Db,
|
||||
Dh,
|
||||
Dw,
|
||||
Resb,
|
||||
Resh,
|
||||
Resw,
|
||||
Push,
|
||||
Pop,
|
||||
Pusha,
|
||||
Popa,
|
||||
Lwi,
|
||||
Call,
|
||||
Return,
|
||||
|
||||
// Meta instructions (these aren't present in the binary as instructions).
// NOTE(review): `Segment` is nevertheless given the value 0x27 by `to_opcode_value`,
// and neither `Data` nor `Segment` is accepted by the `FromStr` impl — confirm intent.
|
||||
Include,
|
||||
Data,
|
||||
Segment,
|
||||
}
|
||||
|
||||
/// Errors produced while parsing assembly text into opcode-related values.
#[derive(Debug)]
pub enum OpcodeFromStrError {
    /// The register name is not known; carries the offending name.
    InvalidRegister(&'static str),
    /// The mnemonic is not known; carries the text that failed to parse.
    InvalidOpcode(String),
}

impl std::fmt::Display for OpcodeFromStrError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidOpcode(op) => {
                write!(f, "instruction does not exist: {op}")
            }
            Self::InvalidRegister(reg) => {
                write!(f, "register does not exist: {reg}")
            }
        }
    }
}

impl std::error::Error for OpcodeFromStrError {}
|
||||
|
||||
impl Opcode {
|
||||
/// Lower-case mnemonics, grouped as real instructions, pseudo-instructions and meta
/// instructions (in that order).
pub const OPCODES: &[&str] = &[
    // Real instructions (0x00-0x26)
    "nop", "mov", "movs",
    "ldb", "ldbs", "ldh", "ldhs", "ldw",
    "stb", "sth", "stw",
    "lli", "lui",
    "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle",
    "cmp", "inc", "dec", "shl", "shr",
    "add", "sub", "and", "or", "not", "xor", "nand", "nor", "xnor",
    "int", "irt", "hlt",
    "addi", "subi",
    // Pseudo-instructions
    "db", "dh", "dw",
    "resb", "resh", "resw",
    "push", "pop", "lwi", "call", "return", "pusha", "popa",
    // meta instructions
    "include",
];
|
||||
|
||||
pub fn to_instruction(
|
||||
&self,
|
||||
source_info: SourceInfo,
|
||||
) -> Result<Instruction, AssembleError> {
|
||||
match self {
|
||||
Self::Nop => Ok(Instruction::Nop),
|
||||
Self::Mov => Ok(Instruction::Mov(RTypeArgs::default())),
|
||||
Self::Movs => Ok(Instruction::MovSigned(RTypeArgs::default())),
|
||||
Self::Ldb => Ok(Instruction::LoadByte(ITypeArgs::default())),
|
||||
Self::Ldbs => Ok(Instruction::LoadByteSigned(ITypeArgs::default())),
|
||||
Self::Ldh => Ok(Instruction::LoadHalfword(ITypeArgs::default())),
|
||||
Self::Ldhs => Ok(Instruction::LoadHalfwordSigned(ITypeArgs::default())),
|
||||
Self::Ldw => Ok(Instruction::LoadWord(ITypeArgs::default())),
|
||||
Self::Stb => Ok(Instruction::StoreByte(ITypeArgs::default())),
|
||||
Self::Sth => Ok(Instruction::StoreHalfword(ITypeArgs::default())),
|
||||
Self::Stw => Ok(Instruction::StoreWord(ITypeArgs::default())),
|
||||
Self::Lli => Ok(Instruction::LoadLowerImmediate(ITypeArgs::default())),
|
||||
Self::Lui => Ok(Instruction::LoadUpperImmediate(ITypeArgs::default())),
|
||||
Self::Jmp => Ok(Instruction::Jump(ITypeArgs::default())),
|
||||
Self::Jeq => Ok(Instruction::JumpEq(ITypeArgs::default())),
|
||||
Self::Jne => Ok(Instruction::JumpNeq(ITypeArgs::default())),
|
||||
Self::Jgt => Ok(Instruction::JumpGt(ITypeArgs::default())),
|
||||
Self::Jge => Ok(Instruction::JumpGe(ITypeArgs::default())),
|
||||
Self::Jlt => Ok(Instruction::JumpLt(ITypeArgs::default())),
|
||||
Self::Jle => Ok(Instruction::JumpLe(ITypeArgs::default())),
|
||||
Self::Cmp => Ok(Instruction::Compare(RTypeArgs::default())),
|
||||
Self::Inc => Ok(Instruction::Increment(RTypeArgs::default())),
|
||||
Self::Dec => Ok(Instruction::Decrement(RTypeArgs::default())),
|
||||
Self::Shl => Ok(Instruction::ShiftLeft(RTypeArgs::default())),
|
||||
Self::Shr => Ok(Instruction::ShiftRight(RTypeArgs::default())),
|
||||
Self::Add => Ok(Instruction::Add(RTypeArgs::default())),
|
||||
Self::Sub => Ok(Instruction::Sub(RTypeArgs::default())),
|
||||
Self::And => Ok(Instruction::And(RTypeArgs::default())),
|
||||
Self::Or => Ok(Instruction::Or(RTypeArgs::default())),
|
||||
Self::Not => Ok(Instruction::Not(RTypeArgs::default())),
|
||||
Self::Xor => Ok(Instruction::Xor(RTypeArgs::default())),
|
||||
Self::Nand => Ok(Instruction::Nand(RTypeArgs::default())),
|
||||
Self::Nor => Ok(Instruction::Nor(RTypeArgs::default())),
|
||||
Self::Xnor => Ok(Instruction::Xnor(RTypeArgs::default())),
|
||||
Self::Int => Ok(Instruction::Interrupt(Interrupt::default())),
|
||||
Self::Irt => Ok(Instruction::IntReturn),
|
||||
Self::Hlt => Ok(Instruction::Halt),
|
||||
Self::AddI => Ok(Instruction::AddImmediate(ITypeArgs::default())),
|
||||
Self::SubI => Ok(Instruction::SubImmediate(ITypeArgs::default())),
|
||||
Self::Segment => Ok(Instruction::Segment(0)),
|
||||
_ => Err(AssembleError::new_source_error(
|
||||
source_info,
|
||||
AssembleErrorKind::Unimplemented(
|
||||
"Opcode::to_instruction called on an instruction that does not exist in common.",
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn to_opcode_value(&self) -> Option<u8> {
|
||||
match self {
|
||||
Self::Nop => Some(0x00),
|
||||
Self::Mov => Some(0x01),
|
||||
Self::Movs => Some(0x02),
|
||||
Self::Ldb => Some(0x03),
|
||||
Self::Ldbs => Some(0x04),
|
||||
Self::Ldh => Some(0x05),
|
||||
Self::Ldhs => Some(0x06),
|
||||
Self::Ldw => Some(0x07),
|
||||
Self::Stb => Some(0x08),
|
||||
Self::Sth => Some(0x09),
|
||||
Self::Stw => Some(0x0A),
|
||||
Self::Lli => Some(0x0B),
|
||||
Self::Lui => Some(0x0C),
|
||||
Self::Jmp => Some(0x0D),
|
||||
Self::Jeq => Some(0x0E),
|
||||
Self::Jne => Some(0x0F),
|
||||
Self::Jgt => Some(0x10),
|
||||
Self::Jge => Some(0x11),
|
||||
Self::Jlt => Some(0x12),
|
||||
Self::Jle => Some(0x13),
|
||||
Self::Cmp => Some(0x14),
|
||||
Self::Inc => Some(0x15),
|
||||
Self::Dec => Some(0x16),
|
||||
Self::Shl => Some(0x17),
|
||||
Self::Shr => Some(0x18),
|
||||
Self::Add => Some(0x19),
|
||||
Self::Sub => Some(0x1A),
|
||||
Self::And => Some(0x1B),
|
||||
Self::Or => Some(0x1C),
|
||||
Self::Not => Some(0x1D),
|
||||
Self::Xor => Some(0x1E),
|
||||
Self::Nand => Some(0x1F),
|
||||
Self::Nor => Some(0x20),
|
||||
Self::Xnor => Some(0x21),
|
||||
Self::Int => Some(0x22),
|
||||
Self::Irt => Some(0x23),
|
||||
Self::Hlt => Some(0x24),
|
||||
Self::AddI => Some(0x25),
|
||||
Self::SubI => Some(0x26),
|
||||
// TODO: Maybe recombine pseudos?
|
||||
Self::Segment => Some(0x27),
|
||||
// Pseudo-instructions don't have opcode values
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn is_pseudo_instruction(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::Db
|
||||
| Self::Dh
|
||||
| Self::Dw
|
||||
| Self::Resb
|
||||
| Self::Resh
|
||||
| Self::Resw
|
||||
| Self::Push
|
||||
| Self::Pop
|
||||
| Self::Lwi
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Opcode {
|
||||
type Err = OpcodeFromStrError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"nop" => Ok(Self::Nop),
|
||||
"mov" => Ok(Self::Mov),
|
||||
"movs" => Ok(Self::Movs),
|
||||
"ldb" => Ok(Self::Ldb),
|
||||
"ldbs" => Ok(Self::Ldbs),
|
||||
"ldh" => Ok(Self::Ldh),
|
||||
"ldhs" => Ok(Self::Ldhs),
|
||||
"ldw" => Ok(Self::Ldw),
|
||||
"stb" => Ok(Self::Stb),
|
||||
"sth" => Ok(Self::Sth),
|
||||
"stw" => Ok(Self::Stw),
|
||||
"lli" => Ok(Self::Lli),
|
||||
"lui" => Ok(Self::Lui),
|
||||
"jmp" => Ok(Self::Jmp),
|
||||
"jeq" => Ok(Self::Jeq),
|
||||
"jne" => Ok(Self::Jne),
|
||||
"jgt" => Ok(Self::Jgt),
|
||||
"jge" => Ok(Self::Jge),
|
||||
"jlt" => Ok(Self::Jlt),
|
||||
"jle" => Ok(Self::Jle),
|
||||
"cmp" => Ok(Self::Cmp),
|
||||
"inc" => Ok(Self::Inc),
|
||||
"dec" => Ok(Self::Dec),
|
||||
"shl" => Ok(Self::Shl),
|
||||
"shr" => Ok(Self::Shr),
|
||||
"add" => Ok(Self::Add),
|
||||
"sub" => Ok(Self::Sub),
|
||||
"and" => Ok(Self::And),
|
||||
"or" => Ok(Self::Or),
|
||||
"not" => Ok(Self::Not),
|
||||
"xor" => Ok(Self::Xor),
|
||||
"nand" => Ok(Self::Nand),
|
||||
"nor" => Ok(Self::Nor),
|
||||
"xnor" => Ok(Self::Xnor),
|
||||
"int" => Ok(Self::Int),
|
||||
"irt" => Ok(Self::Irt),
|
||||
"hlt" => Ok(Self::Hlt),
|
||||
"addi" => Ok(Self::AddI),
|
||||
"subi" => Ok(Self::SubI),
|
||||
"db" => Ok(Self::Db),
|
||||
"dh" => Ok(Self::Dh),
|
||||
"dw" => Ok(Self::Dw),
|
||||
"resb" => Ok(Self::Resb),
|
||||
"resh" => Ok(Self::Resh),
|
||||
"resw" => Ok(Self::Resw),
|
||||
"push" => Ok(Self::Push),
|
||||
"pop" => Ok(Self::Pop),
|
||||
"lwi" => Ok(Self::Lwi),
|
||||
"include" => Ok(Self::Include),
|
||||
"call" => Ok(Self::Call),
|
||||
"return" => Ok(Self::Return),
|
||||
"pusha" => Ok(Self::Pusha),
|
||||
"popa" => Ok(Self::Popa),
|
||||
_ => Err(OpcodeFromStrError::InvalidOpcode(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Opcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Nop => write!(f, "nop"),
|
||||
Self::Mov => write!(f, "mov"),
|
||||
Self::Movs => write!(f, "movs"),
|
||||
Self::Ldb => write!(f, "ldb"),
|
||||
Self::Ldbs => write!(f, "ldbs"),
|
||||
Self::Ldh => write!(f, "ldh"),
|
||||
Self::Ldhs => write!(f, "ldhs"),
|
||||
Self::Ldw => write!(f, "ldw"),
|
||||
Self::Stb => write!(f, "stb"),
|
||||
Self::Sth => write!(f, "sth"),
|
||||
Self::Stw => write!(f, "stw"),
|
||||
Self::Lli => write!(f, "lli"),
|
||||
Self::Lui => write!(f, "lui"),
|
||||
Self::Jmp => write!(f, "jmp"),
|
||||
Self::Jeq => write!(f, "jeq"),
|
||||
Self::Jne => write!(f, "jne"),
|
||||
Self::Jgt => write!(f, "jgt"),
|
||||
Self::Jge => write!(f, "jge"),
|
||||
Self::Jlt => write!(f, "jlt"),
|
||||
Self::Jle => write!(f, "jle"),
|
||||
Self::Cmp => write!(f, "cmp"),
|
||||
Self::Inc => write!(f, "inc"),
|
||||
Self::Dec => write!(f, "dec"),
|
||||
Self::Shl => write!(f, "shl"),
|
||||
Self::Shr => write!(f, "shr"),
|
||||
Self::Add => write!(f, "add"),
|
||||
Self::Sub => write!(f, "sub"),
|
||||
Self::And => write!(f, "and"),
|
||||
Self::Or => write!(f, "or"),
|
||||
Self::Not => write!(f, "not"),
|
||||
Self::Xor => write!(f, "xor"),
|
||||
Self::Nand => write!(f, "nand"),
|
||||
Self::Nor => write!(f, "nor"),
|
||||
Self::Xnor => write!(f, "xnor"),
|
||||
Self::Int => write!(f, "int"),
|
||||
Self::Irt => write!(f, "irt"),
|
||||
Self::Hlt => write!(f, "hlt"),
|
||||
Self::AddI => write!(f, "addi"),
|
||||
Self::SubI => write!(f, "subi"),
|
||||
Self::Db => write!(f, "db"),
|
||||
Self::Dh => write!(f, "dh"),
|
||||
Self::Dw => write!(f, "dw"),
|
||||
Self::Resb => write!(f, "resb"),
|
||||
Self::Resh => write!(f, "resh"),
|
||||
Self::Resw => write!(f, "resw"),
|
||||
Self::Push => write!(f, "push"),
|
||||
Self::Pop => write!(f, "pop"),
|
||||
Self::Lwi => write!(f, "lwi"),
|
||||
Self::Call => write!(f, "call"),
|
||||
Self::Return => write!(f, "return"),
|
||||
Self::Pusha => write!(f, "pusha"),
|
||||
Self::Popa => write!(f, "popa"),
|
||||
|
||||
// meta instructions
|
||||
Self::Include => write!(f, "include"),
|
||||
Self::Data => write!(f, "data"),
|
||||
Self::Segment => write!(f, "[SEGMENT]"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
//! This module contains code for handling pseudo opcodes.
|
||||
|
||||
/// Pseudo-instructions that cannot simply be lowered to ISA instructions.
///
/// The enum currently declares no variants.
|
||||
pub enum PseudoOpcode {}
|
||||
@@ -4,22 +4,101 @@
|
||||
//! This will likely be attached to a [`Token`] which will in turn be attached to an AST
|
||||
//! [`Node`].
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::{
|
||||
fmt::{Display, Write},
|
||||
fs::File,
|
||||
io::BufReader,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use crate::model::module::Module;
|
||||
use crate::{
|
||||
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
|
||||
model::module::Module,
|
||||
source::lines::LinesWithSpans,
|
||||
};
|
||||
|
||||
/// Information on where the token is within the source.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SourceInfo {
|
||||
/// The line number within the source file underpinned by `module_id`.
|
||||
pub line_no: usize,
|
||||
pub module: Module,
|
||||
pub line_number: usize,
|
||||
/// The [`Module`] the source code is associated with.
|
||||
pub module: Arc<Module>,
|
||||
/// The indexes where this token may be found (line-local).
|
||||
pub span: std::ops::Range<usize>,
|
||||
}
|
||||
|
||||
impl Display for SourceInfo {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.module.name)
|
||||
write!(
|
||||
f,
|
||||
"{}:{}:{}",
|
||||
self.module.path.display(),
|
||||
self.line_number,
|
||||
self.span.start + 1
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceInfo {
|
||||
#[must_use]
|
||||
pub const fn new(
|
||||
line_no: usize,
|
||||
module: Arc<Module>,
|
||||
span: std::ops::Range<usize>,
|
||||
) -> Self {
|
||||
Self {
|
||||
line_number: line_no,
|
||||
module,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints out where in the source code the error originated with an underline similar
|
||||
/// to what rustc does.
|
||||
pub fn print_context_with_underline(&self) -> Result<(), AssembleError> {
|
||||
let f = File::open(&self.module.path)?;
|
||||
let rdr = BufReader::new(f);
|
||||
|
||||
let mut lines = LinesWithSpans::new(rdr);
|
||||
|
||||
let Some(line_result) = lines.nth(self.line_number - 1) else {
|
||||
// Handle a line not existing.
|
||||
return Err(AssembleError::new_source_error(
|
||||
self.clone(),
|
||||
AssembleErrorKind::Io(IoError::new(
|
||||
IoErrorKind::Other,
|
||||
Some(format!(
|
||||
"the line {} does not exist in input file `{}` but source info suggested otherwise!.",
|
||||
self.line_number,
|
||||
self.module.path.display()
|
||||
)),
|
||||
)),
|
||||
));
|
||||
};
|
||||
|
||||
let line_span = line_result?;
|
||||
|
||||
// Print the line number and line content.
|
||||
println!("{:>4} | {}", self.line_number, line_span.content);
|
||||
|
||||
let mut pad_left = String::new();
|
||||
write!(pad_left, "{:>4} ", "")?;
|
||||
|
||||
let mut underline = String::new();
|
||||
|
||||
for _ in 0..self.span.start {
|
||||
pad_left.push(' ');
|
||||
}
|
||||
|
||||
for _ in self.span.start..self.span.end.min(line_span.content.len()) {
|
||||
underline.push('^');
|
||||
}
|
||||
|
||||
// Print the underline in red and bold.
|
||||
// TODO: Use a crate to make this extra portable.
|
||||
println!("{pad_left}\x1b[1;31m{underline}\x1b[0m");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,15 @@
|
||||
//! easier to build from scratch and edit his code than it would be to try and wrangle it
|
||||
//! into shape.
|
||||
|
||||
use crate::source::source_info::SourceInfo;
|
||||
use common::prelude::*;
|
||||
|
||||
use crate::source::{
|
||||
opcode::Opcode,
|
||||
source_info::SourceInfo,
|
||||
token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
|
||||
};
|
||||
|
||||
/// Represents the different types of tokens that can be produced by the tokeniser.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum TokenType {
|
||||
/// Symbol reference (e.g., `loop_start`, `my_data`).
|
||||
@@ -14,12 +21,18 @@ pub enum TokenType {
|
||||
Immediate(u32),
|
||||
/// String literal (e.g., `"hello world"`).
|
||||
String(String),
|
||||
/// Intermediate token for multiline strings (filtered out in final output)
|
||||
StringContinuation,
|
||||
/// Assembly instruction (e.g., `add`, `jmp`, `nop`).
|
||||
Instruction(InstructionToken),
|
||||
Instruction(Opcode),
|
||||
/// Label definition (e.g., `loop_start:`).
|
||||
Label(LabelToken),
|
||||
/// Assembler directive (e.g., `.global`, `.section`, `.dw`).
|
||||
Directive(DirectiveToken),
|
||||
/// Comment (e.g., `// this is a comment`).
|
||||
Comment,
|
||||
/// Comma separator.
|
||||
Comma,
|
||||
/// End of line.
|
||||
Newline,
|
||||
/// End of file.
|
||||
@@ -29,34 +42,9 @@ pub enum TokenType {
|
||||
#[derive(Debug)]
|
||||
pub struct Token {
|
||||
/// The type of the token.
|
||||
token_type: TokenType,
|
||||
pub token_type: TokenType,
|
||||
/// Where in the source code is this [`Token`]?
|
||||
source_info: SourceInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SymbolToken {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct LabelToken {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct DirectiveToken {
|
||||
pub directive: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct RegisterToken {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct InstructionToken {
|
||||
pub mnemonic: String,
|
||||
pub source_info: SourceInfo,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
@@ -79,16 +67,13 @@ impl Token {
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn instruction(mnemonic: String, source_info: SourceInfo) -> Self {
|
||||
Self::new(
|
||||
TokenType::Instruction(InstructionToken { mnemonic }),
|
||||
source_info,
|
||||
)
|
||||
pub const fn instruction(op: Opcode, source_info: SourceInfo) -> Self {
|
||||
Self::new(TokenType::Instruction(op), source_info)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn register(name: String, source_info: SourceInfo) -> Self {
|
||||
Self::new(TokenType::Register(RegisterToken { name }), source_info)
|
||||
pub const fn register(reg: Register, source_info: SourceInfo) -> Self {
|
||||
Self::new(TokenType::Register(RegisterToken { reg }), source_info)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
use common::prelude::Register;
|
||||
|
||||
/// Payload of a symbol-reference token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SymbolToken {
|
||||
/// The symbol text; the tokeniser stores scoped references in `name::scope` form.
pub name: String,
|
||||
}
|
||||
|
||||
/// Payload of a label-definition token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct LabelToken {
|
||||
/// The label's name; the tokeniser stores it without the trailing `:`.
pub name: String,
|
||||
}
|
||||
|
||||
/// Payload of an assembler-directive token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct DirectiveToken {
|
||||
/// The directive keyword exactly as it was matched in the source.
pub directive: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct RegisterToken {
|
||||
pub reg: Register,
|
||||
}
|
||||
|
||||
impl RegisterToken {
|
||||
#[must_use]
|
||||
pub const fn new(reg: Register) -> Self {
|
||||
Self { reg }
|
||||
}
|
||||
|
||||
/// Returns the name of a valid [`Register`]
|
||||
#[must_use]
|
||||
pub fn name(&self) -> String {
|
||||
self.reg.to_string()
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,421 @@
|
||||
//! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and
|
||||
//! outputs a [`Vec<Token>`].
|
||||
|
||||
/// Consumes a [`Vec<u8>`] and outputs a [`Vec`] of [Token]'s.
|
||||
pub struct Tokeniser {}
|
||||
use std::{path::Path, str::FromStr, sync::Arc};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use common::prelude::*;
|
||||
|
||||
use crate::{
|
||||
context::AssemblerContext,
|
||||
error::{AssembleError, AssembleErrorKind},
|
||||
model::module::Module,
|
||||
source::{
|
||||
lines::{LineSpan, lines_with_spans},
|
||||
load_source_bytes,
|
||||
opcode::Opcode,
|
||||
source_info::SourceInfo,
|
||||
token::{Token, TokenType},
|
||||
token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
|
||||
tokeniser::error::TokeniserError,
|
||||
},
|
||||
};
|
||||
|
||||
pub mod error;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Consumes a [`Vec<u8>`] and outputs a [`Vec`] of [Token]'s.
///
/// Holds the raw source bytes, the module they belong to, the regex patterns used to
/// recognise each token kind, and the state needed for strings that span several lines.
|
||||
pub struct Tokeniser {
|
||||
/// The raw bytes of the source file being tokenised.
|
||||
pub data: Vec<u8>,
|
||||
/// Shared handle to the [`Module`] the file belongs to; cloned into every token's
/// source info.
|
||||
pub module: Arc<Module>,
|
||||
|
||||
// Pre-compiled regex patterns, one per token kind; each is used by the `try_match_*`
// method of the same name.
|
||||
label_regex: Regex,
|
||||
register_regex: Regex,
|
||||
immediate_regex: Regex,
|
||||
directive_regex: Regex,
|
||||
instruction_regex: Regex,
|
||||
symbol_regex: Regex,
|
||||
comment_regex: Regex,
|
||||
|
||||
// String parsing state. `in_string` is set when a string literal is opened without a
// closing quote on the same line; `string_buffer` collects its text, and the
// `string_start_*` fields record where it began.
|
||||
in_string: bool,
|
||||
string_buffer: String,
|
||||
string_start_line: usize,
|
||||
string_start_column: usize,
|
||||
}
|
||||
|
||||
impl Tokeniser {
|
||||
#[must_use]
|
||||
pub fn from_data(data: Vec<u8>, module: Arc<Module>) -> Self {
|
||||
Self {
|
||||
data,
|
||||
module,
|
||||
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
|
||||
.expect("Failed to compile label regex pattern"),
|
||||
register_regex: Regex::new(
|
||||
r"^(rg[0-9a-f]+|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
|
||||
)
|
||||
.expect("Failed to compile register regex pattern"),
|
||||
immediate_regex: Regex::new(
|
||||
r"^(0x[0-9a-fA-F_]+|0b[0-1_]+|0o[0-7_]+|[0-9_]+)",
|
||||
)
|
||||
.expect("Failed to compile immediate regex pattern"),
|
||||
directive_regex: Regex::new(r"^(res[bwh]|d[bwh]|include|section|global|local)\b")
|
||||
.expect("Failed to compile directive regex pattern"),
|
||||
instruction_regex: Regex::new(
|
||||
r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b",
|
||||
)
|
||||
.expect("Failed to compile instruction regex pattern"),
|
||||
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)::{2}([a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)")
|
||||
.expect("Failed to compile symbol regex pattern"),
|
||||
comment_regex: Regex::new("^//.*")
|
||||
.expect("Failed to compile comment regex pattern"),
|
||||
|
||||
// Initialize string parsing state
|
||||
in_string: false,
|
||||
string_buffer: String::new(),
|
||||
string_start_line: 0,
|
||||
string_start_column: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a [`Tokeniser`] from a file path. Also creates the underlying [`Module`]
|
||||
/// for you.
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
path: P,
|
||||
ctx: &AssemblerContext,
|
||||
) -> Result<Self, AssembleError> {
|
||||
let path = path.as_ref().to_path_buf();
|
||||
let data = load_source_bytes(&path)?;
|
||||
let module = Arc::new(Module::new(path)?);
|
||||
|
||||
{
|
||||
let mut module_registry = ctx.module_registry.write()?;
|
||||
module_registry.add(module.clone());
|
||||
}
|
||||
|
||||
Ok(Self::from_data(data, module))
|
||||
}
|
||||
|
||||
// Note that modules are tokenised in their own threads, possibly in parallel.
|
||||
pub fn tokenise(mut self) -> Result<Vec<Token>, AssembleError> {
|
||||
let mut token_stream = Vec::new();
|
||||
let data = self.data.clone();
|
||||
let lines = lines_with_spans(&data);
|
||||
|
||||
// Process each line
|
||||
for line_result in lines {
|
||||
let line_span = line_result?;
|
||||
let trimmed = line_span.content.trim();
|
||||
|
||||
// Skip empty lines and add newline tokens
|
||||
if trimmed.is_empty() {
|
||||
token_stream.push(Token::new(
|
||||
TokenType::Newline,
|
||||
SourceInfo::new(line_span.line_number, self.module.clone(), 0..1),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Actually tokenise the line content
|
||||
let line_tokens = self.tokenise_line(&line_span)?;
|
||||
token_stream.extend(line_tokens);
|
||||
|
||||
// Add newline token at end of line
|
||||
token_stream.push(Token::new(
|
||||
TokenType::Newline,
|
||||
SourceInfo::new(
|
||||
line_span.line_number,
|
||||
self.module.clone(),
|
||||
line_span.content.len()..line_span.content.len(),
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// Add EOF token
|
||||
token_stream.push(Token::new(
|
||||
TokenType::Eof,
|
||||
SourceInfo::new(0, self.module.clone(), 0..0),
|
||||
));
|
||||
|
||||
Ok(token_stream)
|
||||
}
|
||||
|
||||
fn tokenise_line(
|
||||
&mut self,
|
||||
line_span: &LineSpan,
|
||||
) -> Result<Vec<Token>, AssembleError> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut remaining = line_span.content.as_str();
|
||||
let mut column = 0;
|
||||
|
||||
// Skip leading whitespace
|
||||
let trimmed_start = remaining.trim_start();
|
||||
column += remaining.len() - trimmed_start.len();
|
||||
remaining = trimmed_start;
|
||||
|
||||
while !remaining.is_empty() {
|
||||
let start_column = column;
|
||||
|
||||
// Try to match a token.
|
||||
let (token_type, consumed) =
|
||||
self.match_token(remaining, line_span.line_number, column)?;
|
||||
|
||||
// Filter out string continuation tokens and comments.
|
||||
match token_type {
|
||||
TokenType::StringContinuation => {
|
||||
// Don't add to token stream, just consume input
|
||||
}
|
||||
TokenType::Comment => {
|
||||
// Don't add to token stream, consume rest of line
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
tokens.push(Token::new(
|
||||
token_type,
|
||||
SourceInfo::new(
|
||||
line_span.line_number,
|
||||
self.module.clone(),
|
||||
start_column..start_column + consumed,
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Advance position.
|
||||
remaining = &remaining[consumed..];
|
||||
column += consumed;
|
||||
|
||||
// Skip whitespace.
|
||||
let before_trim = remaining.len();
|
||||
remaining = remaining.trim_start();
|
||||
column += before_trim - remaining.len();
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
fn try_match_comment(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.comment_regex.captures(input)?;
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Comment, len))
|
||||
}
|
||||
|
||||
fn try_match_label(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.label_regex.captures(input)?;
|
||||
let name = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Label(LabelToken { name }), len))
|
||||
}
|
||||
|
||||
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.register_regex.captures(input)?;
|
||||
|
||||
let captured_group = caps.get(1)?.as_str();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
let reg = Register::try_from(captured_group).ok()?;
|
||||
|
||||
Some((TokenType::Register(RegisterToken { reg }), len))
|
||||
}
|
||||
|
||||
fn try_match_immediate(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.immediate_regex.captures(input)?;
|
||||
let value_str = caps.get(1)?.as_str();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
// Remove any underscores that were inserted for readability.
|
||||
let value_str = value_str.replace('_', "");
|
||||
|
||||
let value = if let Some(hex_part) = value_str.strip_prefix("0x") {
|
||||
u32::from_str_radix(hex_part, 16).ok()?
|
||||
} else if let Some(bin_part) = value_str.strip_prefix("0b") {
|
||||
u32::from_str_radix(bin_part, 2).ok()?
|
||||
} else if let Some(oct_part) = value_str.strip_prefix("0o") {
|
||||
u32::from_str_radix(oct_part, 8).ok()?
|
||||
} else {
|
||||
value_str.parse::<u32>().ok()?
|
||||
};
|
||||
|
||||
Some((TokenType::Immediate(value), len))
|
||||
}
|
||||
|
||||
fn try_match_directive(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.directive_regex.captures(input)?;
|
||||
let directive = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
Some((TokenType::Directive(DirectiveToken { directive }), len))
|
||||
}
|
||||
|
||||
fn try_match_instruction(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.instruction_regex.captures(input)?;
|
||||
let mnemonic = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
let op = Opcode::from_str(&mnemonic).ok()?;
|
||||
|
||||
Some((TokenType::Instruction(op), len))
|
||||
}
|
||||
|
||||
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.symbol_regex.captures(input)?;
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
// Check which capture group matched.
|
||||
let name = if let Some(scoped_name) = caps.get(1) {
|
||||
// Matched the scoped symbol pattern (name::scope).
|
||||
format!("{}::{}", scoped_name.as_str(), caps.get(2)?.as_str())
|
||||
} else if let Some(simple_name) = caps.get(3) {
|
||||
simple_name.as_str().to_string()
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some((TokenType::Symbol(SymbolToken { name }), len))
|
||||
}
|
||||
|
||||
fn try_match_string(
|
||||
&mut self,
|
||||
input: &str,
|
||||
line_number: usize,
|
||||
column: usize,
|
||||
) -> Option<(TokenType, usize)> {
|
||||
if self.in_string {
|
||||
// We're continuing a multiline string
|
||||
Some(self.handle_string_continuation(input, line_number, column))
|
||||
} else {
|
||||
// Look for the start of a new string
|
||||
self.handle_string_start(input, line_number, column)
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_string_start(
|
||||
&mut self,
|
||||
input: &str,
|
||||
line_number: usize,
|
||||
column: usize,
|
||||
) -> Option<(TokenType, usize)> {
|
||||
if !input.starts_with('"') {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the closing quote on the same line
|
||||
if let Some(end_pos) = input[1..].find('"') {
|
||||
// Complete string on one line
|
||||
let content = input[1..=end_pos].to_string();
|
||||
let len = end_pos + 2; // +2 for both quotes
|
||||
Some((TokenType::String(content), len))
|
||||
} else {
|
||||
// Start of multiline string
|
||||
self.in_string = true;
|
||||
self.string_start_line = line_number;
|
||||
self.string_start_column = column;
|
||||
self.string_buffer = input[1..].to_string(); // Everything after opening quote
|
||||
self.string_buffer.push('\n'); // Add newline for multiline
|
||||
|
||||
// Consume the entire rest of the line
|
||||
Some((TokenType::StringContinuation, input.len()))
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_string_continuation(
|
||||
&mut self,
|
||||
input: &str,
|
||||
_line_number: usize,
|
||||
_column: usize,
|
||||
) -> (TokenType, usize) {
|
||||
// Look for closing quote
|
||||
if let Some(end_pos) = input.find('"') {
|
||||
// End of multiline string found
|
||||
self.string_buffer.push_str(&input[..end_pos]);
|
||||
self.in_string = false;
|
||||
|
||||
let content = std::mem::take(&mut self.string_buffer);
|
||||
let len = end_pos + 1; // +1 for the closing quote
|
||||
|
||||
(TokenType::String(content), len)
|
||||
} else {
|
||||
// Continue multiline string
|
||||
self.string_buffer.push_str(input);
|
||||
self.string_buffer.push('\n'); // Add newline
|
||||
|
||||
// Consume the entire line
|
||||
(TokenType::StringContinuation, input.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::range_plus_one, reason = "RangeInclusive is a different type!")]
|
||||
fn match_token(
|
||||
&mut self,
|
||||
input: &str,
|
||||
line_number: usize,
|
||||
column: usize,
|
||||
) -> Result<(TokenType, usize), AssembleError> {
|
||||
if input.starts_with(',') {
|
||||
return Ok((TokenType::Comma, 1));
|
||||
}
|
||||
|
||||
// Check for string first (including multiline continuations).
|
||||
if let Some(m) = self.try_match_string(input, line_number, column) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_directive(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_instruction(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_comment(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_label(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_register(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_immediate(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
if let Some(m) = self.try_match_symbol(input) {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
let mut idx_iter = (column + 1)..;
|
||||
let Some(idx) = idx_iter.next() else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let source = SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
||||
|
||||
// Handle miscellaneous characters.
|
||||
if let Some(c) = input.chars().next() {
|
||||
Err(AssembleError::new_source_error(
|
||||
source,
|
||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)),
|
||||
))
|
||||
} else {
|
||||
Err(AssembleError::new_source_error(
|
||||
source,
|
||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput(
|
||||
input.len(),
|
||||
)),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,41 @@
|
||||
//! This module contains the error types for the tokeniser.
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TokeniserError {}
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
/// Types of errors that may be returned during tokenisation.
|
||||
pub enum TokeniserError {
|
||||
/// An unexpected character was found in the source code.
|
||||
UnexpectedChar(char),
|
||||
/// An unterminated string literal was found. [`SourceInfo`] will be attached if this
|
||||
/// was returned.
|
||||
UnterminatedString,
|
||||
/// An invalid number format was encountered when parsing a literal value
|
||||
/// ([`TokenType::Immediate`]).
|
||||
InvalidNumber(&'static str),
|
||||
/// An unrecognized token was encountered.
|
||||
UnrecognisedToken,
|
||||
/// Returned if the consumed count was lower than the length of the input file.
|
||||
/// This is a sign you will need to debug some [`Tokeniser`] code to ensure that
|
||||
/// [`Tokeniser::match_token`] is working as intended.
|
||||
///
|
||||
/// First field is length of the line.
|
||||
UnexpectedEndOfInput(usize),
|
||||
}
|
||||
|
||||
impl TokeniserError {}
|
||||
|
||||
impl std::fmt::Display for TokeniserError {
|
||||
#[rustfmt::skip]
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "TODO!!!!!!")
|
||||
match self {
|
||||
Self::UnexpectedChar(c) => write!(f, "unexpected char '{c}' found in input")?,
|
||||
Self::InvalidNumber(lit) => write!(f, "invalid integer literal \"{lit}\" found in input")?,
|
||||
Self::UnrecognisedToken => write!(f, "unrecognised token found in input")?,
|
||||
Self::UnterminatedString => write!(f, "unterminated string literal")?,
|
||||
Self::UnexpectedEndOfInput(line_length) => write!(
|
||||
f, "unexpected end of input, input length: {line_length}"
|
||||
)?,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,418 @@
|
||||
//! Unit tests for the tokenizer
|
||||
|
||||
use common::prelude::Register;
|
||||
|
||||
use crate::{
|
||||
model::module::Module,
|
||||
source::{
|
||||
opcode::Opcode,
|
||||
token::{Token, TokenType},
|
||||
token_info::RegisterToken,
|
||||
tokeniser::Tokeniser,
|
||||
},
|
||||
};
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
|
||||
/// Helper function to create a tokenizer from source text
|
||||
fn create_tokenizer_from_source(source: &str) -> Tokeniser {
|
||||
let path = PathBuf::from("test.dsa");
|
||||
let module = Module::new(path).expect("Cannot create module!");
|
||||
|
||||
Tokeniser::from_data(source.as_bytes().to_vec(), Arc::new(module))
|
||||
}
|
||||
|
||||
/// Helper function to tokenize source and return tokens
|
||||
fn tokenize_source(source: &str) -> Result<Vec<Token>, crate::error::AssembleError> {
|
||||
let tokenizer = create_tokenizer_from_source(source);
|
||||
|
||||
tokenizer.tokenise()
|
||||
}
|
||||
|
||||
/// Helper function to extract token types from a token vector
|
||||
fn extract_token_types(tokens: &[Token]) -> Vec<&TokenType> {
|
||||
tokens.iter().map(|t| &t.token_type).collect()
|
||||
}
|
||||
|
||||
#[test]
fn test_empty_source() {
    let tokens = tokenize_source("").expect("Failed to tokenize empty source");

    // Even an empty input must produce a token stream, and it must end with EOF.
    assert!(!tokens.is_empty());

    let final_token = tokens.last().expect("Expected at least one token");
    assert!(matches!(final_token.token_type, TokenType::Eof));
}
|
||||
|
||||
#[test]
fn test_whitespace_only() {
    let tokens = tokenize_source(" \n \n ").expect("Failed to tokenize whitespace");
    let kinds = extract_token_types(&tokens);

    // Whitespace-only input still yields line breaks and the final EOF marker.
    let has_newline = kinds.iter().any(|kind| matches!(kind, TokenType::Newline));
    let has_eof = kinds.iter().any(|kind| matches!(kind, TokenType::Eof));

    assert!(has_newline);
    assert!(has_eof);
}
|
||||
|
||||
#[test]
fn test_single_instruction() {
    let tokens = tokenize_source("add").expect("Failed to tokenize instruction");
    let kinds = extract_token_types(&tokens);

    // An instruction token must appear somewhere in the stream...
    let has_instruction = kinds
        .iter()
        .any(|kind| matches!(kind, TokenType::Instruction(_)));
    assert!(has_instruction);

    // ...and specifically it must be the first token, spelled "add".
    let TokenType::Instruction(instr) = &tokens[0].token_type else {
        panic!("Expected instruction token");
    };
    assert_eq!(instr.to_string(), "add");
}
|
||||
|
||||
#[test]
fn test_all_instructions() {
    // Each mnemonic must tokenise to an instruction that prints back as itself.
    for mnemonic in ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"] {
        let tokens = tokenize_source(mnemonic).expect("Failed to tokenize instruction");

        let TokenType::Instruction(parsed) = &tokens[0].token_type else {
            panic!("Expected instruction token for {mnemonic}");
        };
        assert_eq!(parsed.to_string(), mnemonic);
    }
}
|
||||
|
||||
#[test]
fn test_registers() {
    // (source text, expected display form of the parsed register)
    let cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize register");

        let TokenType::Register(register) = &tokens[0].token_type else {
            panic!("Expected register token for {input}");
        };
        assert_eq!(register.reg.to_string(), expected);
    }
}
|
||||
|
||||
#[test]
fn test_immediates() {
    // (source text, expected numeric value) covering decimal, hex, octal and binary.
    let cases = [
        ("42", 42),
        ("0", 0),
        ("0xFF", 255),
        ("0x1234", 0x1234),
        ("0xDEADBEEF", 0xDEAD_BEEF),
        ("0o12", 0o12),
        ("0b101", 0b101),
    ];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize immediate");

        let TokenType::Immediate(value) = &tokens[0].token_type else {
            panic!("Expected immediate token for {input}");
        };
        assert_eq!(*value, expected);
    }
}
|
||||
|
||||
#[test]
fn test_labels() {
    // (source text, expected label name without the trailing colon)
    let cases = [
        ("loop_start:", "loop_start"),
        ("main:", "main"),
        ("_private_label:", "_private_label"),
        ("Label123:", "Label123"),
    ];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize label");

        let TokenType::Label(label) = &tokens[0].token_type else {
            panic!("Expected label token for {input}");
        };
        assert_eq!(label.name, expected);
    }
}
|
||||
|
||||
#[test]
fn test_directives() {
    // (source text, expected directive name)
    let cases = [
        ("global", "global"),
        ("section", "section"),
        ("local", "local"),
    ];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize directive");

        let TokenType::Directive(directive) = &tokens[0].token_type else {
            panic!("Expected directive token for {input}");
        };
        assert_eq!(directive.directive, expected);
    }
}
|
||||
|
||||
#[test]
fn test_symbols() {
    // (source text, expected symbol name)
    let cases = [
        ("my_symbol", "my_symbol"),
        ("_private", "_private"),
        ("Symbol123", "Symbol123"),
        ("camelCase", "camelCase"),
    ];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize symbol");

        let TokenType::Symbol(symbol) = &tokens[0].token_type else {
            panic!("Expected symbol token for {input}");
        };
        assert_eq!(symbol.name, expected);
    }
}
|
||||
|
||||
#[test]
fn test_complex_instruction_line() {
    let tokens = tokenize_source("addi rg1, rg2, 0xFF")
        .expect("Failed to tokenise complex instruction");

    assert!(tokens.len() >= 6);

    // The first six tokens must be: instruction, register, comma, register, comma,
    // immediate. Whatever follows is not checked here.
    let leading: Vec<&TokenType> = tokens.iter().take(6).map(|t| &t.token_type).collect();
    assert!(matches!(
        leading.as_slice(),
        [
            TokenType::Instruction(_),
            TokenType::Register(_),
            TokenType::Comma,
            TokenType::Register(_),
            TokenType::Comma,
            TokenType::Immediate(_),
        ]
    ));
}
|
||||
|
||||
#[test]
fn test_multiline_with_comments() {
    // The exact token stream expected for `SOURCE`; the trailing comment on the first
    // line must not produce a token of its own.
    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
        TokenType::Instruction(Opcode::Add),
        TokenType::Register(RegisterToken::new(Register::Rg0)),
        TokenType::Comma,
        TokenType::Register(RegisterToken::new(Register::Rg1)),
        TokenType::Newline,
        TokenType::Instruction(Opcode::SubI),
        TokenType::Register(RegisterToken::new(Register::Rg2)),
        TokenType::Comma,
        TokenType::Immediate(10),
        TokenType::Newline,
        TokenType::Eof,
    ];

    const SOURCE: &str = r"add rg0, rg1 // Another comment
subi rg2, 10";

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
    let token_types = extract_token_types(&tokens);

    // Check the length first so the pairwise comparison below cannot silently skip
    // trailing tokens.
    assert_eq!(
        token_types.len(),
        EXPECTED_TOKEN_TYPES.len(),
        "{token_types:#?}"
    );

    for (expected, got) in EXPECTED_TOKEN_TYPES.iter().zip(token_types.iter()) {
        assert_eq!(expected, *got, "Expected {expected:?}, got {got:?}");
    }
}
|
||||
|
||||
#[test]
fn test_tokenise_brainf_interpreter() {
    // Smoke test over a real program shipped in the repository's resources.
    const SOURCE: &str = include_str!("../../../../resources/dsa/bf.dsa");

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise the brainfuck compiler!");

    // Assert on the result instead of dumping the whole token stream with `dbg!`.
    assert!(!tokens.is_empty());
    assert!(matches!(
        tokens
            .last()
            .expect("Expected at least one token")
            .token_type,
        TokenType::Eof
    ));
}
|
||||
|
||||
#[test]
fn test_string_literals() {
    // (quoted source text, expected string content without the quotes)
    let cases = [
        (r#""hello world""#, "hello world"),
        (
            r#""++++++++++++++++++++++++++++++++++++++++++++""#,
            "++++++++++++++++++++++++++++++++++++++++++++",
        ),
        (r#""Invalid Instruction!""#, "Invalid Instruction!"),
        (r#""""#, ""),
    ];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize string literal");

        let TokenType::String(value) = &tokens[0].token_type else {
            panic!("Expected string token for {input}");
        };
        assert_eq!(value.as_str(), expected);
    }
}
|
||||
|
||||
#[test]
fn test_data_directives() {
    // Data declaration keywords are tokenised as directives.
    let cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize data declaration");

        let TokenType::Directive(decl) = &tokens[0].token_type else {
            panic!("Expected data declaration token for {input}");
        };
        assert_eq!(decl.directive, expected);
    }
}
|
||||
|
||||
#[test]
fn test_include_directive() {
    let tokens = tokenize_source(r#"include print "./lib/print.dsa""#)
        .expect("Failed to tokenize include directive");

    assert!(tokens.len() >= 3);

    // Expected prefix: directive, symbol, string.
    let leading: Vec<&TokenType> = tokens.iter().take(3).map(|t| &t.token_type).collect();
    assert!(matches!(
        leading.as_slice(),
        [
            TokenType::Directive(_),
            TokenType::Symbol(_),
            TokenType::String(_),
        ]
    ));
}
|
||||
|
||||
#[test]
fn test_hex_addresses() {
    // (source text, expected numeric value)
    let cases = [("0x10000", 0x10000), ("0x30000", 0x30000)];

    for (input, expected) in cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize hex address");

        let TokenType::Immediate(value) = &tokens[0].token_type else {
            panic!("Expected immediate token for {input}");
        };
        assert_eq!(*value, expected);
    }
}
|
||||
|
||||
#[test]
fn test_memory_operations() {
    let tokens =
        tokenize_source("ldw rg1, rg2").expect("Failed to tokenize memory operation");

    assert!(tokens.len() >= 4);

    // Expected prefix: instruction, register, comma, register.
    let leading: Vec<&TokenType> = tokens.iter().take(4).map(|t| &t.token_type).collect();
    assert!(matches!(
        leading.as_slice(),
        [
            TokenType::Instruction(_),
            TokenType::Register(_),
            TokenType::Comma,
            TokenType::Register(_),
        ]
    ));
}
|
||||
|
||||
#[test]
fn test_function_calls() {
    let tokens =
        tokenize_source("call print::print").expect("Failed to tokenize function call");

    assert!(tokens.len() >= 2);
    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));

    // Only the basic structure is checked: some symbol token must be present, however
    // the `::` path ends up being represented.
    let has_symbol = tokens
        .iter()
        .any(|token| matches!(token.token_type, TokenType::Symbol(_)));
    assert!(has_symbol);
}
|
||||
|
||||
#[test]
fn test_comments_are_ignored() {
    let tokens = tokenize_source("add rg0, rg1 // this is a comment\nsub rg2, rg3")
        .expect("Failed to tokenize with comments");

    // Only `add` and `sub` may count as instructions; nothing inside the comment does.
    let mut instruction_count = 0_usize;
    for token in &tokens {
        if matches!(token.token_type, TokenType::Instruction(_)) {
            instruction_count += 1;
        }
    }

    assert_eq!(instruction_count, 2);
}
|
||||
|
||||
#[test]
fn test_newline_always_present() {
    // The input deliberately has no trailing line break.
    let tokens =
        tokenize_source("add rg0, rg1").expect("Failed to tokenize without newline");

    // A newline token must still be emitted somewhere in the stream.
    let newline_count = tokens
        .iter()
        .filter(|token| matches!(token.token_type, TokenType::Newline))
        .count();
    assert!(
        newline_count > 0,
        "Expected newline to be added even when missing from input"
    );

    // The stream must end with EOF.
    let final_token = tokens.last().expect("Expected at least one token");
    assert!(matches!(final_token.token_type, TokenType::Eof));
}
|
||||
|
||||
#[test]
fn test_complex_branching_code() {
    let source = r"
cmp rg3, rg8
jeq increment
cmp rg3, rg9
jeq decrement";

    let tokens = tokenize_source(source).expect("Failed to tokenize branching code");

    // Counts the tokens whose type satisfies `pred`.
    let count_where = |pred: fn(&TokenType) -> bool| {
        tokens.iter().filter(|t| pred(&t.token_type)).count()
    };

    // Two `cmp` and two `jeq` instructions.
    assert_eq!(count_where(|kind| matches!(kind, TokenType::Instruction(_))), 4);

    // The jump targets `increment` and `decrement` are plain symbols.
    assert_eq!(count_where(|kind| matches!(kind, TokenType::Symbol(_))), 2);
}
|
||||
|
||||
#[test]
fn test_stack_operations() {
    let tokens = tokenize_source("push rg2\npop zero\npusha 2\npopa 2")
        .expect("Failed to tokenize stack operations");

    // One instruction per line: push, pop, pusha, popa.
    let instruction_count = tokens
        .iter()
        .map(|token| &token.token_type)
        .filter(|kind| matches!(kind, TokenType::Instruction(_)))
        .count();

    assert_eq!(instruction_count, 4);
}
|
||||
+15
-14
@@ -46,12 +46,11 @@ impl SymbolTable {
|
||||
&& let Some(existing) = self.symbols.get(&existing_id)
|
||||
&& existing.module_id == module_id
|
||||
{
|
||||
return Err(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::IO(std::io::Error::new(
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::AlreadyExists,
|
||||
format!("Symbol '{name}' already defined in module"),
|
||||
)),
|
||||
));
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
// Add to all mappings
|
||||
@@ -63,19 +62,22 @@ impl SymbolTable {
|
||||
}
|
||||
|
||||
/// Gets the [`Symbol`] by its [`SymbolId`].
|
||||
#[must_use] pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
|
||||
self.symbols.get(id)
|
||||
}
|
||||
|
||||
/// Gets the [`Symbol`] by its name.
|
||||
#[must_use] pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
|
||||
self.name_to_id
|
||||
.get(name)
|
||||
.and_then(|id| self.symbols.get(id))
|
||||
}
|
||||
|
||||
/// Gets all [`Symbol`]s in a module.
|
||||
#[must_use] pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
|
||||
self.module_symbols
|
||||
.get(module_id)
|
||||
.map(|ids| ids.iter().filter_map(|id| self.symbols.get(id)).collect())
|
||||
@@ -83,7 +85,8 @@ impl SymbolTable {
|
||||
}
|
||||
|
||||
/// Gets all the public symbols.
|
||||
#[must_use] pub fn get_public_symbols(&self) -> Vec<&Symbol> {
|
||||
#[must_use]
|
||||
pub fn get_public_symbols(&self) -> Vec<&Symbol> {
|
||||
self.symbols
|
||||
.values()
|
||||
.filter(|sym| matches!(sym.visibility, Visibility::Public))
|
||||
@@ -104,12 +107,10 @@ impl SymbolTable {
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Err(AssembleError::new_other_error(
|
||||
crate::error::AssembleErrorKind::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"Symbol not found",
|
||||
)),
|
||||
))
|
||||
Err(
|
||||
std::io::Error::new(std::io::ErrorKind::NotFound, "Symbol not found")
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ pub mod logging;
|
||||
|
||||
use std::io::Write;
|
||||
|
||||
pub fn input(prompt: &str) -> String {
|
||||
pub fn _input(prompt: &str) -> String {
|
||||
print!("{prompt}\n > ");
|
||||
std::io::stdout().flush().expect("Failed to flush stdout");
|
||||
let mut input = String::new();
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
use crate::{instructions::encode::Encode, prelude::*};
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
|
||||
pub enum Interrupt {
|
||||
Software(u8),
|
||||
Breakpoint,
|
||||
#[default]
|
||||
HardFault,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//! Various types of arguments that instructions can take, alongside encoding and decoding logic.
|
||||
//! Various types of arguments that instructions can take, alongside encoding and decoding
|
||||
//! logic.
|
||||
|
||||
use crate::{
|
||||
instructions::{RegisterParseError, encode::Encode},
|
||||
@@ -35,18 +36,20 @@ impl std::fmt::Display for ArgsDecodeError {
|
||||
|
||||
impl std::error::Error for ArgsDecodeError {}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
/// Used by instructions with 2 registers and an immediate argument.
|
||||
pub struct ITypeArgs {
|
||||
pub immediate: u16,
|
||||
pub r1: Register,
|
||||
/// May not actually be used by some instructions taking an immediate e.g. LUI. This is solved by making the constructor take Options.
|
||||
/// May not actually be used by some instructions taking an immediate e.g. LUI. This
|
||||
/// is solved by making the constructor take Options.
|
||||
pub r2: Register,
|
||||
}
|
||||
|
||||
impl ITypeArgs {
|
||||
#[must_use]
|
||||
/// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with [`Register::NoReg`].
|
||||
/// Creates a new [`ITypeArgs`]. If r1 or r2 is unset, they will be replaced with
|
||||
/// [`Register::NoReg`].
|
||||
pub fn new(immediate: u16, r1: Option<Register>, r2: Option<Register>) -> Self {
|
||||
let r1 = r1.unwrap_or_default();
|
||||
let r2 = r2.unwrap_or_default();
|
||||
@@ -56,8 +59,8 @@ impl ITypeArgs {
|
||||
}
|
||||
|
||||
impl Encode for ITypeArgs {
|
||||
/// Encodes an I-type instruction from its fields. These must have some unused high-order
|
||||
/// bits set to 0 else the bit shifting logic gets fucked.
|
||||
/// Encodes an I-type instruction from its fields. These must have some unused
|
||||
/// high-order bits set to 0 else the bit shifting logic gets fucked.
|
||||
fn encode(self, opcode: u8) -> u32 {
|
||||
let opcode = u32::from(opcode);
|
||||
let r1 = self.r1 as u32;
|
||||
@@ -84,7 +87,7 @@ impl TryFrom<u32> for ITypeArgs {
|
||||
}
|
||||
|
||||
/// Used by instructions not using immediates (besides 5 bit shift values).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub struct RTypeArgs {
|
||||
pub sr1: Register,
|
||||
pub sr2: Register,
|
||||
@@ -95,7 +98,8 @@ pub struct RTypeArgs {
|
||||
|
||||
impl RTypeArgs {
|
||||
#[must_use]
|
||||
/// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0.
|
||||
/// Creates a new [`RTypeArgs`]. If any registers are unset, they will be replaced
|
||||
/// with [`Register::NoReg`]. If `shamt` is unset, it will be set to 0.
|
||||
pub fn new(
|
||||
sr1: Option<Register>,
|
||||
sr2: Option<Register>,
|
||||
@@ -122,7 +126,8 @@ impl Encode for RTypeArgs {
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `shamt`: The amount to shift value (used only in shift instructions, otherwise 0).
|
||||
/// - `shamt`: The amount to shift value (used only in shift instructions, otherwise
|
||||
/// 0).
|
||||
fn encode(self, opcode: u8) -> u32 {
|
||||
let opcode = u32::from(opcode);
|
||||
let sr1 = self.sr1 as u32;
|
||||
|
||||
@@ -39,7 +39,9 @@ impl std::fmt::Display for InstructionDecodeError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::InvalidOpcode(code) => write!(f, "invalid opcode, got {code:x}")?,
|
||||
Self::InvalidArgument(err) => write!(f, "invalid arguments, got an error {err}")?,
|
||||
Self::InvalidArgument(err) => {
|
||||
write!(f, "invalid arguments, got an error {err}")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -160,11 +160,12 @@ impl CodeEditor {
|
||||
|
||||
/// Stick to bottom
|
||||
/// The scroll handle will stick to the bottom position even while the content size
|
||||
/// changes dynamically. This can be useful to simulate terminal UIs or log/info scrollers.
|
||||
/// The scroll handle remains stuck until user manually changes position. Once "unstuck"
|
||||
/// it will remain focused on whatever content viewport the user left it on. If the scroll
|
||||
/// handle is dragged to the bottom it will again become stuck and remain there until manually
|
||||
/// pulled from the end position.
|
||||
/// changes dynamically. This can be useful to simulate terminal UIs or log/info
|
||||
/// scrollers. The scroll handle remains stuck until user manually changes
|
||||
/// position. Once "unstuck" it will remain focused on whatever content viewport
|
||||
/// the user left it on. If the scroll handle is dragged to the bottom it will
|
||||
/// again become stuck and remain there until manually pulled from the end
|
||||
/// position.
|
||||
///
|
||||
/// **Default: false**
|
||||
pub fn stick_to_bottom(self, stick_to_bottom: bool) -> Self {
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use assembler::compiler_engine::CompilerEngine;
|
||||
use common::prelude::Instruction;
|
||||
use egui::{Align, Context, Key, Layout, Ui};
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
include print "./lib/print.dsa"
|
||||
|
||||
// "print hello world"
|
||||
db program: "++++++++++++++++++++++++++++++++++++++++++++
|
||||
db program "++++++++++++++++++++++++++++++++++++++++++++
|
||||
>++++++++++++++++++++++++++++++++
|
||||
>++++++++++++++++
|
||||
>
|
||||
@@ -35,10 +35,10 @@ db program: "++++++++++++++++++++++++++++++++++++++++++++
|
||||
]
|
||||
<<++..."
|
||||
|
||||
db error: "Invalid Instruction!"
|
||||
dw stack: 0x10000
|
||||
dw input: 0x30000
|
||||
resb data: 1024
|
||||
db error "Invalid Instruction!"
|
||||
dw stack 0x10000
|
||||
dw input 0x30000
|
||||
resb data 1024
|
||||
|
||||
// set up a stack so we can call functions
|
||||
_init_stack:
|
||||
|
||||
Reference in New Issue
Block a user