refactoring assembler
This commit is contained in:
@@ -1,9 +1,7 @@
|
|||||||
use common::{args, prelude::*};
|
use common::{args, prelude::*};
|
||||||
|
|
||||||
use crate::{
|
use crate::assembler::model::{Node, Opcode};
|
||||||
AssembleError, expect_token,
|
use crate::{assembler::AssembleError, expect_token};
|
||||||
model::{Node, Opcode},
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn codegen(nodes: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
|
pub fn codegen(nodes: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
|
||||||
let mut instructions = vec![];
|
let mut instructions = vec![];
|
||||||
@@ -140,15 +138,15 @@ fn build_instruction(node: Node) -> Result<Instruction, AssembleError> {
|
|||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Opcode::Iadd | Opcode::Isub => {
|
Opcode::AddI | Opcode::SubI => {
|
||||||
let reg = expect_token!(args.first().unwrap(), Register)?;
|
let reg = expect_token!(args.first().unwrap(), Register)?;
|
||||||
let immediate = expect_token!(args.get(1).unwrap(), Immediate)? as u16;
|
let immediate = expect_token!(args.get(1).unwrap(), Immediate)? as u16;
|
||||||
let dest = expect_token!(args.get(2).unwrap(), Register)?;
|
let dest = expect_token!(args.get(2).unwrap(), Register)?;
|
||||||
let args = args!(I, immediate: immediate, r1: reg, r2: dest);
|
let args = args!(I, immediate: immediate, r1: reg, r2: dest);
|
||||||
|
|
||||||
match opcode {
|
match opcode {
|
||||||
Opcode::Iadd => Ok(Instruction::AddImmediate(args)),
|
Opcode::AddI => Ok(Instruction::AddImmediate(args)),
|
||||||
Opcode::Isub => Ok(Instruction::SubImmediate(args)),
|
Opcode::SubI => Ok(Instruction::SubImmediate(args)),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,10 +1,7 @@
|
|||||||
use common::prelude::Register;
|
use common::prelude::Register;
|
||||||
|
|
||||||
use crate::{
|
use crate::assembler::model::{Node, Opcode, Token};
|
||||||
AssembleError, expect_token, expect_type,
|
use crate::{assembler::AssembleError, expect_token, expect_type, node};
|
||||||
model::{Node, Opcode, Token},
|
|
||||||
node,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn expand_pseudo_ops(
|
pub fn expand_pseudo_ops(
|
||||||
mut nodes: Vec<Node>,
|
mut nodes: Vec<Node>,
|
||||||
@@ -49,7 +46,7 @@ fn expand_push(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError
|
|||||||
nodes.extend(vec![
|
nodes.extend(vec![
|
||||||
node!(
|
node!(
|
||||||
label,
|
label,
|
||||||
Opcode::Isub,
|
Opcode::SubI,
|
||||||
Token::Register(Register::Spr),
|
Token::Register(Register::Spr),
|
||||||
Token::Immediate(4),
|
Token::Immediate(4),
|
||||||
Token::Register(Register::Spr)
|
Token::Register(Register::Spr)
|
||||||
@@ -80,7 +77,7 @@ fn expand_pop(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError>
|
|||||||
),
|
),
|
||||||
node!(
|
node!(
|
||||||
None,
|
None,
|
||||||
Opcode::Iadd,
|
Opcode::AddI,
|
||||||
Token::Register(Register::Spr),
|
Token::Register(Register::Spr),
|
||||||
Token::Immediate(4),
|
Token::Immediate(4),
|
||||||
Token::Register(Register::Spr)
|
Token::Register(Register::Spr)
|
||||||
@@ -1,9 +1,7 @@
|
|||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use crate::{
|
use crate::assembler::AssembleError;
|
||||||
AssembleError,
|
use crate::assembler::model::{Module, Opcode, Symbol, Token};
|
||||||
model::{Module, Opcode, Symbol, Token},
|
|
||||||
};
|
|
||||||
use common::prelude::Register;
|
use common::prelude::Register;
|
||||||
|
|
||||||
pub fn lexer(mut program: String, module: u64) -> Result<Vec<Token>, AssembleError> {
|
pub fn lexer(mut program: String, module: u64) -> Result<Vec<Token>, AssembleError> {
|
||||||
@@ -0,0 +1,248 @@
|
|||||||
|
use std::{
|
||||||
|
collections::HashSet,
|
||||||
|
fmt, fs,
|
||||||
|
hash::{DefaultHasher, Hash, Hasher},
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
|
use common::prelude::Instruction;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
assembler::{
|
||||||
|
expand::expand_pseudo_ops,
|
||||||
|
model::{Node, Opcode, Symbol, Token, TokenType},
|
||||||
|
parser::{Parser, Program},
|
||||||
|
resolver::{create_sections, resolve_dependencies, resolve_symbols},
|
||||||
|
},
|
||||||
|
codegen, log, node,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub mod codegen;
|
||||||
|
pub mod expand;
|
||||||
|
pub mod lexer;
|
||||||
|
pub mod model;
|
||||||
|
pub mod parser;
|
||||||
|
pub mod resolver;
|
||||||
|
|
||||||
|
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
||||||
|
let mut modules = HashSet::<u64>::new();
|
||||||
|
let mut program = Program::new();
|
||||||
|
|
||||||
|
let hash = quick_hash(src);
|
||||||
|
modules.insert(hash);
|
||||||
|
|
||||||
|
prepare_dependency(src, &mut modules, &mut program)?;
|
||||||
|
let mut nodes = program.nodes;
|
||||||
|
|
||||||
|
create_sections(&mut nodes)?;
|
||||||
|
resolve_symbols(&mut nodes)?;
|
||||||
|
|
||||||
|
let instructions = codegen(nodes)?;
|
||||||
|
for inst in instructions.iter() {
|
||||||
|
println!("{inst}");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(instructions)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepare_dependency(
|
||||||
|
path: &Path,
|
||||||
|
modules: &mut HashSet<u64>,
|
||||||
|
program: &mut Program,
|
||||||
|
) -> Result<(), AssembleError> {
|
||||||
|
let filename = path.file_name().unwrap().to_str().unwrap();
|
||||||
|
if let Ok(path) = path.canonicalize() {
|
||||||
|
log(&format!(
|
||||||
|
"{:20} {:20} [{}]",
|
||||||
|
"Building",
|
||||||
|
filename,
|
||||||
|
path.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let src = fs::read_to_string(path)
|
||||||
|
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
|
||||||
|
let file_hash = quick_hash(path);
|
||||||
|
|
||||||
|
log(&format!("{:20} {:20}", "Tokenising", filename));
|
||||||
|
let tokens = lexer::lexer(src, file_hash)?;
|
||||||
|
|
||||||
|
log(&format!("{:20} {:20}", "Parsing", filename));
|
||||||
|
let parsed = Parser::parse_nodes(tokens)?;
|
||||||
|
|
||||||
|
log(&format!("{:20} {:20}", "Resolving Deps", filename));
|
||||||
|
let nodes = resolve_dependencies(parsed)?;
|
||||||
|
|
||||||
|
let deps = Parser::get_dependencies(&nodes)?;
|
||||||
|
|
||||||
|
log(&format!(
|
||||||
|
"{:20} {:20}",
|
||||||
|
"Expanding PseudoInstructions", filename
|
||||||
|
));
|
||||||
|
let mut nodes = expand_pseudo_ops(nodes, file_hash)?;
|
||||||
|
|
||||||
|
// add a section instruction
|
||||||
|
nodes.insert(
|
||||||
|
0,
|
||||||
|
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
|
||||||
|
);
|
||||||
|
|
||||||
|
for n in nodes.iter() {
|
||||||
|
println!("{n}");
|
||||||
|
}
|
||||||
|
|
||||||
|
program.add_module(nodes);
|
||||||
|
|
||||||
|
for dep in deps {
|
||||||
|
log(&format!(
|
||||||
|
"{:20} {:20}",
|
||||||
|
"Including",
|
||||||
|
dep.file_name().unwrap().to_str().unwrap()
|
||||||
|
));
|
||||||
|
|
||||||
|
if !modules.contains(&quick_hash(&dep)) {
|
||||||
|
modules.insert(quick_hash(&dep));
|
||||||
|
prepare_dependency(dep.as_path(), modules, program)?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Placeholder build step: ignores `_src` and always succeeds with an empty
/// instruction list.
fn _build(_src: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
    let instructions = Vec::new();
    Ok(instructions)
}
|
||||||
|
|
||||||
|
/// Turns a list of instructions back into assembly source text.
///
/// TODO: disassembling functionality
/// - We probably don't need to implement this for a while yet.
/// - This method should recover symbols such as labels and variables from the
///   human-written assembly, recognising sequences that are expansions of
///   pseudo-instructions and reversing them to produce something close to the
///   original source code.
///
/// # Panics
///
/// Always panics at the moment: the body is `todo!()`.
pub fn disassemble(_: Vec<Instruction>) -> String {
    todo!()
}
|
||||||
|
|
||||||
|
/// Errors reported while assembling a program.
#[derive(Debug)]
pub enum AssembleError {
    /// Unspecific failure; displayed as "Generic error".
    Generic,
    /// Displayed as "Unexpected end of file".
    UnexpectedEof,
    /// A source file could not be used; carries the offending path.
    InvalidFile(PathBuf),
    /// A token of the wrong kind was found; carries the token that was seen
    /// and the token type that was expected.
    UnexpectedToken(Token, TokenType),
    /// Displayed as "Invalid argument".
    InvalidArg,
    /// A symbol was referenced that has no definition; carries the symbol.
    UndefinedSymbol(Symbol),
}
|
||||||
|
|
||||||
|
impl fmt::Display for AssembleError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
AssembleError::Generic => write!(f, "Generic error"),
|
||||||
|
AssembleError::UnexpectedToken(tok, expected) => {
|
||||||
|
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
|
||||||
|
}
|
||||||
|
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
|
||||||
|
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
|
||||||
|
AssembleError::InvalidArg => write!(f, "Invalid argument"),
|
||||||
|
AssembleError::UndefinedSymbol(symbol) => {
|
||||||
|
write!(f, "Undefined symbol {symbol}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a 64-bit hash identifying the file at `value`.
///
/// The path is canonicalized first so that different spellings of the same
/// file hash identically. When canonicalization fails (for example because
/// the file does not exist) the path is hashed as given instead of panicking.
///
/// The hash comes from `DefaultHasher`, so it is only meaningful within one
/// build of the program and must not be persisted.
fn quick_hash(value: &Path) -> u64 {
    let resolved = value
        .canonicalize()
        .unwrap_or_else(|_| value.to_path_buf());

    let mut hasher = DefaultHasher::new();
    // Hash the `Option<&str>` view of the path, as before, so the hashes of
    // resolvable UTF-8 paths keep their previous values.
    resolved.to_str().hash(&mut hasher);
    hasher.finish()
}
|
||||||
|
|
||||||
|
/// Lexes and parses an assembly snippet into nodes.
///
/// `dsa!(hash, "source")` parses the literal text. `dsa!(hash, "fmt", args…)`
/// first builds the text with `format!`. `hash` is passed to the lexer as the
/// module identifier.
///
/// The expansion uses `?`, so the macro must be invoked inside a function whose
/// error type can absorb the lexer's and the parser's errors.
#[macro_export]
macro_rules! dsa {
    // Version with formatting arguments
    ($hash:expr, $input:expr, $($args:expr),+) => {{
        let input = format!($input, $($args),+);
        // The lexer and parser live under `assembler`, like the paths used by
        // the other exported macros of this module.
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
    // Version without formatting
    ($hash:expr, $input:expr) => {{
        let input = String::from($input);
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
}
|
||||||
|
|
||||||
|
/// Extracts the payload of a token of the given kind.
///
/// `expect_token!(token, Variant)` evaluates to `Ok(payload.clone())` when
/// `token` is `Token::Variant(payload)`. Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::Variant))`.
///
/// `Variant` must name a `Token` variant with a single payload and a
/// `TokenType` variant of the same name; `Symbol`, `Register`, `Immediate`,
/// `StringLit` and `Opcode` are the kinds used so far.
#[macro_export]
macro_rules! expect_token {
    // One arm covers every kind: the variant name selects both the `Token`
    // pattern and the `TokenType` reported on mismatch.
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::assembler::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::assembler::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::assembler::model::TokenType::$variant,
            )),
        }
    };
}
|
||||||
|
|
||||||
|
/// Checks that a token is one of the listed kinds and returns the whole token.
///
/// `expect_type!(token, A, B, …)` evaluates to `Ok(token.clone())` when
/// `token` is any of `Token::A(_)`, `Token::B(_)`, …. Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::A))`; only the
/// first listed kind is reported as the expected one.
///
/// `token` may be an owned `Token` or a reference to one; both forms yield an
/// owned `Token` in the result.
#[macro_export]
macro_rules! expect_type {
    ($token:expr, $($variant:ident),+) => {{
        let token = $token;
        match &token {
            $(
                $crate::assembler::model::Token::$variant(_) => Ok(token.clone()),
            )+
            // `token.clone()` yields an owned `Token` for both owned and
            // borrowed input, the same as in the success arms. The helper rule
            // is called through `$crate` so the macro also works when invoked
            // by path without being imported by name.
            _ => Err($crate::assembler::AssembleError::UnexpectedToken(
                token.clone(),
                $crate::expect_type!(@get_first_type $($variant),+),
            )),
        }
    }};

    // Internal rule: maps the first listed kind to its `TokenType`.
    (@get_first_type $first:ident $(, $rest:ident)*) => {
        $crate::assembler::model::TokenType::$first
    };
}
|
||||||
@@ -2,7 +2,7 @@ use std::{fmt, str::FromStr};
|
|||||||
|
|
||||||
use common::prelude::Register;
|
use common::prelude::Register;
|
||||||
|
|
||||||
use crate::AssembleError;
|
use crate::assembler::AssembleError;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Node {
|
pub struct Node {
|
||||||
@@ -123,8 +123,8 @@ impl fmt::Display for Opcode {
|
|||||||
Opcode::Int => write!(f, "int"),
|
Opcode::Int => write!(f, "int"),
|
||||||
Opcode::Irt => write!(f, "irt"),
|
Opcode::Irt => write!(f, "irt"),
|
||||||
Opcode::Hlt => write!(f, "hlt"),
|
Opcode::Hlt => write!(f, "hlt"),
|
||||||
Opcode::Iadd => write!(f, "iadd"),
|
Opcode::AddI => write!(f, "addi"),
|
||||||
Opcode::Isub => write!(f, "isub"),
|
Opcode::SubI => write!(f, "subi"),
|
||||||
Opcode::Db => write!(f, "db"),
|
Opcode::Db => write!(f, "db"),
|
||||||
Opcode::Dh => write!(f, "dh"),
|
Opcode::Dh => write!(f, "dh"),
|
||||||
Opcode::Dw => write!(f, "dw"),
|
Opcode::Dw => write!(f, "dw"),
|
||||||
@@ -240,8 +240,8 @@ pub enum Opcode {
|
|||||||
Int,
|
Int,
|
||||||
Irt,
|
Irt,
|
||||||
Hlt,
|
Hlt,
|
||||||
Iadd,
|
AddI,
|
||||||
Isub,
|
SubI,
|
||||||
// Pseudo-instructions
|
// Pseudo-instructions
|
||||||
Db,
|
Db,
|
||||||
Dh,
|
Dh,
|
||||||
@@ -316,8 +316,8 @@ impl FromStr for Opcode {
|
|||||||
"int" => Ok(Self::Int),
|
"int" => Ok(Self::Int),
|
||||||
"irt" => Ok(Self::Irt),
|
"irt" => Ok(Self::Irt),
|
||||||
"hlt" => Ok(Self::Hlt),
|
"hlt" => Ok(Self::Hlt),
|
||||||
"iadd" => Ok(Self::Iadd),
|
"addi" => Ok(Self::AddI),
|
||||||
"isub" => Ok(Self::Isub),
|
"subi" => Ok(Self::SubI),
|
||||||
"db" => Ok(Self::Db),
|
"db" => Ok(Self::Db),
|
||||||
"dh" => Ok(Self::Dh),
|
"dh" => Ok(Self::Dh),
|
||||||
"dw" => Ok(Self::Dw),
|
"dw" => Ok(Self::Dw),
|
||||||
@@ -339,7 +339,7 @@ impl Opcode {
|
|||||||
"nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw",
|
"nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw",
|
||||||
"lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc",
|
"lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc",
|
||||||
"dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor",
|
"dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor",
|
||||||
"xnor", "int", "irt", "hlt", "iadd", "isub", // Pseudo-instructions
|
"xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions
|
||||||
"db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "include",
|
"db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "include",
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -382,8 +382,8 @@ impl Opcode {
|
|||||||
Self::Int => Some(0x22),
|
Self::Int => Some(0x22),
|
||||||
Self::Irt => Some(0x23),
|
Self::Irt => Some(0x23),
|
||||||
Self::Hlt => Some(0x24),
|
Self::Hlt => Some(0x24),
|
||||||
Self::Iadd => Some(0x25),
|
Self::AddI => Some(0x25),
|
||||||
Self::Isub => Some(0x26),
|
Self::SubI => Some(0x26),
|
||||||
Self::Segment => Some(0x27),
|
Self::Segment => Some(0x27),
|
||||||
// Pseudo-instructions don't have opcode values
|
// Pseudo-instructions don't have opcode values
|
||||||
_ => None,
|
_ => None,
|
||||||
@@ -1,11 +1,8 @@
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use crate::{
|
use crate::{assembler::AssembleError, expect_token, expect_type, node};
|
||||||
AssembleError, expect_token, expect_type,
|
|
||||||
model::{Node, Opcode, Token},
|
|
||||||
node,
|
|
||||||
};
|
|
||||||
|
|
||||||
|
use crate::assembler::model::{Node, Opcode, Token};
|
||||||
use common::prelude::*;
|
use common::prelude::*;
|
||||||
|
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
@@ -177,7 +174,7 @@ impl Parser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Immediate Arithmetic
|
// Immediate Arithmetic
|
||||||
Opcode::Iadd | Opcode::Isub => {
|
Opcode::AddI | Opcode::SubI => {
|
||||||
let reg = expect_type!(self.next()?, Register)?;
|
let reg = expect_type!(self.next()?, Register)?;
|
||||||
let imm = expect_type!(self.next()?, Immediate)?;
|
let imm = expect_type!(self.next()?, Immediate)?;
|
||||||
let reg2 = if expect_type!(self.peek_next()?, Register).is_ok() {
|
let reg2 = if expect_type!(self.peek_next()?, Register).is_ok() {
|
||||||
@@ -2,11 +2,9 @@ use std::{collections::HashMap, path::PathBuf};
|
|||||||
|
|
||||||
use common::prelude::Register;
|
use common::prelude::Register;
|
||||||
|
|
||||||
use crate::{
|
use crate::assembler::model::{Module, Node, Opcode, Symbol, Token};
|
||||||
AssembleError,
|
use crate::assembler::quick_hash;
|
||||||
model::{Module, Node, Opcode, Symbol, Token},
|
use crate::{assembler::AssembleError, node};
|
||||||
node, quick_hash,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> {
|
pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> {
|
||||||
let symbol_table = generate_symbol_table(nodes)?;
|
let symbol_table = generate_symbol_table(nodes)?;
|
||||||
+11
-243
@@ -1,253 +1,21 @@
|
|||||||
use core::fmt;
|
use assembler::codegen::codegen;
|
||||||
use std::{
|
use assembler::expand::expand_pseudo_ops;
|
||||||
collections::HashSet,
|
use assembler::model::{Node, Opcode, Symbol, Token, TokenType};
|
||||||
fs,
|
use assembler::parser::{Parser, Program};
|
||||||
hash::{DefaultHasher, Hash, Hasher},
|
use assembler::resolver::{create_sections, resolve_dependencies, resolve_symbols};
|
||||||
path::{Path, PathBuf},
|
|
||||||
};
|
|
||||||
|
|
||||||
use common::prelude::*;
|
use common::prelude::*;
|
||||||
|
use core::fmt;
|
||||||
|
|
||||||
use crate::{
|
pub mod assembler;
|
||||||
codegen::codegen,
|
|
||||||
expand::expand_pseudo_ops,
|
|
||||||
model::{Node, Opcode, Symbol, Token, TokenType},
|
|
||||||
parser::{Parser, Program},
|
|
||||||
resolver::{create_sections, resolve_dependencies, resolve_symbols},
|
|
||||||
};
|
|
||||||
|
|
||||||
pub mod codegen;
|
use crate::assembler::lexer;
|
||||||
pub mod expand;
|
|
||||||
pub mod lexer;
|
|
||||||
pub mod model;
|
|
||||||
pub mod parser;
|
|
||||||
pub mod resolver;
|
|
||||||
|
|
||||||
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
|
pub mod prelude {
|
||||||
let mut modules = HashSet::<u64>::new();
|
pub use crate::assembler::assemble;
|
||||||
let mut program = Program::new();
|
pub use crate::assembler::disassemble;
|
||||||
|
|
||||||
let hash = quick_hash(src);
|
|
||||||
modules.insert(hash);
|
|
||||||
|
|
||||||
prepare_dependency(src, &mut modules, &mut program)?;
|
|
||||||
let mut nodes = program.nodes;
|
|
||||||
|
|
||||||
create_sections(&mut nodes)?;
|
|
||||||
resolve_symbols(&mut nodes)?;
|
|
||||||
|
|
||||||
let instructions = codegen(nodes)?;
|
|
||||||
for inst in instructions.iter() {
|
|
||||||
println!("{inst}");
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(instructions)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn prepare_dependency(
|
|
||||||
path: &Path,
|
|
||||||
modules: &mut HashSet<u64>,
|
|
||||||
program: &mut Program,
|
|
||||||
) -> Result<(), AssembleError> {
|
|
||||||
let filename = path.file_name().unwrap().to_str().unwrap();
|
|
||||||
if let Ok(path) = path.canonicalize() {
|
|
||||||
log(&format!(
|
|
||||||
"{:20} {:20} [{}]",
|
|
||||||
"Building",
|
|
||||||
filename,
|
|
||||||
path.display()
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let src = fs::read_to_string(path)
|
|
||||||
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
|
|
||||||
let file_hash = quick_hash(path);
|
|
||||||
|
|
||||||
log(&format!("{:20} {:20}", "Tokenising", filename));
|
|
||||||
let tokens = lexer::lexer(src, file_hash)?;
|
|
||||||
|
|
||||||
log(&format!("{:20} {:20}", "Parsing", filename));
|
|
||||||
let parsed = Parser::parse_nodes(tokens)?;
|
|
||||||
|
|
||||||
log(&format!("{:20} {:20}", "Resolving Deps", filename));
|
|
||||||
let nodes = resolve_dependencies(parsed)?;
|
|
||||||
|
|
||||||
let deps = Parser::get_dependencies(&nodes)?;
|
|
||||||
|
|
||||||
log(&format!(
|
|
||||||
"{:20} {:20}",
|
|
||||||
"Expanding PseudoInstructions", filename
|
|
||||||
));
|
|
||||||
let mut nodes = expand_pseudo_ops(nodes, file_hash)?;
|
|
||||||
|
|
||||||
// add a section instruction
|
|
||||||
nodes.insert(
|
|
||||||
0,
|
|
||||||
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
|
|
||||||
);
|
|
||||||
|
|
||||||
for n in nodes.iter() {
|
|
||||||
println!("{n}");
|
|
||||||
}
|
|
||||||
|
|
||||||
program.add_module(nodes);
|
|
||||||
|
|
||||||
for dep in deps {
|
|
||||||
log(&format!(
|
|
||||||
"{:20} {:20}",
|
|
||||||
"Including",
|
|
||||||
dep.file_name().unwrap().to_str().unwrap()
|
|
||||||
));
|
|
||||||
|
|
||||||
if !modules.contains(&quick_hash(&dep)) {
|
|
||||||
modules.insert(quick_hash(&dep));
|
|
||||||
prepare_dependency(dep.as_path(), modules, program)?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _build(_src: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
|
|
||||||
Ok(vec![])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// TODO: disassembling functionality
|
|
||||||
/// - We probably don't need to implement this for a while yet.
|
|
||||||
/// - This method should recover symbols such as labels and variables from the human
|
|
||||||
/// written assembly, recognising sequences that are expansions of pseudo-instructions
|
|
||||||
/// and reversing this to produce near enough the original source code.
|
|
||||||
pub fn disassemble(_: Vec<Instruction>) -> String {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum AssembleError {
|
|
||||||
Generic,
|
|
||||||
UnexpectedEof,
|
|
||||||
InvalidFile(PathBuf),
|
|
||||||
UnexpectedToken(Token, TokenType),
|
|
||||||
InvalidArg,
|
|
||||||
UndefinedSymbol(Symbol),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for AssembleError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
AssembleError::Generic => write!(f, "Generic error"),
|
|
||||||
AssembleError::UnexpectedToken(tok, expected) => {
|
|
||||||
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
|
|
||||||
}
|
|
||||||
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
|
|
||||||
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
|
|
||||||
AssembleError::InvalidArg => write!(f, "Invalid argument"),
|
|
||||||
AssembleError::UndefinedSymbol(symbol) => {
|
|
||||||
write!(f, "Undefined symbol {symbol}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn quick_hash(value: &Path) -> u64 {
|
|
||||||
let mut hasher = DefaultHasher::new();
|
|
||||||
value.canonicalize().unwrap().to_str().hash(&mut hasher);
|
|
||||||
hasher.finish()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
|
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
|
||||||
fn log(message: &str) {
|
fn log(message: &str) {
|
||||||
println!("\x1b[32mINFO:\x1b[0m {message}");
|
println!("\x1b[32mINFO:\x1b[0m {message}");
|
||||||
}
|
}
|
||||||
|
|
||||||
// create a macro that lexes and parses the input string into Nodes
|
|
||||||
#[macro_export]
|
|
||||||
macro_rules! dsa {
|
|
||||||
// Version with formatting arguments
|
|
||||||
($hash:expr, $input:expr, $($args:expr),+) => {{
|
|
||||||
let input = format!($input, $($args),+);
|
|
||||||
let tokens = $crate::lexer::lexer(input, $hash)?;
|
|
||||||
let parsed = $crate::parser::Parser::parse_nodes(tokens)?;
|
|
||||||
parsed
|
|
||||||
}};
|
|
||||||
// Version without formatting
|
|
||||||
($hash:expr, $input:expr) => {{
|
|
||||||
let input = String::from($input);
|
|
||||||
let tokens = $crate::lexer::lexer(input, $hash)?;
|
|
||||||
let parsed = $crate::parser::Parser::parse_nodes(tokens)?;
|
|
||||||
parsed
|
|
||||||
}};
|
|
||||||
}
|
|
||||||
|
|
||||||
#[macro_export]
|
|
||||||
macro_rules! expect_token {
|
|
||||||
($token:expr, Symbol) => {
|
|
||||||
match $token {
|
|
||||||
$crate::model::Token::Symbol(value) => Ok(value.clone()),
|
|
||||||
other => Err($crate::AssembleError::UnexpectedToken(
|
|
||||||
other.clone(),
|
|
||||||
$crate::model::TokenType::Symbol,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
($token:expr, Register) => {
|
|
||||||
match $token {
|
|
||||||
$crate::model::Token::Register(value) => Ok(value.clone()),
|
|
||||||
other => Err($crate::AssembleError::UnexpectedToken(
|
|
||||||
other.clone(),
|
|
||||||
$crate::model::TokenType::Register,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
($token:expr, Immediate) => {
|
|
||||||
match $token {
|
|
||||||
$crate::model::Token::Immediate(value) => Ok(value.clone()),
|
|
||||||
other => Err($crate::AssembleError::UnexpectedToken(
|
|
||||||
other.clone(),
|
|
||||||
$crate::model::TokenType::Immediate,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
($token:expr, StringLit) => {
|
|
||||||
match $token {
|
|
||||||
$crate::model::Token::StringLit(value) => Ok(value.clone()),
|
|
||||||
other => Err($crate::AssembleError::UnexpectedToken(
|
|
||||||
other.clone(),
|
|
||||||
$crate::model::TokenType::StringLit,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
($token:expr, Opcode) => {
|
|
||||||
match $token {
|
|
||||||
$crate::model::Token::Opcode(value) => Ok(value.clone()),
|
|
||||||
other => Err($crate::AssembleError::UnexpectedToken(
|
|
||||||
other.clone(),
|
|
||||||
$crate::model::TokenType::Opcode,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#[macro_export]
|
|
||||||
macro_rules! expect_type {
|
|
||||||
($token:expr, $($variant:ident),+) => {{
|
|
||||||
let token = $token;
|
|
||||||
match &token {
|
|
||||||
$(
|
|
||||||
$crate::model::Token::$variant(_) => Ok(token.clone()),
|
|
||||||
)+
|
|
||||||
other => {
|
|
||||||
let expected_type = expect_type!(@get_first_type $($variant),+);
|
|
||||||
Err($crate::AssembleError::UnexpectedToken(
|
|
||||||
other.clone().clone(),
|
|
||||||
expected_type,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}};
|
|
||||||
|
|
||||||
(@get_first_type Symbol $(, $rest:ident)*) => { $crate::model::TokenType::Symbol };
|
|
||||||
(@get_first_type Register $(, $rest:ident)*) => { $crate::model::TokenType::Register };
|
|
||||||
(@get_first_type Immediate $(, $rest:ident)*) => { $crate::model::TokenType::Immediate };
|
|
||||||
(@get_first_type StringLit $(, $rest:ident)*) => { $crate::model::TokenType::StringLit };
|
|
||||||
(@get_first_type Opcode $(, $rest:ident)*) => { $crate::model::TokenType::Opcode };
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ fn main() {
|
|||||||
let src = PathBuf::from(input_path);
|
let src = PathBuf::from(input_path);
|
||||||
let mut output_file = fs::File::create(output_path).unwrap();
|
let mut output_file = fs::File::create(output_path).unwrap();
|
||||||
|
|
||||||
match assembler::assemble(&src) {
|
match assembler::assembler::assemble(&src) {
|
||||||
Ok(res) => {
|
Ok(res) => {
|
||||||
res.iter().map(|i| i.encode()).for_each(|i| {
|
res.iter().map(|i| i.encode()).for_each(|i| {
|
||||||
output_file.write_all(&i.to_le_bytes()).unwrap();
|
output_file.write_all(&i.to_le_bytes()).unwrap();
|
||||||
|
|||||||
Reference in New Issue
Block a user