diff --git a/resources/concept/assemblerconcept.md b/resources/concept/assemblerconcept.md
new file mode 100644
index 0000000..72ebfec
--- /dev/null
+++ b/resources/concept/assemblerconcept.md
@@ -0,0 +1,279 @@
+1. Source Position Tracking
+Add types that describe where a piece of text sits in a source file:
+
+```rust
+// src/assembler/source.rs
+
+/// A single location in a source file.
+///
+/// The lexer in section 4 fills `line` and `column` starting from 1 and
+/// `offset` as a byte index into the source text starting from 0.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct SourcePosition {
+    pub line: u32,
+    pub column: u32,
+    pub offset: usize,
+}
+
+/// A region of one source file, bounded by two positions.
+///
+/// The lexer in section 4 sets `end` to the position just past the token's
+/// last byte, i.e. `end` is exclusive.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct SourceSpan {
+    pub start: SourcePosition,
+    pub end: SourcePosition,
+    pub file_id: u64, // Hash of the file path
+}
+
+impl SourceSpan {
+    /// Builds a span from explicit start and end positions.
+    pub fn new(start: SourcePosition, end: SourcePosition, file_id: u64) -> Self {
+        Self { start, end, file_id }
+    }
+
+    /// Builds a span whose `start` and `end` are both `pos`.
+    // NOTE(review): with the lexer's exclusive `end` this span is zero-width;
+    // confirm whether callers expect it to cover one character.
+    pub fn single_char(pos: SourcePosition, file_id: u64) -> Self {
+        Self::new(pos, pos, file_id)
+    }
+}
+```
+
+2. Enhanced Token with Source Information
+Update the Token type to include source positions:
+
+```rust
+// src/assembler/model.rs
+
+/// A lexed token together with the place it came from.
+pub struct Token {
+    pub kind: TokenKind,
+    pub span: SourceSpan,
+    pub raw: String, // Original source text
+}
+
+pub enum TokenKind {
+    // ... existing variants ...
+}
+```
+
+3. 
Enhanced CodeModule Structure
+Enhance the `CodeModule` struct to track source information:
+
+```rust
+// src/assembler/mod.rs
+
+/// One source file as it moves through the assembler pipeline.
+pub struct CodeModule {
+    pub path: PathBuf,
+    /// `quick_hash` of `path`; the lexer stores it in `SourceSpan::file_id`.
+    pub hash: u64,
+    pub source: String,
+    /// Byte offset at which each line of `source` starts, in ascending order.
+    pub lines: Vec<usize>, // Line start offsets for quick lookup
+    pub tokens: Vec<Token>,
+    // NOTE(review): the element types of the next two fields were missing from
+    // the original text; `Node` and `CodeModule` are assumptions - confirm.
+    pub nodes: Vec<Node>,
+    pub dependencies: Vec<CodeModule>,
+}
+
+impl CodeModule {
+    /// Creates a module for `path` with the given `source` text and
+    /// pre-computes the start offset of every line.
+    pub fn new(path: PathBuf, source: String) -> Self {
+        let hash = quick_hash(&path);
+        // `split_inclusive` keeps each line terminator inside its piece, so the
+        // running total stays exact for both "\n" and "\r\n" endings.
+        // (`lines()` strips the terminator, and adding a fixed 1 per line
+        // drifts by one byte per line on "\r\n" input.)
+        let lines = source
+            .split_inclusive('\n')
+            .scan(0, |offset, line| {
+                let start = *offset;
+                *offset += line.len();
+                Some(start)
+            })
+            .collect();
+
+        Self {
+            path,
+            hash,
+            source,
+            lines,
+            tokens: Vec::new(),
+            nodes: Vec::new(),
+            dependencies: Vec::new(),
+        }
+    }
+
+    /// Converts a byte `offset` into `(line, column)`, both starting from 1.
+    ///
+    /// The column is measured in bytes from the start of the line.
+    pub fn position_from_offset(&self, offset: usize) -> (u32, u32) {
+        match self.lines.binary_search(&offset) {
+            // `offset` is exactly the first byte of a line.
+            Ok(line) => (line as u32 + 1, 1),
+            // Nothing starts at or before `offset`: only possible when `lines` is empty.
+            Err(0) => (1, offset as u32 + 1),
+            // `line` is the insertion point, so the containing line is `line - 1`
+            // (0-based), which is `line` when counted from 1.
+            Err(line) => {
+                let line_start = self.lines[line - 1];
+                (line as u32, (offset - line_start + 1) as u32)
+            }
+        }
+    }
+}
+```
+
+4. Enhanced Lexer with Source Positions
+Update the lexer to track source positions:
+
+```rust
+// src/assembler/lexer.rs
+
+/// Splits `module.source` into tokens, recording a `SourceSpan` for each one,
+/// and stores the result in `module.tokens`.
+// NOTE(review): this signature names `AssembleError` while the rest of this
+// document uses `AssemblerError` - confirm which type is intended.
+pub fn lex(module: &mut CodeModule) -> Result<(), AssembleError> {
+    let source = &module.source;
+    let mut tokens = Vec::new();
+    let mut pos = 0; // byte offset of the next unread character
+    let mut line_start = 0; // byte offset at which the current line begins
+    let mut line = 1; // current line number, starting from 1
+
+    while pos < source.len() {
+        let c = source[pos..]
+            .chars()
+            .next()
+            .expect("pos is below source.len() and on a char boundary");
+
+        if c == '\n' {
+            line += 1;
+            line_start = pos + 1;
+            pos += 1;
+            continue;
+        }
+
+        if c.is_whitespace() {
+            // Step by the character's encoded width: some whitespace (for
+            // example U+00A0) takes more than one byte, and stepping a single
+            // byte would leave `pos` inside it and make the next slice panic.
+            pos += c.len_utf8();
+            continue;
+        }
+
+        let token_start = pos;
+        // ... existing token parsing logic ...
+
+        // When creating a token:
+        let start_pos = SourcePosition {
+            line,
+            column: (token_start - line_start + 1) as u32,
+            offset: token_start,
+        };
+
+        // Update pos based on token length
+        let token_length = /* calculate token length */;
+        pos += token_length;
+
+        let end_pos = SourcePosition {
+            line,
+            column: (pos - line_start + 1) as u32,
+            offset: pos,
+        };
+
+        tokens.push(Token {
+            kind: token_kind,
+            span: SourceSpan::new(start_pos, end_pos, module.hash),
+            raw: source[token_start..pos].to_string(),
+        });
+    }
+
+    module.tokens = tokens;
+    Ok(())
+}
+```
+
+5. Enhanced Error Reporting
+Create a structured error type with source context:
+
+```rust
+// src/assembler/error.rs
+
+/// An assembler error tied to the source span that caused it.
+#[derive(Debug)]
+pub struct AssemblerError {
+    pub kind: ErrorKind,
+    pub span: SourceSpan,
+    pub message: String,
+    /// Extra notes, printed one per line after the source excerpt.
+    pub context: Vec<String>,
+}
+
+impl AssemblerError {
+    /// Creates an error with no context notes.
+    pub fn new(kind: ErrorKind, span: SourceSpan, message: impl Into<String>) -> Self {
+        Self {
+            kind,
+            span,
+            message: message.into(),
+            context: Vec::new(),
+        }
+    }
+
+    /// Appends one context note and returns the error, so calls can be chained.
+    pub fn with_context(mut self, context: impl Into<String>) -> Self {
+        self.context.push(context.into());
+        self
+    }
+
+    /// Renders the error as `path:line:col: message`, followed by the source
+    /// line, a caret under the start column, and any context notes.
+    pub fn format(&self, module: &CodeModule) -> String {
+        let (line, col) = module.position_from_offset(self.span.start.offset);
+        let line_content = module.source.lines().nth(line as usize - 1).unwrap_or("");
+
+        let mut output = format!(
+            "{}:{}:{}: {}\n",
+            module.path.display(),
+            line,
+            col,
+            self.message
+        );
+
+        // Add source line with caret
+        output.push_str(&format!("{}\n", line_content));
+        output.push_str(&" ".repeat(col as usize - 1));
+        output.push_str("^\n");
+
+        // Add context if any
+        for ctx in &self.context {
+            output.push_str(&format!(" = note: {}\n", ctx));
+        }
+
+        output
+    }
+}
+```
+
+6. 
Integration with Compilation Pipeline
+Update the compilation pipeline to use the enhanced types:
+
+```rust
+// src/assembler/mod.rs
+
+/// Runs the whole pipeline on the file at `src`: read, lex, parse, resolve,
+/// then generate code.
+// NOTE(review): the success type was missing from the original text; `Vec<u8>`
+// is an assumption - confirm against `codegen::generate`.
+// NOTE(review): `AssemblerError::io_error` is not part of the `impl` shown in
+// section 5 and still has to be defined.
+pub fn assemble(src: &Path) -> Result<Vec<u8>, AssemblerError> {
+    let source = std::fs::read_to_string(src)
+        .map_err(|e| AssemblerError::io_error(src, e))?;
+
+    let mut module = CodeModule::new(src.to_path_buf(), source);
+
+    // Lexing
+    lexer::lex(&mut module)?;
+
+    // Parsing
+    parser::parse(&mut module)?;
+
+    // Resolution
+    resolver::resolve(&mut module)?;
+
+    // Code generation
+    codegen::generate(&module)
+}
+```
+
+7. Logging Integration
+Enhance the logging system to include source context:
+
+```rust
+// src/util/logging.rs
+
+/// Something that can emit log messages, optionally pointing at a source span.
+pub trait Loggable {
+    /// Logs `message` at `level`.
+    fn log(&self, level: LogLevel, message: impl std::fmt::Display);
+    /// Logs `message` at `level` together with the source line `span` starts on.
+    fn log_with_span(&self, level: LogLevel, span: &SourceSpan, message: impl std::fmt::Display);
+}
+
+impl Loggable for CodeModule {
+    // The trait requires `log`; without it this impl does not compile.
+    // NOTE(review): the message layout here is a suggestion - adjust as needed.
+    fn log(&self, level: LogLevel, message: impl std::fmt::Display) {
+        log::log!(level, "{}: {}", self.path.display(), message);
+    }
+
+    fn log_with_span(&self, level: LogLevel, span: &SourceSpan, message: impl std::fmt::Display) {
+        // A span from another file is handed to the dependency that owns it.
+        if span.file_id != self.hash {
+            if let Some(dep) = self.find_dependency(span.file_id) {
+                return dep.log_with_span(level, span, message);
+            }
+            // NOTE(review): when no dependency matches, the code below reports
+            // the span against this module's source - confirm that is intended.
+        }
+
+        let (line, col) = self.position_from_offset(span.start.offset);
+        let line_content = self.source.lines().nth(line as usize - 1).unwrap_or("");
+
+        log::log!(
+            level,
+            "{}:{}:{}: {}\n {}\n {}{}",
+            self.path.display(),
+            line,
+            col,
+            message,
+            line_content,
+            " ".repeat(col as usize - 1),
+            "^"
+        );
+    }
+}
+```
+
+8. Usage Example
+Here's how you'd use this in practice:
+
+```rust
+// In your parser or code that needs to report errors
+// NOTE(review): the return type's arguments were missing from the original
+// text; the error type follows from the `Err(AssemblerError::new(..))` below,
+// while `Token` as the success type is an assumption - confirm.
+fn parse_token(&mut self, module: &CodeModule) -> Result<Token, AssemblerError> {
+    // ...
+    if !is_valid_token(&token) {
+        return Err(AssemblerError::new(
+            ErrorKind::SyntaxError,
+            token.span,
+            "Invalid token"
+        ).with_context("Expected a valid instruction or directive"));
+    }
+    // ...
+}
+```
diff --git a/resources/dsa/lib/print.dsa b/resources/dsa/lib/print.dsa index b9a4654..d6ad711 100644 --- a/resources/dsa/lib/print.dsa +++ b/resources/dsa/lib/print.dsa @@ -13,11 +13,27 @@ // usage for reset: // push pcx // jmp print::reset +// +// usage for clear: +// push pcx +// jmp print::clear +// +// usage for print_byte: +// push (register containing byte) +// push pcx +// jmp print::print_byte +// +// usage for print_word: +// push (register containing word) +// push pcx +// jmp print::print_word +// dw display: 0x20000 dw current: 0x20000 -// prints the given text to the screen. +// ------------------------------------------ +// prints the string at addr(arg[0]) to the screen. print: push bpr mov spr, bpr @@ -25,7 +41,7 @@ print: ldw bpr, rg0, 8 ldw current, rg1 -print_loop: +_print_loop: ldb rg0, acc stb acc, rg1 @@ -33,24 +49,47 @@ print_loop: addi rg1, 1 cmp acc, zero - jne print_loop - jmp end + jne _print_loop + jmp _end -// return -end: - stw rg1, current - - mov bpr, spr - pop bpr - return +// ------------------------------------------ +// prints the value of arg[0] to the screen. +print_word: + // initialise + push bpr + mov spr, bpr -// resets the cursor position on the screen + // load arg[0] into rg0 and the cursor position into rg1 + ldw bpr, rg0, 8 + ldw current, rg1 + + stw rg0, rg1 + addi rg1, 4 + jmp _end + +// ------------------------------------------ +// prints the last byte of arg[0] to the screen. +print_byte: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 + ldw current, rg1 + + stb rg0, rg1 + addi rg1, 1 + jmp _end + +// ------------------------------------------ +// resets the cursor position on the screen to 0x20000. 
(0,0) reset: push bpr mov spr, bpr ldw display, rg1 - jmp end + jmp _end +// ------------------------------------------ +// clears the screen by storing zero into 500 consecutive words, starting at the address held in display clear: push bpr mov spr, bpr @@ -58,10 +97,19 @@ clear: lli 500 rg0 ldw display, rg1 -clear_loop: +_clear_loop: dec rg0 stw zero, rg1 addi rg1, 4 cmp rg0, zero - jgt clear_loop - jmp end \ No newline at end of file + jgt _clear_loop + jmp _end + +// ------------------------------------------ +// shared return path: stores rg1 back into current, restores spr and bpr, then returns +_end: + stw rg1, current + + mov bpr, spr + pop bpr + return \ No newline at end of file diff --git a/resources/dsa/test.dsa b/resources/dsa/test.dsa index 8faa776..06abc66 100644 --- a/resources/dsa/test.dsa +++ b/resources/dsa/test.dsa @@ -9,10 +9,12 @@ init: mov bpr, spr start: - lwi string, rg1 + // string, rg1 + lli 87, rg1 push rg1 - call print::print + + call print::print_byte pop rg1 hlt \ No newline at end of file diff --git a/resources/dsb/test.dsb b/resources/dsb/test.dsb index 67d1c7f..9c51515 100644 Binary files a/resources/dsb/test.dsb and b/resources/dsb/test.dsb differ