Files
2025-06-22 03:51:39 +01:00

7.2 KiB

// src/assembler/source.rs
/// A single location inside a source file.
///
/// The lexer in this design produces 1-based `line` and `column` values, with
/// the column measured in bytes from the start of the line, and `offset` as a
/// byte index into the module's source text.
///
/// Ordering compares `line`, then `column`, then `offset`, so positions taken
/// from the same file sort in reading order. The type is also hashable so it
/// can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SourcePosition {
    /// Line number.
    pub line: u32,
    /// Column within the line.
    pub column: u32,
    /// Byte offset from the start of the source text.
    pub offset: usize,
}

/// A region of one source file, delimited by two positions.
///
/// The struct consists only of `Copy` fields, so it is `Copy` itself: a span
/// can be attached to tokens, errors and log calls without moving it out of
/// its owner or cloning it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceSpan {
    /// Position where the region begins.
    pub start: SourcePosition,
    /// Position where the region ends.
    pub end: SourcePosition,
    /// Identifies the file the span belongs to (hash of the file path).
    pub file_id: u64,
}

impl SourceSpan {
    pub fn new(start: SourcePosition, end: SourcePosition, file_id: u64) -> Self {
        Self { start, end, file_id }
    }
    
    pub fn single_char(pos: SourcePosition, file_id: u64) -> Self {
        Self {
            start: pos,
            end: pos,
            file_id,
        }
    }
}
2. Enhanced Token with Source Information
Update the Token type to include source positions:

```rust
// src/assembler/model.rs
/// A lexical token together with the place it was read from.
pub struct Token {
    /// Classification of the token.
    pub kind: TokenKind,
    /// Location of the token in its source file.
    pub span: SourceSpan,
    pub raw: String,  // Original source text
}

/// Classification stored in `Token::kind`.
pub enum TokenKind {
    // ... existing variants ...
}
```

3. Enhanced CodeModule Structure
Enhance the `CodeModule` struct to track source information:

```rust
// src/assembler/mod.rs
/// One source file being assembled, with the data derived from it.
pub struct CodeModule {
    /// Path the module was created from.
    pub path: PathBuf,
    /// File ID computed from `path`; spans carry it as `file_id`.
    pub hash: u64,
    /// Full text of the file.
    pub source: String,
    pub lines: Vec<usize>,  // Line start offsets for quick lookup
    /// Tokens of `source`; empty until the lexer has run.
    pub tokens: Vec<Token>,
    /// Nodes for this module; empty on creation.
    // NOTE(review): presumably filled by the parser; confirm.
    pub nodes: Vec<Node>,
    /// Other modules this one depends on; empty on creation.
    pub dependencies: Vec<CodeModule>,
}

impl CodeModule {
    /// Creates a module for `source`, the text belonging to `path`.
    ///
    /// `hash` is set to `quick_hash(&path)` and `lines` to the byte offset at
    /// which each line of `source` starts. Tokens, nodes and dependencies
    /// start out empty.
    pub fn new(path: PathBuf, source: String) -> Self {
        let hash = quick_hash(&path);
        // `split_inclusive` keeps the line terminator in every piece, so the
        // running total is the true byte offset of the next line. Measuring
        // the pieces returned by `str::lines` instead would lose the `\r` of
        // each `\r\n` ending and leave every later offset too small.
        let lines = source
            .split_inclusive('\n')
            .scan(0, |next_start, piece| {
                let start = *next_start;
                *next_start += piece.len();
                Some(start)
            })
            .collect();

        Self {
            path,
            hash,
            source,
            lines,
            tokens: Vec::new(),
            nodes: Vec::new(),
            dependencies: Vec::new(),
        }
    }

    /// Converts a byte `offset` into a 1-based `(line, column)` pair.
    ///
    /// The column is the byte distance from the start of the line plus one.
    /// An offset beyond the last line start is attributed to the last line;
    /// with no recorded lines (empty source) the result is `(1, offset + 1)`.
    pub fn position_from_offset(&self, offset: usize) -> (u32, u32) {
        // Number of line starts at or before `offset`; for a non-empty table
        // this is the 1-based line number.
        let line = self.lines.partition_point(|&start| start <= offset);
        let line_start = line.checked_sub(1).map_or(0, |idx| self.lines[idx]);
        (line.max(1) as u32, (offset - line_start + 1) as u32)
    }
}
```

4. Enhanced Lexer with Source Positions
Update the lexer to track source positions:

```rust
// src/assembler/lexer.rs
/// Outline of a lexer that tokenizes `module.source` and stores the tokens in
/// `module.tokens`, recording a source span for each one.
///
/// Lines and columns are 1-based; columns are byte distances from the start
/// of the current line. A token's `end` is the position just past its last
/// byte, and `raw` is the exact source slice it covers.
///
/// The token-recognition logic itself is elided.
// NOTE(review): no error path is visible in this outline; presumably the
// elided logic produces the `AssembleError`. The rest of the design names its
// error type `AssemblerError` — confirm which one is intended.
pub fn lex(module: &mut CodeModule) -> Result<(), AssembleError> {
    let source = &module.source;
    let mut tokens = Vec::new();
    let mut pos = 0;
    let mut line_start = 0;
    let mut line = 1;
    
    while pos < source.len() {
        // `pos` only ever advances by whole characters, so it sits on a
        // character boundary and a character is always available here.
        let c = source[pos..].chars().next().unwrap();
        
        if c == '\n' {
            line += 1;
            line_start = pos + 1;
            pos += 1;
            continue;
        }
        
        if c.is_whitespace() {
            // Unicode whitespace can be several bytes wide. Advancing by a
            // single byte would leave `pos` inside the character and make the
            // next `source[pos..]` slice panic.
            pos += c.len_utf8();
            continue;
        }
        
        let token_start = pos;
        // ... existing token parsing logic ...
        
        // When creating a token:
        let start_pos = SourcePosition {
            line,
            column: (token_start - line_start + 1) as u32,
            offset: token_start,
        };
        
        // Update pos based on token length
        let token_length = /* calculate token length */;
        pos += token_length;
        
        // NOTE(review): `line` and `line_start` are not updated while a token
        // is consumed, so this assumes a token never contains a newline.
        let end_pos = SourcePosition {
            line,
            column: (pos - line_start + 1) as u32,
            offset: pos,
        };
        
        tokens.push(Token {
            kind: token_kind,
            span: SourceSpan::new(start_pos, end_pos, module.hash),
            raw: source[token_start..pos].to_string(),
        });
    }
    
    module.tokens = tokens;
    Ok(())
}
```

5. Enhanced Error Reporting
Create a structured error type with source context:

```rust
// src/assembler/error.rs
/// An assembler diagnostic tied to a location in the source.
#[derive(Debug)]
pub struct AssemblerError {
    /// Category of the error.
    pub kind: ErrorKind,
    /// Where the error occurred.
    pub span: SourceSpan,
    /// Primary, human-readable message.
    pub message: String,
    /// Additional notes attached to the error, in the order they were added.
    pub context: Vec<String>,
}

impl AssemblerError {
    /// Creates an error of the given `kind` at `span` with no context notes.
    pub fn new(kind: ErrorKind, span: SourceSpan, message: impl Into<String>) -> Self {
        Self {
            kind,
            span,
            message: message.into(),
            context: Vec::new(),
        }
    }

    /// Appends one context note and returns the error so calls can be chained.
    pub fn with_context(mut self, context: impl Into<String>) -> Self {
        self.context.push(context.into());
        self
    }

    /// Renders the error as text using the source held by `module`.
    ///
    /// The output consists of a `path:line:col: message` header, the offending
    /// source line, a caret line pointing at the start of the span, and one
    /// `  = note: ...` line per context entry. Line and column are derived
    /// from `span.start.offset` via `module.position_from_offset`.
    ///
    /// `module` must be the module the span was produced from; the span's
    /// `file_id` is not checked here.
    pub fn format(&self, module: &CodeModule) -> String {
        let (line, col) = module.position_from_offset(self.span.start.offset);
        let line_content = module
            .source
            .lines()
            .nth((line as usize).saturating_sub(1))
            .unwrap_or("");

        // The column is a byte distance, so it cannot be used directly as a
        // number of spaces: a tab or a multi-byte character before the error
        // would push the caret out of place. Mirror the text in front of the
        // column instead, keeping tabs and turning everything else into one
        // space. If the column lies past the visible line (or not on a
        // character boundary) fall back to plain spaces.
        let caret_col = (col as usize).saturating_sub(1);
        let padding: String = match line_content.get(..caret_col) {
            Some(before) => before
                .chars()
                .map(|c| if c == '\t' { '\t' } else { ' ' })
                .collect(),
            None => " ".repeat(caret_col),
        };

        let mut output = format!(
            "{}:{}:{}: {}\n",
            module.path.display(),
            line,
            col,
            self.message
        );

        // Source line followed by the caret marker.
        output.push_str(line_content);
        output.push('\n');
        output.push_str(&padding);
        output.push_str("^\n");

        // Context notes, if any.
        for ctx in &self.context {
            output.push_str("  = note: ");
            output.push_str(ctx);
            output.push('\n');
        }

        output
    }
}
```

6. Integration with Compilation Pipeline
Update the compilation pipeline to use the enhanced types:

```rust
// src/assembler/mod.rs
/// Assembles the file at `src` into a list of instructions.
///
/// Reads the file, wraps it in a `CodeModule`, then runs the lexer, parser and
/// resolver over that module in this order before handing it to the code
/// generator. A failure to read the file is converted with
/// `AssemblerError::io_error`; a failure in any stage is returned as-is.
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssemblerError> {
    let text = match std::fs::read_to_string(src) {
        Ok(text) => text,
        Err(io_err) => return Err(AssemblerError::io_error(src, io_err)),
    };

    let mut module = CodeModule::new(src.to_path_buf(), text);

    // Front-end stages, each mutating the module in place.
    lexer::lex(&mut module)?;
    parser::parse(&mut module)?;
    resolver::resolve(&mut module)?;

    // Back end: emit the instructions.
    codegen::generate(&module)
}
```

7. Logging Integration
Enhance the logging system to include source context:

```rust
// src/util/logging.rs
/// Something that can emit log messages, optionally tied to a source span.
pub trait Loggable {
    /// Logs `message` at `level`.
    fn log(&self, level: LogLevel, message: impl std::fmt::Display);
    /// Logs `message` at `level`, associated with the source location `span`.
    fn log_with_span(&self, level: LogLevel, span: &SourceSpan, message: impl std::fmt::Display);
}

impl Loggable for CodeModule {
    /// Logs `message` at `level`, prefixed with this module's path.
    ///
    /// `Loggable` declares this method without a default body, so the impl
    /// does not compile without it.
    fn log(&self, level: LogLevel, message: impl std::fmt::Display) {
        log::log!(level, "{}: {}", self.path.display(), message);
    }

    /// Logs `message` at `level` together with the source line `span` starts
    /// on and a caret marking the start column.
    ///
    /// When the span belongs to another file, the call is forwarded to the
    /// dependency returned by `find_dependency(span.file_id)`.
    fn log_with_span(&self, level: LogLevel, span: &SourceSpan, message: impl std::fmt::Display) {
        if span.file_id != self.hash {
            if let Some(dep) = self.find_dependency(span.file_id) {
                return dep.log_with_span(level, span, message);
            }
            // NOTE(review): when no dependency matches, the span is rendered
            // against this module's source even though it belongs to another
            // file — confirm this fallback is intended.
        }

        let (line, col) = self.position_from_offset(span.start.offset);
        let line_content = self
            .source
            .lines()
            .nth((line as usize).saturating_sub(1))
            .unwrap_or("");

        // The column is a byte distance, so using it as a count of spaces
        // misplaces the caret after tabs or multi-byte characters. Mirror the
        // text in front of the column instead (tabs kept, everything else one
        // space); fall back to plain spaces when the column is past the
        // visible line.
        let caret_col = (col as usize).saturating_sub(1);
        let padding: String = match line_content.get(..caret_col) {
            Some(before) => before
                .chars()
                .map(|c| if c == '\t' { '\t' } else { ' ' })
                .collect(),
            None => " ".repeat(caret_col),
        };

        log::log!(
            level,
            "{}:{}:{}: {}\n  {}\n  {}{}",
            self.path.display(),
            line,
            col,
            message,
            line_content,
            padding,
            "^"
        );
    }
}
```

8. Usage Example
Here's how you'd use this in practice:

```rust
// In your parser or code that needs to report errors
/// Usage sketch: reporting an invalid token as a located error.
///
/// When `is_valid_token` rejects the token, returns an `AssemblerError` of
/// kind `ErrorKind::SyntaxError` placed at the token's span, with the message
/// "Invalid token" and one context note.
// NOTE(review): how `token` is obtained and what is returned on success are
// elided in this sketch; `module` is not used by the visible code.
fn parse_token(&mut self, module: &CodeModule) -> Result<Token, AssemblerError> {
    // ...
    if !is_valid_token(&token) {
        return Err(AssemblerError::new(
            ErrorKind::SyntaxError,
            token.span,
            "Invalid token"
        ).with_context("Expected a valid instruction or directive"));
    }
    // ...
}