280 lines
7.2 KiB
Markdown
280 lines
7.2 KiB
Markdown
|
|
|
|
```rust
|
|
// src/assembler/source.rs
|
|
/// A single location inside a source file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SourcePosition {
    /// Line number; the lexer in this document starts counting at 1.
    pub line: u32,
    /// Column number; the lexer derives it as the byte distance from the
    /// start of the line, plus 1.
    pub column: u32,
    /// Byte offset into the source text.
    pub offset: usize,
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct SourceSpan {
|
|
pub start: SourcePosition,
|
|
pub end: SourcePosition,
|
|
pub file_id: u64, // Hash of the file path
|
|
}
|
|
|
|
impl SourceSpan {
|
|
pub fn new(start: SourcePosition, end: SourcePosition, file_id: u64) -> Self {
|
|
Self { start, end, file_id }
|
|
}
|
|
|
|
pub fn single_char(pos: SourcePosition, file_id: u64) -> Self {
|
|
Self {
|
|
start: pos,
|
|
end: pos,
|
|
file_id,
|
|
}
|
|
}
|
|
}
|
|
```

## 2. Enhanced Token with Source Information
|
|
Update the Token type to include source positions:
|
|
|
|
```rust
|
|
// src/assembler/model.rs
|
|
pub struct Token {
|
|
pub kind: TokenKind,
|
|
pub span: SourceSpan,
|
|
pub raw: String, // Original source text
|
|
}
|
|
|
|
/// Classification of a `Token`.
///
/// The variant list is elided in this document; the enum keeps whatever
/// variants it already has.
pub enum TokenKind {
    // ... existing variants ...
}
|
|
```

## 3. Enhanced CodeModule Structure
|
|
Enhance the `CodeModule` struct to track source information:
|
|
|
|
```rust
|
|
// src/assembler/mod.rs
|
|
pub struct CodeModule {
|
|
pub path: PathBuf,
|
|
pub hash: u64,
|
|
pub source: String,
|
|
pub lines: Vec<usize>, // Line start offsets for quick lookup
|
|
pub tokens: Vec<Token>,
|
|
pub nodes: Vec<Node>,
|
|
pub dependencies: Vec<CodeModule>,
|
|
}
|
|
|
|
impl CodeModule {
|
|
pub fn new(path: PathBuf, source: String) -> Self {
|
|
let hash = quick_hash(&path);
|
|
let lines = source.lines()
|
|
.scan(0, |offset, line| {
|
|
let start = *offset;
|
|
*offset += line.len() + 1; // +1 for newline
|
|
Some(start)
|
|
})
|
|
.collect();
|
|
|
|
Self {
|
|
path,
|
|
hash,
|
|
source,
|
|
lines,
|
|
tokens: Vec::new(),
|
|
nodes: Vec::new(),
|
|
dependencies: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn position_from_offset(&self, offset: usize) -> (u32, u32) {
|
|
match self.lines.binary_search(&offset) {
|
|
Ok(line) => (line as u32 + 1, 1),
|
|
Err(0) => (1, offset as u32 + 1),
|
|
Err(line) => {
|
|
let line_start = self.lines[line - 1];
|
|
(line as u32, (offset - line_start + 1) as u32)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```

## 4. Enhanced Lexer with Source Positions
|
|
Update the lexer to track source positions:
|
|
|
|
```rust
|
|
// src/assembler/lexer.rs
|
|
pub fn lex(module: &mut CodeModule) -> Result<(), AssembleError> {
|
|
let source = &module.source;
|
|
let mut tokens = Vec::new();
|
|
let mut pos = 0;
|
|
let mut line_start = 0;
|
|
let mut line = 1;
|
|
|
|
while pos < source.len() {
|
|
let c = source[pos..].chars().next().unwrap();
|
|
|
|
if c == '\n' {
|
|
line += 1;
|
|
line_start = pos + 1;
|
|
pos += 1;
|
|
continue;
|
|
}
|
|
|
|
if c.is_whitespace() {
|
|
pos += 1;
|
|
continue;
|
|
}
|
|
|
|
let token_start = pos;
|
|
// ... existing token parsing logic ...
|
|
|
|
// When creating a token:
|
|
let start_pos = SourcePosition {
|
|
line,
|
|
column: (token_start - line_start + 1) as u32,
|
|
offset: token_start,
|
|
};
|
|
|
|
// Update pos based on token length
|
|
let token_length = /* calculate token length */;
|
|
pos += token_length;
|
|
|
|
let end_pos = SourcePosition {
|
|
line,
|
|
column: (pos - line_start + 1) as u32,
|
|
offset: pos,
|
|
};
|
|
|
|
tokens.push(Token {
|
|
kind: token_kind,
|
|
span: SourceSpan::new(start_pos, end_pos, module.hash),
|
|
raw: source[token_start..pos].to_string(),
|
|
});
|
|
}
|
|
|
|
module.tokens = tokens;
|
|
Ok(())
|
|
}
|
|
```

## 5. Enhanced Error Reporting
|
|
Create a structured error type with source context:
|
|
|
|
```rust
|
|
// src/assembler/error.rs
|
|
#[derive(Debug)]
|
|
pub struct AssemblerError {
|
|
pub kind: ErrorKind,
|
|
pub span: SourceSpan,
|
|
pub message: String,
|
|
pub context: Vec<String>,
|
|
}
|
|
|
|
impl AssemblerError {
|
|
pub fn new(kind: ErrorKind, span: SourceSpan, message: impl Into<String>) -> Self {
|
|
Self {
|
|
kind,
|
|
span,
|
|
message: message.into(),
|
|
context: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn with_context(mut self, context: impl Into<String>) -> Self {
|
|
self.context.push(context.into());
|
|
self
|
|
}
|
|
|
|
pub fn format(&self, module: &CodeModule) -> String {
|
|
let (line, col) = module.position_from_offset(self.span.start.offset);
|
|
let line_content = module.source.lines().nth(line as usize - 1).unwrap_or("");
|
|
|
|
let mut output = format!(
|
|
"{}:{}:{}: {}\n",
|
|
module.path.display(),
|
|
line,
|
|
col,
|
|
self.message
|
|
);
|
|
|
|
// Add source line with caret
|
|
output.push_str(&format!("{}\n", line_content));
|
|
output.push_str(&" ".repeat(col as usize - 1));
|
|
output.push_str("^\n");
|
|
|
|
// Add context if any
|
|
for ctx in &self.context {
|
|
output.push_str(&format!(" = note: {}\n", ctx));
|
|
}
|
|
|
|
output
|
|
}
|
|
}
|
|
```

## 6. Integration with Compilation Pipeline
|
|
Update the compilation pipeline to use the enhanced types:
|
|
|
|
```rust
|
|
// src/assembler/mod.rs
|
|
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssemblerError> {
|
|
let source = std::fs::read_to_string(src)
|
|
.map_err(|e| AssemblerError::io_error(src, e))?;
|
|
|
|
let mut module = CodeModule::new(src.to_path_buf(), source);
|
|
|
|
// Lexing
|
|
lexer::lex(&mut module)?;
|
|
|
|
// Parsing
|
|
parser::parse(&mut module)?;
|
|
|
|
// Resolution
|
|
resolver::resolve(&mut module)?;
|
|
|
|
// Code generation
|
|
codegen::generate(&module)
|
|
}
|
|
```

## 7. Logging Integration
|
|
Enhance the logging system to include source context:
|
|
|
|
```rust
|
|
// src/util/logging.rs
|
|
pub trait Loggable {
|
|
fn log(&self, level: LogLevel, message: impl std::fmt::Display);
|
|
fn log_with_span(&self, level: LogLevel, span: &SourceSpan, message: impl std::fmt::Display);
|
|
}
|
|
|
|
impl Loggable for CodeModule {
|
|
fn log_with_span(&self, level: LogLevel, span: &SourceSpan, message: impl std::fmt::Display) {
|
|
if span.file_id != self.hash {
|
|
if let Some(dep) = self.find_dependency(span.file_id) {
|
|
return dep.log_with_span(level, span, message);
|
|
}
|
|
}
|
|
|
|
let (line, col) = self.position_from_offset(span.start.offset);
|
|
let line_content = self.source.lines().nth(line as usize - 1).unwrap_or("");
|
|
|
|
log::log!(
|
|
level,
|
|
"{}:{}:{}: {}\n {}\n {}{}",
|
|
self.path.display(),
|
|
line,
|
|
col,
|
|
message,
|
|
line_content,
|
|
" ".repeat(col as usize - 1),
|
|
"^"
|
|
);
|
|
}
|
|
}
|
|
```

## 8. Usage Example
|
|
Here's how you'd use this in practice:
|
|
|
|
```rust
|
|
// In your parser or code that needs to report errors
|
|
fn parse_token(&mut self, module: &CodeModule) -> Result<Token, AssemblerError> {
|
|
// ...
|
|
if !is_valid_token(&token) {
|
|
return Err(AssemblerError::new(
|
|
ErrorKind::SyntaxError,
|
|
token.span,
|
|
"Invalid token"
|
|
).with_context("Expected a valid instruction or directive"));
|
|
}
|
|
// ...
|
|
}
|
|
```
|