Compare commits
3 Commits
3afeafc9d4
...
48a74bfde2
| Author | SHA1 | Date | |
|---|---|---|---|
| 48a74bfde2 | |||
| 7973b2afca | |||
| ce2eda72a0 |
+34
-8
@@ -29,8 +29,10 @@ static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
|
||||
("println", "print::println"),
|
||||
("printnum", "print::print_num"),
|
||||
("print_space", "print::print_whitespace"),
|
||||
("print_newline", "print::print_newline"),
|
||||
("print_char", "print::print_byte"),
|
||||
("print_word", "print::print_word"),
|
||||
("print_hex", "print::print_hex_word"),
|
||||
])
|
||||
});
|
||||
|
||||
@@ -252,6 +254,19 @@ impl CodeGenerator {
|
||||
Statement::Break => unimplemented!(),
|
||||
Statement::Continue => unimplemented!(),
|
||||
|
||||
Statement::PtrWrite { ptr, value } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(value, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
|
||||
code.extend(ptr_code);
|
||||
|
||||
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
|
||||
|
||||
self.allocator.free_temp(&result_reg);
|
||||
self.allocator.free_temp(&ptr_reg);
|
||||
}
|
||||
|
||||
Statement::Assign { varname, value } => {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(value, true)?;
|
||||
@@ -540,6 +555,14 @@ impl CodeGenerator {
|
||||
}
|
||||
|
||||
Expression::Call { name, args } => {
|
||||
// first evaluate all the args we're going to need
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
|
||||
code.extend(arg_code);
|
||||
arg_regs.push(arg_reg);
|
||||
}
|
||||
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
for reg in &saved_regs {
|
||||
@@ -547,12 +570,12 @@ impl CodeGenerator {
|
||||
}
|
||||
|
||||
// Evaluate and push arguments in reverse order
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
|
||||
code.extend(arg_code);
|
||||
code.push(format!("\tpush {}", arg_reg));
|
||||
arg_regs.push(arg_reg);
|
||||
for (i, arg_reg) in arg_regs.iter().enumerate() {
|
||||
code.push(format!(
|
||||
"\tpush {} // push arg {}",
|
||||
arg_reg,
|
||||
args.len() - 1 - i
|
||||
));
|
||||
}
|
||||
|
||||
if GLOBAL_METHODS.contains_key(name.name.as_str()) {
|
||||
@@ -564,10 +587,11 @@ impl CodeGenerator {
|
||||
return Err(CompilerError::Undefined(name.clone()));
|
||||
}
|
||||
|
||||
let result_reg = String::new();
|
||||
let result_reg: String;
|
||||
|
||||
if use_result {
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
result_reg = temp_result_reg;
|
||||
|
||||
code.extend(result_alloc);
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
@@ -579,6 +603,8 @@ impl CodeGenerator {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result_reg = "zero".to_string();
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 0 {
|
||||
for _ in 0..(args.len()) {
|
||||
|
||||
+505
-112
@@ -20,7 +20,7 @@ pub enum Token {
|
||||
// Identifiers and literals
|
||||
Identifier(String),
|
||||
String(String),
|
||||
Integer(u32),
|
||||
Integer(u64),
|
||||
Char(char),
|
||||
|
||||
// Symbols
|
||||
@@ -31,13 +31,12 @@ pub enum Token {
|
||||
Semicolon, // ;
|
||||
Colon, // :
|
||||
Comma, // ,
|
||||
// Pipe, // |
|
||||
|
||||
// Operators
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
Star, // *
|
||||
Amphersand,
|
||||
Amphersand, // &
|
||||
Slash, // /
|
||||
Assign, // =
|
||||
EqualEqual, // ==
|
||||
@@ -80,7 +79,6 @@ impl Token {
|
||||
Token::Colon => "Colon",
|
||||
Token::Comma => "Comma",
|
||||
Token::RightArrow => "RightArrow",
|
||||
// Token::Pipe => "Pipe",
|
||||
Token::Plus => "Plus",
|
||||
Token::Minus => "Minus",
|
||||
Token::Star => "Star",
|
||||
@@ -139,109 +137,107 @@ impl<'a> Lexer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Skips a `//` line comment, starting with `current` on the first slash.
///
/// Advances past both slashes, then consumes characters until a newline or
/// the end of input. A terminating newline is consumed too and increments
/// `self.line`.
fn skip_line_comment(&mut self) {
|
||||
// Skip the two slashes
|
||||
self.advance(); // first /
|
||||
self.advance(); // second /
|
||||
|
||||
// Skip until newline or EOF
|
||||
while let Some(c) = self.current {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
/// Skips a block comment, starting with `current` on the opening slash.
///
/// Each newline seen inside the comment increments `self.line`. The comment
/// ends at the first closing delimiter found; no nesting depth is tracked,
/// so an inner opening delimiter does not extend the comment.
///
/// # Errors
///
/// Returns a message containing the line recorded just after the opening
/// delimiter if the input ends before a closing delimiter is found.
fn skip_block_comment(&mut self) -> Result<(), String> {
|
||||
// Skip the /*
|
||||
self.advance(); // /
|
||||
self.advance(); // *
|
||||
|
||||
let start_line = self.line;
|
||||
|
||||
// Look for */
|
||||
while let Some(c) = self.current {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
}
|
||||
|
||||
if c == '*' {
|
||||
if let Some(&next) = self.peek() {
|
||||
if next == '/' {
|
||||
self.advance(); // *
|
||||
self.advance(); // /
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Err(format!(
|
||||
"Unterminated block comment starting at line {}",
|
||||
start_line
|
||||
))
|
||||
}
|
||||
|
||||
/// Skips any run of whitespace and comments in front of the next token.
///
/// Repeatedly calls `skip_whitespace`; if `current` is then a `/` followed
/// by `/` or `*`, the corresponding comment is skipped and the loop starts
/// over. Anything else ends the loop, including a `/` that is not followed
/// by a comment opener or that is the last character of the input.
///
/// An error from `skip_block_comment` is printed to stderr and not
/// propagated; the loop simply continues.
fn skip_whitespace_and_comments(&mut self) {
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
|
||||
// Check for comments
|
||||
if let Some('/') = self.current {
|
||||
if let Some(&next) = self.peek() {
|
||||
match next {
|
||||
'/' => {
|
||||
self.skip_line_comment();
|
||||
continue;
|
||||
}
|
||||
'*' => {
|
||||
if let Err(e) = self.skip_block_comment() {
|
||||
eprintln!("Lexer error: {}", e);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reached when `current` is not '/', or when '/' is the final character.
break;
|
||||
}
|
||||
}
|
||||
|
||||
fn read_identifier(&mut self) -> String {
|
||||
let mut ident = String::new();
|
||||
|
||||
// Include the current character if it's valid
|
||||
if let Some(c) = self.current {
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
ident.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Read remaining characters
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_alphanumeric() || c == '_' {
|
||||
ident.push(c);
|
||||
self.advance();
|
||||
ident.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ident
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> i64 {
|
||||
let mut num_str = String::from(self.current.unwrap());
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
num_str.push(c);
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
num_str.parse().unwrap()
|
||||
}
|
||||
fn keyword_or_identifier(&mut self) -> Token {
|
||||
let ident = self.read_identifier();
|
||||
|
||||
fn match_next(&mut self, expected: char) -> bool {
|
||||
match self.peek() {
|
||||
Some(&c) if c == expected => {
|
||||
self.advance();
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Token {
|
||||
self.skip_whitespace();
|
||||
|
||||
let token = match self.current {
|
||||
Some('(') => Token::LeftParen,
|
||||
Some(')') => Token::RightParen,
|
||||
Some('{') => Token::LeftBrace,
|
||||
Some('}') => Token::RightBrace,
|
||||
Some(';') => Token::Semicolon,
|
||||
Some(':') => Token::Colon,
|
||||
Some(',') => Token::Comma,
|
||||
Some('&') => Token::Amphersand,
|
||||
// Some('|') => Token::Pipe,
|
||||
Some('+') => Token::Plus,
|
||||
Some('*') => Token::Star,
|
||||
Some('/') => Token::Slash,
|
||||
Some('-') => {
|
||||
if self.match_next('>') {
|
||||
Token::RightArrow
|
||||
} else {
|
||||
Token::Minus
|
||||
}
|
||||
}
|
||||
Some('!') => {
|
||||
if self.match_next('=') {
|
||||
Token::BangEqual
|
||||
} else {
|
||||
Token::Bang
|
||||
}
|
||||
}
|
||||
Some('=') => {
|
||||
if self.match_next('=') {
|
||||
Token::EqualEqual
|
||||
} else {
|
||||
Token::Assign
|
||||
}
|
||||
}
|
||||
Some('<') => {
|
||||
if self.match_next('=') {
|
||||
Token::LessEqual
|
||||
} else {
|
||||
Token::Less
|
||||
}
|
||||
}
|
||||
Some('>') => {
|
||||
if self.match_next('=') {
|
||||
Token::GreaterEqual
|
||||
} else {
|
||||
Token::Greater
|
||||
}
|
||||
}
|
||||
Some('"') => {
|
||||
self.advance(); // Skip the opening quote
|
||||
let mut s = String::new();
|
||||
while let Some(c) = self.current {
|
||||
if c == '"' {
|
||||
break;
|
||||
}
|
||||
s.push(c);
|
||||
self.advance();
|
||||
}
|
||||
Token::String(s)
|
||||
}
|
||||
Some(c) => {
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
let mut ident = c.to_string();
|
||||
ident.push_str(&self.read_identifier());
|
||||
match ident.as_str() {
|
||||
"fn" => Token::Fn,
|
||||
"if" => Token::If,
|
||||
@@ -257,22 +253,288 @@ impl<'a> Lexer<'a> {
|
||||
"static" => Token::Static,
|
||||
_ => Token::Identifier(ident),
|
||||
}
|
||||
} else if c.is_ascii_digit() {
|
||||
Token::Integer(self.read_number() as u32)
|
||||
} else {
|
||||
// Skip unknown characters for now
|
||||
}
|
||||
|
||||
/// Reads an integer literal that begins at `current`.
///
/// A leading `0` followed by `x`/`X` or `b`/`B` selects hexadecimal or
/// binary: both prefix characters are consumed and the digits are read by
/// `read_hex_number` or `read_binary_number`. Every other input is read by
/// `read_decimal_number`.
///
/// # Errors
///
/// Returns whatever error string the selected reader produces.
///
/// # Panics
///
/// Panics if `self.current` is `None`, because of the `unwrap` on the first
/// line of the body.
fn read_number(&mut self) -> Result<u64, String> {
|
||||
let current = self.current.unwrap();
|
||||
|
||||
// Check for hex (0x) or binary (0b) prefix
|
||||
if current == '0' {
|
||||
if let Some(&next_char) = self.peek() {
|
||||
match next_char {
|
||||
'x' | 'X' => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'x'
|
||||
return self.read_hex_number();
|
||||
}
|
||||
'b' | 'B' => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'b'
|
||||
return self.read_binary_number();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read decimal number
|
||||
self.read_decimal_number()
|
||||
}
|
||||
|
||||
fn read_decimal_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
if let Some(c) = self.current {
|
||||
num_str.push(c);
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
self.advance();
|
||||
return self.next_token();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None => Token::Eof,
|
||||
|
||||
num_str
|
||||
.parse::<u64>()
|
||||
.map_err(|_| format!("Invalid decimal number: {}", num_str))
|
||||
}
|
||||
|
||||
/// Reads the digits of a hexadecimal literal and parses them as a `u64`.
///
/// Expects the caller to have already consumed the `0x` prefix. Takes
/// `current` if it is a hex digit, then keeps advancing for as long as the
/// peeked character is a hex digit.
///
/// # Errors
///
/// Returns an error if no digit was collected, or if `u64::from_str_radix`
/// rejects the collected digits (for example a value too large for `u64`).
fn read_hex_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
// Read current character if it's a hex digit
|
||||
if let Some(c) = self.current {
|
||||
if c.is_ascii_hexdigit() {
|
||||
num_str.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Only advance once the peeked character is known to be a digit, so the
// first non-digit is never consumed here.
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_hexdigit() {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if num_str.is_empty() {
|
||||
return Err("Invalid hexadecimal number: no digits after 0x".to_string());
|
||||
}
|
||||
|
||||
u64::from_str_radix(&num_str, 16)
|
||||
.map_err(|_| format!("Invalid hexadecimal number: {}", num_str))
|
||||
}
|
||||
|
||||
/// Reads the digits of a binary literal and parses them as a `u64`.
///
/// Expects the caller to have already consumed the `0b` prefix. Takes
/// `current` if it is `0` or `1`, then keeps advancing for as long as the
/// peeked character is `0` or `1`.
///
/// # Errors
///
/// Returns an error if no digit was collected, or if `u64::from_str_radix`
/// rejects the collected digits (for example more than 64 significant bits).
fn read_binary_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
// Read current character if it's a binary digit
|
||||
if let Some(c) = self.current {
|
||||
if c == '0' || c == '1' {
|
||||
num_str.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Only advance once the peeked character is known to be a digit, so the
// first non-digit is never consumed here.
while let Some(&c) = self.peek() {
|
||||
if c == '0' || c == '1' {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if num_str.is_empty() {
|
||||
return Err("Invalid binary number: no digits after 0b".to_string());
|
||||
}
|
||||
|
||||
u64::from_str_radix(&num_str, 2)
|
||||
.map_err(|_| format!("Invalid binary number: {}", num_str))
|
||||
}
|
||||
|
||||
/// Reads a double-quoted string literal, starting with `current` on the
/// opening quote, and returns its contents with escapes resolved.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\\` and `\"`; any other
/// character following a backslash is kept unchanged. On success the
/// function returns as soon as `current` is the closing quote, without
/// advancing past it.
///
/// # Errors
///
/// Returns an error if the input ends directly after a backslash, or if it
/// ends before a closing quote is seen.
fn read_string(&mut self) -> Result<String, String> {
|
||||
self.advance(); // Skip the opening quote
|
||||
let mut s = String::new();
|
||||
|
||||
while let Some(c) = self.current {
|
||||
if c == '"' {
|
||||
return Ok(s);
|
||||
}
|
||||
|
||||
// Handle escape sequences
|
||||
if c == '\\' {
|
||||
self.advance();
|
||||
if let Some(escaped) = self.current {
|
||||
let escaped_char = match escaped {
|
||||
'n' => '\n',
|
||||
't' => '\t',
|
||||
'r' => '\r',
|
||||
'\\' => '\\',
|
||||
'"' => '"',
|
||||
_ => escaped, // For now, just use the character as-is
|
||||
};
|
||||
s.push(escaped_char);
|
||||
} else {
|
||||
return Err("Unexpected end of string after escape".to_string());
|
||||
}
|
||||
} else {
|
||||
s.push(c);
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Err("Unterminated string literal".to_string())
|
||||
}
|
||||
|
||||
/// Consumes the upcoming character only if it equals `expected`.
///
/// Returns `true` after calling `advance` when the peeked character
/// matches; otherwise returns `false` without advancing.
fn match_next(&mut self, expected: char) -> bool {
|
||||
match self.peek() {
|
||||
Some(&c) if c == expected => {
|
||||
self.advance();
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps a punctuation character to its single-character token.
///
/// This is a pure lookup on `c`: the body never touches `self`, so nothing
/// is consumed here. Returns `None` for any character not listed.
fn scan_single_char_token(&mut self, c: char) -> Option<Token> {
|
||||
match c {
|
||||
'(' => Some(Token::LeftParen),
|
||||
')' => Some(Token::RightParen),
|
||||
'{' => Some(Token::LeftBrace),
|
||||
'}' => Some(Token::RightBrace),
|
||||
';' => Some(Token::Semicolon),
|
||||
':' => Some(Token::Colon),
|
||||
',' => Some(Token::Comma),
|
||||
'&' => Some(Token::Amphersand),
|
||||
'+' => Some(Token::Plus),
|
||||
'*' => Some(Token::Star),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Recognises operators that may be one or two characters long.
///
/// For `-`, `!`, `=`, `<` and `>` the two-character form (`->`, `!=`, `==`,
/// `<=`, `>=`) is chosen when `match_next` finds the second character, which
/// it then consumes; otherwise the one-character token is returned. The
/// first character `c` itself is not consumed by this function.
///
/// A `/` yields `Token::Slash` unless it is followed by `/` or `*`; in that
/// case `None` is returned so that a comment opener is not turned into a
/// token. Any other character also returns `None`.
fn scan_operator(&mut self, c: char) -> Option<Token> {
|
||||
match c {
|
||||
'-' => Some(if self.match_next('>') {
|
||||
Token::RightArrow
|
||||
} else {
|
||||
Token::Minus
|
||||
}),
|
||||
'!' => Some(if self.match_next('=') {
|
||||
Token::BangEqual
|
||||
} else {
|
||||
Token::Bang
|
||||
}),
|
||||
'=' => Some(if self.match_next('=') {
|
||||
Token::EqualEqual
|
||||
} else {
|
||||
Token::Assign
|
||||
}),
|
||||
'<' => Some(if self.match_next('=') {
|
||||
Token::LessEqual
|
||||
} else {
|
||||
Token::Less
|
||||
}),
|
||||
'>' => Some(if self.match_next('=') {
|
||||
Token::GreaterEqual
|
||||
} else {
|
||||
Token::Greater
|
||||
}),
|
||||
'/' => {
|
||||
// Check if it's a comment or division
|
||||
if let Some(&next) = self.peek() {
|
||||
if next == '/' || next == '*' {
|
||||
// It's a comment, don't consume it here
|
||||
// Let skip_whitespace_and_comments handle it
|
||||
None
|
||||
} else {
|
||||
Some(Token::Slash)
|
||||
}
|
||||
} else {
|
||||
Some(Token::Slash)
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Token {
|
||||
self.skip_whitespace_and_comments();
|
||||
|
||||
let Some(c) = self.current else {
|
||||
return Token::Eof;
|
||||
};
|
||||
|
||||
if token != Token::Eof {
|
||||
// Try single-character tokens first
|
||||
if let Some(token) = self.scan_single_char_token(c) {
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
token
|
||||
// Try operators (may be multi-character)
|
||||
if let Some(token) = self.scan_operator(c) {
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// String literals
|
||||
if c == '"' {
|
||||
let token = match self.read_string() {
|
||||
Ok(s) => Token::String(s),
|
||||
Err(e) => {
|
||||
eprintln!("Lexer error on line {}: {}", self.line, e);
|
||||
// Skip to next quote or end
|
||||
while let Some(ch) = self.current {
|
||||
if ch == '"' || ch == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::String(String::new())
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Identifiers and keywords
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
let token = self.keyword_or_identifier();
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Numbers (decimal, hex, binary)
|
||||
if c.is_ascii_digit() {
|
||||
let token = match self.read_number() {
|
||||
Ok(num) => Token::Integer(num),
|
||||
Err(e) => {
|
||||
eprintln!("Lexer error on line {}: {}", self.line, e);
|
||||
// Skip invalid number
|
||||
while let Some(&ch) = self.peek() {
|
||||
if !ch.is_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::Integer(0)
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Unknown character - skip it
|
||||
eprintln!(
|
||||
"Lexer warning on line {}: Skipping unknown character '{}'",
|
||||
self.line, c
|
||||
);
|
||||
self.advance();
|
||||
self.next_token()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -318,6 +580,41 @@ mod tests {
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// Hex literals with a `0x` prefix lex to `Token::Integer` carrying the
/// parsed value, followed by `Token::Eof`.
#[test]
|
||||
fn test_hex_numbers() {
|
||||
let input = "0xFF 0x10 0xDEADBEEF 0x0";
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0xFF));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0x10));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0xDEADBEEF));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0x0));
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// Binary literals with a `0b` prefix lex to `Token::Integer` carrying the
/// parsed value, followed by `Token::Eof`.
#[test]
|
||||
fn test_binary_numbers() {
|
||||
let input = "0b1010 0b0 0b11111111 0b1";
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0b1010));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0b0));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0b11111111));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(0b1));
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// Decimal, hex and binary literals in one input each lex to the same kind
/// of `Token::Integer`, compared here against their decimal values.
#[test]
|
||||
fn test_mixed_number_formats() {
|
||||
let input = "42 0xFF 0b1010";
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Integer(42));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(255));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(10));
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_operators() {
|
||||
let input = "= == ! != < <= > >=";
|
||||
@@ -334,6 +631,19 @@ mod tests {
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// The input is a raw string, so the lexer sees a literal backslash followed
/// by `n` or `t`; the expected tokens contain the real newline and tab.
#[test]
|
||||
fn test_string_with_escapes() {
|
||||
let input = r#""hello\nworld" "tab\there""#;
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(
|
||||
lexer.next_token(),
|
||||
Token::String("hello\nworld".to_string())
|
||||
);
|
||||
assert_eq!(lexer.next_token(), Token::String("tab\there".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_example_syntax() {
|
||||
let input = r#"
|
||||
@@ -349,25 +659,108 @@ mod tests {
|
||||
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
// Skip whitespace and newlines
|
||||
while let Some(c) = lexer.current {
|
||||
if !c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
lexer.advance();
|
||||
}
|
||||
|
||||
// Test the first few tokens
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("main".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Colon);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("Func".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
// assert_eq!(lexer.next_token(), Token::Pipe);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Colon);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("U32".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Comma);
|
||||
}
|
||||
|
||||
// The rest of the tokens would be tested similarly
|
||||
/// Line comments, both trailing a statement and on a line of their own,
/// produce no tokens.
#[test]
|
||||
fn test_line_comments() {
|
||||
let input = r#"
|
||||
let x = 5; // this is a comment
|
||||
// this is another comment
|
||||
let y = 10;
|
||||
"#;
|
||||
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Let);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
assert_eq!(lexer.next_token(), Token::Integer(5));
|
||||
assert_eq!(lexer.next_token(), Token::Semicolon);
|
||||
// Comment should be skipped
|
||||
assert_eq!(lexer.next_token(), Token::Let);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("y".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
assert_eq!(lexer.next_token(), Token::Integer(10));
|
||||
assert_eq!(lexer.next_token(), Token::Semicolon);
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// A block comment spanning more than one line produces no tokens; lexing
/// resumes with the statement that follows it.
#[test]
|
||||
fn test_block_comments() {
|
||||
let input = r#"
|
||||
let x = 5; /* this is a
|
||||
multiline block comment */
|
||||
let y = 10;
|
||||
"#;
|
||||
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Let);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
assert_eq!(lexer.next_token(), Token::Integer(5));
|
||||
assert_eq!(lexer.next_token(), Token::Semicolon);
|
||||
// Block comment should be skipped
|
||||
assert_eq!(lexer.next_token(), Token::Let);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("y".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
assert_eq!(lexer.next_token(), Token::Integer(10));
|
||||
assert_eq!(lexer.next_token(), Token::Semicolon);
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// A `/` that does not open a comment lexes as `Token::Slash`.
#[test]
|
||||
fn test_division_operator() {
|
||||
let input = "x / y";
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Slash);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("y".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// Division and both comment forms in one input. The expected tokens show
/// that `/=` currently lexes as `Slash` followed by `Assign`, not as a
/// single compound-assignment token.
#[test]
|
||||
fn test_mixed_comments_and_operators() {
|
||||
let input = r#"
|
||||
x / y // division
|
||||
/* block comment */ z = 10
|
||||
a /= b // this won't work yet
|
||||
"#;
|
||||
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Slash);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("y".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("z".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
assert_eq!(lexer.next_token(), Token::Integer(10));
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("a".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Slash);
|
||||
assert_eq!(lexer.next_token(), Token::Assign);
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("b".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Eof);
|
||||
}
|
||||
|
||||
/// Pins the non-nesting behaviour of block comments: text after the first
/// closing delimiter is lexed as ordinary tokens. Only the first three
/// tokens after the comment are checked.
#[test]
|
||||
fn test_nested_block_comment_attempt() {
|
||||
// Note: This lexer doesn't support nested block comments
|
||||
let input = "/* outer /* inner */ still in comment? */ x";
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
// The comment ends at the first */
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("still".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("in".to_string()));
|
||||
assert_eq!(lexer.next_token(), Token::Identifier("comment".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -247,6 +247,45 @@ impl Parser {
|
||||
return ParseResult::Accept(Statement::Continue);
|
||||
}
|
||||
|
||||
// handle writes to pointers!
|
||||
if expect_tt!(self.peek_next()?, Star).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let left = if expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
let identifier = self.parse_identifier()?;
|
||||
|
||||
Expression::Variable {
|
||||
name: identifier,
|
||||
expr_type: None,
|
||||
}
|
||||
} else if expect_tt!(self.peek_next()?, LeftParen).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightParen).accepted();
|
||||
|
||||
expr
|
||||
} else {
|
||||
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
self.peek_next()?,
|
||||
));
|
||||
};
|
||||
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
let right = self.parse_expression()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::PtrWrite {
|
||||
ptr: left,
|
||||
value: right,
|
||||
});
|
||||
}
|
||||
|
||||
// handle let statements (declarations)
|
||||
if expect_tt!(self.peek_next()?, Let).accepted() {
|
||||
self.next();
|
||||
@@ -573,6 +612,10 @@ pub enum Statement {
|
||||
varname: String,
|
||||
value: Expression,
|
||||
},
|
||||
PtrWrite {
|
||||
ptr: Expression,
|
||||
value: Expression,
|
||||
},
|
||||
Expression {
|
||||
expr: Expression,
|
||||
},
|
||||
|
||||
+107
-5
@@ -1,8 +1,110 @@
|
||||
fn main() -> u32 {
|
||||
let x: u32 = 5;
|
||||
let stringgg: str = "Hello world";
|
||||
let test: str = "test";
|
||||
|
||||
println("hello world 2 electric boogaloo");
|
||||
printnum(213);
|
||||
let x: u32 = 0;
|
||||
let y: u32 = &x;
|
||||
|
||||
let alloc: u32 = arena_create(512);
|
||||
let ptr1: u32 = arena_alloc(alloc, 32);
|
||||
let ptr2: u32 = arena_alloc(alloc, 32);
|
||||
|
||||
print_hex(alloc);
|
||||
print_newline();
|
||||
print_hex(ptr1);
|
||||
print_newline();
|
||||
print_hex(ptr2);
|
||||
print_newline();
|
||||
printnum(*ptr2);
|
||||
print_newline();
|
||||
*ptr2 = 42;
|
||||
|
||||
print_hex(ptr2);
|
||||
print_newline();
|
||||
printnum(*ptr2);
|
||||
print_newline();
|
||||
println("end");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Arena Allocator
|
||||
// Supports multiple arenas that can be destroyed independently
|
||||
// Much more practical than a simple bump allocator
|
||||
|
||||
// Global heap management
|
||||
static heap_start: u32 = 0x30000;
|
||||
static heap_end: u32 = 0x40000;
|
||||
static heap_current: u32 = 0x30000;
|
||||
|
||||
// Arena structure (stored at the start of each arena):
|
||||
// [0-3]: start_address (u32)
|
||||
// [4-7]: current_position (u32)
|
||||
// [8-11]: end_address (u32)
|
||||
// Total header size: 12 bytes
|
||||
|
||||
// Create a new arena with `size` usable bytes, carved from the global heap
// starting at heap_current. A 12-byte header precedes the data region, so
// size + 12 bytes are reserved in total.
|
||||
// Returns the address of the arena header (the arena handle), or 0 if the
// reservation would extend past heap_end.
// NOTE(review): size + 12 is not guarded against u32 overflow - confirm how
// the language handles wraparound.
|
||||
fn arena_create(size: u32) -> u32 {
|
||||
let total_size: u32 = size + 12;
|
||||
let arena_ptr: u32 = heap_current;
|
||||
let new_current: u32 = arena_ptr + total_size;
|
||||
|
||||
// Check if we have space
|
||||
if new_current > heap_end {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Calculate arena data region
|
||||
let data_start: u32 = arena_ptr + 12;
|
||||
let data_end: u32 = arena_ptr + total_size;
|
||||
|
||||
// Initialize arena header
|
||||
// The header is written with pointer stores:
|
||||
// three u32 fields at offsets 0, 4 and 8 from arena_ptr.
|
||||
*arena_ptr = data_start; // start_address
|
||||
*(arena_ptr + 4) = data_start; // current_position
|
||||
*(arena_ptr + 8) = data_end; // end_address
|
||||
|
||||
// Publish the reservation only after the header has been written.
heap_current = new_current;
|
||||
|
||||
return arena_ptr;
|
||||
}
|
||||
|
||||
// Allocate `size` bytes from the arena whose header is at address `arena`.
// This is a bump allocation: the arena's current_position is moved forward by size.
|
||||
// Returns the address of the allocated memory (the previous current_position),
// or 0 if the arena does not have `size` bytes left.
// NOTE(review): `arena` is not checked for 0 (the failure value returned by
// arena_create) - confirm callers guard against it.
|
||||
fn arena_alloc(arena: u32, size: u32) -> u32 {
|
||||
// Read current position from arena
|
||||
// Header offset 4 holds current_position, offset 8 holds end_address.
let current: u32 = *(arena + 4);
|
||||
let end: u32 = *(arena + 8);
|
||||
|
||||
let new_current: u32 = current + size;
|
||||
|
||||
// Check if arena has space
|
||||
if new_current > end {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Update current position in arena
|
||||
*(arena + 4) = new_current;
|
||||
|
||||
return current;
|
||||
}
|
||||
|
||||
// Destroy an arena. With the current bump allocator this is a no-op:
// `arena` is not read and no heap state changes.
|
||||
// In a real allocator, you'd mark the memory as free
// NOTE(review): the signature declares no return type but the body returns 0 -
// confirm this is intended.
|
||||
fn arena_destroy(arena: u32) {
|
||||
// In a true allocator, mark memory as reusable
|
||||
// For bump allocator, we can't reclaim memory
|
||||
// unless we destroy ALL arenas and reset
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reset the entire heap by moving heap_current back to heap_start.
// This invalidates ALL arenas at once; arena memory itself is not cleared.
// NOTE(review): the signature declares no return type but the body returns 0 -
// confirm this is intended.
|
||||
fn reset_all() {
|
||||
heap_current = heap_start;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -236,19 +236,19 @@
|
||||
**Dependencies:** None
|
||||
**Deliverable:** `docs/language-spec.md`
|
||||
|
||||
- [ ] Define syntax goals (simplicity, systems programming)
|
||||
- [x] Define syntax goals (simplicity, systems programming)
|
||||
- [ ] Design type system
|
||||
- [ ] Primitive types
|
||||
- [ ] Pointers/references
|
||||
- [x] Primitive types
|
||||
- [x] Pointers/references
|
||||
- [ ] Structs
|
||||
- [ ] Arrays
|
||||
- [ ] Function types
|
||||
- [ ] Control flow syntax
|
||||
- [ ] Function declaration syntax
|
||||
- [ ] Module/import system
|
||||
- [ ] Operator precedence
|
||||
- [x] Function types
|
||||
- [x] Control flow syntax
|
||||
- [x] Function declaration syntax
|
||||
- [x] Module/import system
|
||||
- [x] Operator precedence
|
||||
- [ ] Write EBNF grammar
|
||||
- [ ] Create example programs
|
||||
- [x] Create example programs
|
||||
|
||||
---
|
||||
|
||||
@@ -258,9 +258,13 @@
|
||||
**Dependencies:** 2.1.1
|
||||
**Deliverable:** Parser in `dsc-compiler` crate
|
||||
|
||||
- [ ] Adapt existing C lexer to new syntax
|
||||
- [x] Adapt existing C lexer to new syntax
|
||||
- [ ] Implement new parser for designed syntax
|
||||
- [ ] AST node definitions
|
||||
- [ ] Array syntax
|
||||
- [ ] Struct syntax
|
||||
- [x] Pointer syntax
|
||||
- [ ] Namespaced call syntax
|
||||
- [x] AST node definitions
|
||||
- [ ] Error recovery mechanisms
|
||||
- [ ] Comprehensive parser tests
|
||||
- [ ] Syntax error message quality testing
|
||||
@@ -273,16 +277,16 @@
|
||||
**Dependencies:** 2.1.2, 1.2.2
|
||||
**Deliverable:** Working code generator
|
||||
|
||||
- [ ] Review and fix existing codegen issues
|
||||
- [x] Review and fix existing codegen issues
|
||||
- [ ] Implement missing language features
|
||||
- [ ] Structs
|
||||
- [ ] Arrays
|
||||
- [ ] Pointers/memory operations
|
||||
- [x] Pointers/memory operations
|
||||
- [ ] For loops
|
||||
- [ ] Switch statements
|
||||
- [ ] Break/continue
|
||||
- [ ] Optimize register allocation further
|
||||
- [ ] Implement proper function calling conventions
|
||||
- [x] Implement proper function calling conventions
|
||||
- [ ] Add constant folding optimization
|
||||
- [ ] Dead code elimination
|
||||
- [ ] Test each feature thoroughly
|
||||
@@ -321,7 +325,17 @@
|
||||
- [ ] Memory allocation (malloc/free)
|
||||
- [ ] String operations
|
||||
- [ ] Math functions
|
||||
- [x] Multiply
|
||||
- [ ] Divide (needs fixing: currently very slow and broken)
|
||||
- [ ] I/O functions (improved print, read)
|
||||
- [x] Print number
|
||||
- [x] Print hex value
|
||||
- [x] Print word
|
||||
- [x] Print byte
|
||||
- [x] Print from string ptr
|
||||
- [x] Print whitespace and newline
|
||||
- [x] Reset display
|
||||
- [x] Reset cursor
|
||||
- [ ] System call interface
|
||||
- [ ] Tests for each function
|
||||
|
||||
@@ -786,13 +800,13 @@
|
||||
## Summary Timeline
|
||||
|
||||
| Phase | Duration | Key Dependencies |
|
||||
|---|---|---|
|
||||
| ----------------------------- | --------- | ------------------- |
|
||||
| Phase 1: Foundation | 3–4 weeks | None |
|
||||
| Phase 2: Compiler | 3–4 weeks | Phase 1 complete |
|
||||
| Phase 3: Build System | 2–3 weeks | Phases 1–2 complete |
|
||||
| Phase 4: Debugger | 3–4 weeks | Phases 1–3 complete |
|
||||
| Phase 5: Integration | 1–2 weeks | Phases 1–4 complete |
|
||||
| Phase 6: CLI Emulator *(NTH)* | 4+ weeks | Phase 4 complete |
|
||||
| Phase 6: CLI Emulator _(NTH)_ | 4+ weeks | Phase 4 complete |
|
||||
|
||||
**Total Estimated Time: 12–17 weeks (3–4 months) for Phases 1–5**
|
||||
|
||||
@@ -818,13 +832,13 @@ The following tasks are on the critical path and will block other work if delaye
|
||||
## Recommended Work Order
|
||||
|
||||
| Weeks | Focus | Tasks |
|
||||
|---|---|---|
|
||||
| ----- | ------------------------------------- | ------------------------------------------------- |
|
||||
| 1–2 | Binary Format & Linker | 1.1.1 → 1.1.2 → 1.1.3 |
|
||||
| 3–4 | Assembler Rewrite | 1.2.1 → 1.2.2 |
|
||||
| 5–6 | Compiler Syntax & Parser | 2.1.1 → 2.1.2 *(start 1.3 docs in parallel)* |
|
||||
| 7–9 | Compiler Codegen & Types | 2.1.3 → 2.1.4 *(start 2.2.1 runtime in parallel)* |
|
||||
| 5–6 | Compiler Syntax & Parser | 2.1.1 → 2.1.2 _(start 1.3 docs in parallel)_ |
|
||||
| 7–9 | Compiler Codegen & Types | 2.1.3 → 2.1.4 _(start 2.2.1 runtime in parallel)_ |
|
||||
| 10–11 | Build System | 3.1.1 → 3.1.2 → 3.1.3 |
|
||||
| 12–13 | Package Management *(if desired now)* | 3.2.1 → 3.2.2 → 3.2.3 |
|
||||
| 12–13 | Package Management _(if desired now)_ | 3.2.1 → 3.2.2 → 3.2.3 |
|
||||
| 14–15 | Debug Symbols | 4.1.1 → 4.1.2 → 4.1.3 |
|
||||
| 16–18 | Core Debugger | 4.2.1 → 4.2.2 → 4.2.4 |
|
||||
| 19–20 | Editor Enhancements | 4.3.1 → 4.3.2 → 4.3.3 → 4.3.4 |
|
||||
|
||||
Reference in New Issue
Block a user