Continued the register allocator rewrite; progress is slow because scoping is proving to be a challenge.
This commit is contained in:
2026-02-14 02:46:29 +00:00
parent d66baf6f99
commit 201b18069b
10 changed files with 1153 additions and 790 deletions
+168 -22
View File
@@ -23,8 +23,9 @@ pub enum Token {
// Identifiers and literals
Identifier(Name),
String(String),
Integer(u64),
Char(char),
SignedInt(i32, Option<TypeId>),
UnsignedInt(u32, Option<TypeId>),
// Delimiters
LeftParen, // (
@@ -86,7 +87,7 @@ pub enum Token {
Eof,
}
use crate::model::Name;
use crate::model::{Name, TypeId};
use std::fmt;
impl fmt::Display for Name {
@@ -118,7 +119,8 @@ impl Token {
Token::As => "As",
Token::Identifier(_) => "Identifier",
Token::String(_) => "String",
Token::Integer(_) => "UnsignedInt",
Token::UnsignedInt(_, _) => "UnsignedInt",
Token::SignedInt(_, _) => "SignedInt",
Token::Char(_) => "Char",
Token::LeftParen => "LeftParen",
Token::RightParen => "RightParen",
@@ -388,8 +390,126 @@ impl<'a> Lexer<'a> {
// ========================================================================
fn scan_number(&mut self) -> Token {
// Check if number is negative
let is_negative = self.current == Some('-');
if is_negative {
self.advance(); // consume '-'
}
match self.read_number() {
Ok(num) => Token::Integer(num),
Ok((value, type_suffix)) => {
    // Validate the literal against its (optional) type suffix and build the
    // matching token. On any range/sign violation an error is reported via
    // `self.error` and a zero-valued token of the same kind is returned.
    //
    // The sign is applied in i128: every u64 magnitude fits there, so neither
    // the negation nor the range checks can wrap or truncate. Casting the
    // magnitude with `as i32` *before* checking (as was done previously)
    // truncates: 4294967296_i8 would be accepted as 0, and -2147483648_i32
    // would overflow when negating `i32::MIN`.
    let signed = if is_negative {
        -i128::from(value)
    } else {
        i128::from(value)
    };

    match type_suffix {
        Some(type_id) => {
            // Per-type description: signedness, inclusive bounds, display name.
            let (is_signed_type, min, max, type_name) = match type_id {
                TypeId::I8 => (true, i128::from(i8::MIN), i128::from(i8::MAX), "i8"),
                TypeId::I16 => (true, i128::from(i16::MIN), i128::from(i16::MAX), "i16"),
                TypeId::I32 => (true, i128::from(i32::MIN), i128::from(i32::MAX), "i32"),
                TypeId::U8 => (false, 0, i128::from(u8::MAX), "u8"),
                TypeId::U16 => (false, 0, i128::from(u16::MAX), "u16"),
                TypeId::U32 => (false, 0, i128::from(u32::MAX), "u32"),
                // `read_type_suffix` only ever yields the six types above.
                _ => unreachable!(),
            };

            if is_signed_type {
                if signed < min || signed > max {
                    self.error(&format!(
                        "Value {} out of range for {}",
                        signed, type_name
                    ));
                    return Token::SignedInt(0, Some(type_id));
                }
                // Bounds are at most those of i32, so this cast is lossless.
                Token::SignedInt(signed as i32, Some(type_id))
            } else {
                if is_negative {
                    self.error(&format!(
                        "Unsigned type {} cannot be negative",
                        type_name
                    ));
                    return Token::UnsignedInt(0, Some(type_id));
                }
                if signed > max {
                    self.error(&format!(
                        "Value {} out of range for {}",
                        value, type_name
                    ));
                    return Token::UnsignedInt(0, Some(type_id));
                }
                // Bounds are at most those of u32, so this cast is lossless.
                Token::UnsignedInt(value as u32, Some(type_id))
            }
        }
        // No type suffix: a leading '-' selects a signed token, otherwise an
        // unsigned one. Both carry 32-bit payloads, so range-check first
        // instead of silently truncating.
        None if is_negative => {
            if signed < i128::from(i32::MIN) {
                self.error(&format!(
                    "Value {} out of range for a signed 32-bit integer",
                    signed
                ));
                return Token::SignedInt(0, None);
            }
            Token::SignedInt(signed as i32, None)
        }
        None => {
            if value > u64::from(u32::MAX) {
                self.error(&format!(
                    "Value {} out of range for an unsigned 32-bit integer",
                    value
                ));
                return Token::UnsignedInt(0, None);
            }
            Token::UnsignedInt(value as u32, None)
        }
    }
}
Err(e) => {
self.error(&e);
// Skip the invalid number
@@ -399,31 +519,66 @@ impl<'a> Lexer<'a> {
}
self.advance();
}
Token::Integer(0)
Token::SignedInt(0, None)
}
}
}
fn read_number(&mut self) -> Result<u64, String> {
fn read_number(&mut self) -> Result<(u64, Option<TypeId>), String> {
// Check for hex (0x) or binary (0b) prefix
if self.current == Some('0') {
match self.peek() {
Some('x') | Some('X') => {
self.advance(); // consume '0'
self.advance(); // consume 'x'
return self.read_hex_number();
let value = self.read_hex_number()?;
let type_suffix = self.read_type_suffix()?;
return Ok((value, type_suffix));
}
Some('b') | Some('B') => {
self.advance(); // consume '0'
self.advance(); // consume 'b'
return self.read_binary_number();
let value = self.read_binary_number()?;
let type_suffix = self.read_type_suffix()?;
return Ok((value, type_suffix));
}
_ => {}
}
}
// Read decimal number
self.read_decimal_number()
let value = self.read_decimal_number()?;
let type_suffix = self.read_type_suffix()?;
Ok((value, type_suffix))
}
/// Reads an optional `_<type>` suffix (e.g. `_i32`, `_u8`) following a
/// numeric literal.
///
/// Returns `Ok(None)` when `peek()` does not yield `'_'` (nothing is
/// consumed in that case). Otherwise the underscore and the run of ASCII
/// alphanumeric characters after it are consumed and mapped to a [`TypeId`].
///
/// # Errors
///
/// Returns `Err` with an "Invalid type suffix" message when the consumed
/// text is not one of `i8`, `i16`, `i32`, `u8`, `u16`, `u32`.
fn read_type_suffix(&mut self) -> Result<Option<TypeId>, String> {
    // Guard: without a leading underscore there is no suffix to read.
    if self.peek() != Some('_') {
        return Ok(None);
    }
    self.advance(); // step over the '_'

    // Gather the letters/digits that make up the suffix name.
    let mut name = String::new();
    while let Some(ch) = self.peek().filter(|ch| ch.is_ascii_alphanumeric()) {
        self.advance();
        name.push(ch);
    }

    let type_id = match name.as_str() {
        "i8" => TypeId::I8,
        "i16" => TypeId::I16,
        "i32" => TypeId::I32,
        "u8" => TypeId::U8,
        "u16" => TypeId::U16,
        "u32" => TypeId::U32,
        _ => return Err(format!("Invalid type suffix: {}", name)),
    };
    Ok(Some(type_id))
}
fn read_decimal_number(&mut self) -> Result<u64, String> {
@@ -437,8 +592,10 @@ impl<'a> Lexer<'a> {
if c.is_ascii_digit() {
self.advance();
num_str.push(c);
} else if c == '_' {
// Allow underscores as separators (like Rust)
} else if c == '_'
&& self.peek_second().map_or(false, |ch| ch.is_ascii_digit())
{
// Allow underscores as separators only between digits
self.advance();
} else {
break;
@@ -883,17 +1040,6 @@ mod tests {
}
}
#[test]
fn test_numbers() {
    // Decimal, hexadecimal and binary spellings of 42, followed by a decimal
    // literal that uses '_' as a digit separator.
    let mut lexer = Lexer::new("42 0x2A 0b101010 123_456");
    for expected in [42, 42, 42, 123456] {
        assert_eq!(lexer.next_token(), Token::Integer(expected));
    }
}
#[test]
fn test_namespaced_identifier() {
let input = "print::println std::io::read";