continued on register allocator rewrite, slow progress as scoping is
proving to be a challenge
This commit is contained in:
@@ -23,8 +23,9 @@ pub enum Token {
|
||||
// Identifiers and literals
|
||||
Identifier(Name),
|
||||
String(String),
|
||||
Integer(u64),
|
||||
Char(char),
|
||||
SignedInt(i32, Option<TypeId>),
|
||||
UnsignedInt(u32, Option<TypeId>),
|
||||
|
||||
// Delimiters
|
||||
LeftParen, // (
|
||||
@@ -86,7 +87,7 @@ pub enum Token {
|
||||
Eof,
|
||||
}
|
||||
|
||||
use crate::model::Name;
|
||||
use crate::model::{Name, TypeId};
|
||||
use std::fmt;
|
||||
|
||||
impl fmt::Display for Name {
|
||||
@@ -118,7 +119,8 @@ impl Token {
|
||||
Token::As => "As",
|
||||
Token::Identifier(_) => "Identifier",
|
||||
Token::String(_) => "String",
|
||||
Token::Integer(_) => "UnsignedInt",
|
||||
Token::UnsignedInt(_, _) => "UnsignedInt",
|
||||
Token::SignedInt(_, _) => "SignedInt",
|
||||
Token::Char(_) => "Char",
|
||||
Token::LeftParen => "LeftParen",
|
||||
Token::RightParen => "RightParen",
|
||||
@@ -388,8 +390,126 @@ impl<'a> Lexer<'a> {
|
||||
// ========================================================================
|
||||
|
||||
fn scan_number(&mut self) -> Token {
|
||||
// Check if number is negative
|
||||
let is_negative = self.current == Some('-');
|
||||
if is_negative {
|
||||
self.advance(); // consume '-'
|
||||
}
|
||||
|
||||
match self.read_number() {
|
||||
Ok(num) => Token::Integer(num),
|
||||
Ok((value, type_suffix)) => {
|
||||
// Validate and construct appropriate token
|
||||
if let Some(type_id) = type_suffix {
|
||||
match type_id {
|
||||
TypeId::I8 => {
|
||||
let signed_val = if is_negative {
|
||||
-(value as i32)
|
||||
} else {
|
||||
value as i32
|
||||
};
|
||||
if signed_val < i8::MIN as i32 || signed_val > i8::MAX as i32
|
||||
{
|
||||
self.error(&format!(
|
||||
"Value {} out of range for i8",
|
||||
signed_val
|
||||
));
|
||||
return Token::SignedInt(0, Some(TypeId::I8));
|
||||
}
|
||||
Token::SignedInt(signed_val, Some(TypeId::I8))
|
||||
}
|
||||
TypeId::I16 => {
|
||||
let signed_val = if is_negative {
|
||||
-(value as i32)
|
||||
} else {
|
||||
value as i32
|
||||
};
|
||||
if signed_val < i16::MIN as i32
|
||||
|| signed_val > i16::MAX as i32
|
||||
{
|
||||
self.error(&format!(
|
||||
"Value {} out of range for i16",
|
||||
signed_val
|
||||
));
|
||||
return Token::SignedInt(0, Some(TypeId::I16));
|
||||
}
|
||||
Token::SignedInt(signed_val, Some(TypeId::I16))
|
||||
}
|
||||
TypeId::I32 => {
|
||||
let signed_val = if is_negative {
|
||||
if value > i32::MAX as u64 + 1 {
|
||||
self.error(&format!(
|
||||
"Value -{} out of range for i32",
|
||||
value
|
||||
));
|
||||
return Token::SignedInt(0, Some(TypeId::I32));
|
||||
}
|
||||
-(value as i32)
|
||||
} else {
|
||||
if value > i32::MAX as u64 {
|
||||
self.error(&format!(
|
||||
"Value {} out of range for i32",
|
||||
value
|
||||
));
|
||||
return Token::SignedInt(0, Some(TypeId::I32));
|
||||
}
|
||||
value as i32
|
||||
};
|
||||
Token::SignedInt(signed_val, Some(TypeId::I32))
|
||||
}
|
||||
TypeId::U8 => {
|
||||
if is_negative {
|
||||
self.error("Unsigned type u8 cannot be negative");
|
||||
return Token::UnsignedInt(0, Some(TypeId::U8));
|
||||
}
|
||||
if value > u8::MAX as u64 {
|
||||
self.error(&format!(
|
||||
"Value {} out of range for u8",
|
||||
value
|
||||
));
|
||||
return Token::UnsignedInt(0, Some(TypeId::U8));
|
||||
}
|
||||
Token::UnsignedInt(value as u32, Some(TypeId::U8))
|
||||
}
|
||||
TypeId::U16 => {
|
||||
if is_negative {
|
||||
self.error("Unsigned type u16 cannot be negative");
|
||||
return Token::UnsignedInt(0, Some(TypeId::U16));
|
||||
}
|
||||
if value > u16::MAX as u64 {
|
||||
self.error(&format!(
|
||||
"Value {} out of range for u16",
|
||||
value
|
||||
));
|
||||
return Token::UnsignedInt(0, Some(TypeId::U16));
|
||||
}
|
||||
Token::UnsignedInt(value as u32, Some(TypeId::U16))
|
||||
}
|
||||
TypeId::U32 => {
|
||||
if is_negative {
|
||||
self.error("Unsigned type u32 cannot be negative");
|
||||
return Token::UnsignedInt(0, Some(TypeId::U32));
|
||||
}
|
||||
if value > u32::MAX as u64 {
|
||||
self.error(&format!(
|
||||
"Value {} out of range for u32",
|
||||
value
|
||||
));
|
||||
return Token::UnsignedInt(0, Some(TypeId::U32));
|
||||
}
|
||||
Token::UnsignedInt(value as u32, Some(TypeId::U32))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
} else {
|
||||
// No type suffix - decide based on sign
|
||||
if is_negative {
|
||||
let signed_val = -(value as i32);
|
||||
Token::SignedInt(signed_val, None)
|
||||
} else {
|
||||
Token::UnsignedInt(value as u32, None)
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
self.error(&e);
|
||||
// Skip the invalid number
|
||||
@@ -399,31 +519,66 @@ impl<'a> Lexer<'a> {
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::Integer(0)
|
||||
Token::SignedInt(0, None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> Result<u64, String> {
|
||||
fn read_number(&mut self) -> Result<(u64, Option<TypeId>), String> {
|
||||
// Check for hex (0x) or binary (0b) prefix
|
||||
if self.current == Some('0') {
|
||||
match self.peek() {
|
||||
Some('x') | Some('X') => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'x'
|
||||
return self.read_hex_number();
|
||||
let value = self.read_hex_number()?;
|
||||
let type_suffix = self.read_type_suffix()?;
|
||||
return Ok((value, type_suffix));
|
||||
}
|
||||
Some('b') | Some('B') => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'b'
|
||||
return self.read_binary_number();
|
||||
let value = self.read_binary_number()?;
|
||||
let type_suffix = self.read_type_suffix()?;
|
||||
return Ok((value, type_suffix));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Read decimal number
|
||||
self.read_decimal_number()
|
||||
let value = self.read_decimal_number()?;
|
||||
let type_suffix = self.read_type_suffix()?;
|
||||
Ok((value, type_suffix))
|
||||
}
|
||||
|
||||
fn read_type_suffix(&mut self) -> Result<Option<TypeId>, String> {
|
||||
// Check for type suffix like _i32, _u8, etc.
|
||||
if self.peek() == Some('_') {
|
||||
self.advance(); // consume '_'
|
||||
|
||||
let mut suffix = String::new();
|
||||
while let Some(c) = self.peek() {
|
||||
if c.is_ascii_alphanumeric() {
|
||||
self.advance();
|
||||
suffix.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
match suffix.as_str() {
|
||||
"i8" => Ok(Some(TypeId::I8)),
|
||||
"i16" => Ok(Some(TypeId::I16)),
|
||||
"i32" => Ok(Some(TypeId::I32)),
|
||||
"u8" => Ok(Some(TypeId::U8)),
|
||||
"u16" => Ok(Some(TypeId::U16)),
|
||||
"u32" => Ok(Some(TypeId::U32)),
|
||||
_ => Err(format!("Invalid type suffix: {}", suffix)),
|
||||
}
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_decimal_number(&mut self) -> Result<u64, String> {
|
||||
@@ -437,8 +592,10 @@ impl<'a> Lexer<'a> {
|
||||
if c.is_ascii_digit() {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else if c == '_' {
|
||||
// Allow underscores as separators (like Rust)
|
||||
} else if c == '_'
|
||||
&& self.peek_second().map_or(false, |ch| ch.is_ascii_digit())
|
||||
{
|
||||
// Allow underscores as separators only between digits
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
@@ -883,17 +1040,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_numbers() {
|
||||
let input = "42 0x2A 0b101010 123_456";
|
||||
let mut lexer = Lexer::new(input);
|
||||
|
||||
assert_eq!(lexer.next_token(), Token::Integer(42));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(42));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(42));
|
||||
assert_eq!(lexer.next_token(), Token::Integer(123456));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_namespaced_identifier() {
|
||||
let input = "print::println std::io::read";
|
||||
|
||||
Reference in New Issue
Block a user