328741eb51
(only the unary operators from this are implemented for now)
908 lines
26 KiB
Rust
908 lines
26 KiB
Rust
use std::iter::Peekable;
|
|
use std::str::Chars;
|
|
|
|
#[derive(Debug, PartialEq, Clone)]
|
|
pub enum Token {
|
|
// Keywords
|
|
Fn,
|
|
Let,
|
|
If,
|
|
Else,
|
|
Loop,
|
|
While,
|
|
Break,
|
|
Return,
|
|
Continue,
|
|
Include,
|
|
Static,
|
|
Const,
|
|
As,
|
|
SizeOf,
|
|
|
|
// Identifiers and literals
|
|
Identifier(Name),
|
|
String(String),
|
|
Integer(u64),
|
|
Char(char),
|
|
|
|
// Delimiters
|
|
LeftParen, // (
|
|
RightParen, // )
|
|
LeftBrace, // {
|
|
RightBrace, // }
|
|
LeftBracket, // [
|
|
RightBracket, // ]
|
|
Semicolon, // ;
|
|
Colon, // :
|
|
Comma, // ,
|
|
Dot, // .
|
|
RightArrow, // ->
|
|
|
|
// Arithmetic operators
|
|
Plus, // +
|
|
Minus, // -
|
|
Star, // *
|
|
Slash, // /
|
|
Percent, // %
|
|
PlusPlus, // ++
|
|
MinusMinus, // --
|
|
|
|
// Bitwise operators
|
|
Ampersand, // &
|
|
Pipe, // |
|
|
Caret, // ^
|
|
Tilde, // ~
|
|
LeftShift, // <<
|
|
RightShift, // >>
|
|
|
|
// Logical operators
|
|
Bang, // !
|
|
LogicalAnd, // &&
|
|
LogicalOr, // ||
|
|
|
|
// Comparison operators
|
|
EqualEqual, // ==
|
|
BangEqual, // !=
|
|
Less, // <
|
|
LessEqual, // <=
|
|
Greater, // >
|
|
GreaterEqual, // >=
|
|
|
|
// Assignment operators
|
|
Assign, // =
|
|
PlusEqual, // +=
|
|
MinusEqual, // -=
|
|
StarEqual, // *=
|
|
SlashEqual, // /=
|
|
PercentEqual, // %=
|
|
AndEqual, // &=
|
|
OrEqual, // |=
|
|
XorEqual, // ^=
|
|
ShlEqual, // <<=
|
|
ShrEqual, // >>=
|
|
|
|
// Special
|
|
Eof,
|
|
}
|
|
|
|
use crate::model::Name;
|
|
use std::fmt;
|
|
|
|
impl fmt::Display for Name {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
if let Some(ref ns) = self.namespace {
|
|
write!(f, "{}::{}", ns, self.name)
|
|
} else {
|
|
write!(f, "{}", self.name)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Token {
|
|
pub fn tt(&self) -> &str {
|
|
match self {
|
|
Token::Const => "Const",
|
|
Token::Static => "Static",
|
|
Token::Include => "Include",
|
|
Token::Fn => "Fn",
|
|
Token::If => "If",
|
|
Token::Let => "Let",
|
|
Token::Else => "Else",
|
|
Token::Loop => "Loop",
|
|
Token::While => "While",
|
|
Token::Break => "Break",
|
|
Token::Return => "Return",
|
|
Token::Continue => "Continue",
|
|
Token::As => "As",
|
|
Token::Identifier(_) => "Identifier",
|
|
Token::String(_) => "String",
|
|
Token::Integer(_) => "UnsignedInt",
|
|
Token::Char(_) => "Char",
|
|
Token::LeftParen => "LeftParen",
|
|
Token::RightParen => "RightParen",
|
|
Token::LeftBrace => "LeftBrace",
|
|
Token::RightBrace => "RightBrace",
|
|
Token::LeftBracket => "LeftBracket",
|
|
Token::RightBracket => "RightBracket",
|
|
Token::Semicolon => "Semicolon",
|
|
Token::Colon => "Colon",
|
|
Token::Comma => "Comma",
|
|
Token::Dot => "Dot",
|
|
Token::RightArrow => "RightArrow",
|
|
Token::Plus => "Plus",
|
|
Token::Minus => "Minus",
|
|
Token::Star => "Star",
|
|
Token::Slash => "Slash",
|
|
Token::Percent => "Percent",
|
|
Token::PlusPlus => "PlusPlus",
|
|
Token::MinusMinus => "MinusMinus",
|
|
Token::Ampersand => "Ampersand",
|
|
Token::Pipe => "Pipe",
|
|
Token::Caret => "Caret",
|
|
Token::Tilde => "Tilde",
|
|
Token::LeftShift => "LeftShift",
|
|
Token::RightShift => "RightShift",
|
|
Token::Bang => "Bang",
|
|
Token::LogicalAnd => "LogicalAnd",
|
|
Token::LogicalOr => "LogicalOr",
|
|
Token::EqualEqual => "EqualEqual",
|
|
Token::BangEqual => "BangEqual",
|
|
Token::Less => "Less",
|
|
Token::LessEqual => "LessEqual",
|
|
Token::Greater => "Greater",
|
|
Token::GreaterEqual => "GreaterEqual",
|
|
Token::Assign => "Assign",
|
|
Token::PlusEqual => "PlusEqual",
|
|
Token::MinusEqual => "MinusEqual",
|
|
Token::StarEqual => "StarEqual",
|
|
Token::SlashEqual => "SlashEqual",
|
|
Token::PercentEqual => "PercentEqual",
|
|
Token::AndEqual => "AndEqual",
|
|
Token::OrEqual => "OrEqual",
|
|
Token::XorEqual => "XorEqual",
|
|
Token::ShlEqual => "ShlEqual",
|
|
Token::ShrEqual => "ShrEqual",
|
|
Token::SizeOf => "SizeOf",
|
|
Token::Eof => "Eof",
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Character-level scanner that turns source text into [`Token`]s.
pub struct Lexer<'a> {
    /// Input that has not been consumed yet, i.e. everything after `current`.
    chars: Peekable<Chars<'a>>,
    /// Character under the cursor; `None` once the input is exhausted.
    current: Option<char>,
    /// Line number reported in error messages; starts at 1.
    line: usize,
}
|
|
|
|
impl<'a> Lexer<'a> {
|
|
pub fn new(input: &'a str) -> Self {
|
|
let mut chars = input.chars().peekable();
|
|
let current = chars.next();
|
|
|
|
Lexer {
|
|
chars,
|
|
current,
|
|
line: 1,
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Character Navigation
|
|
// ========================================================================
|
|
|
|
/// Advance to the next character and return it
|
|
fn advance(&mut self) -> Option<char> {
|
|
self.current = self.chars.next();
|
|
self.current
|
|
}
|
|
|
|
/// Peek at the next character without consuming it
|
|
fn peek(&mut self) -> Option<char> {
|
|
self.chars.peek().copied()
|
|
}
|
|
|
|
/// Peek two characters ahead
|
|
fn peek_second(&mut self) -> Option<char> {
|
|
let mut temp = self.chars.clone();
|
|
temp.next(); // Skip the first peek
|
|
temp.next()
|
|
}
|
|
|
|
/// Check if the next character matches expected, and consume it if so
|
|
fn match_next(&mut self, expected: char) -> bool {
|
|
if self.peek() == Some(expected) {
|
|
self.advance();
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Whitespace and Comments
|
|
// ========================================================================
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
while let Some(c) = self.current {
|
|
if c.is_whitespace() {
|
|
if c == '\n' {
|
|
self.line += 1;
|
|
}
|
|
self.advance();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
fn skip_line_comment(&mut self) {
|
|
// We're at the first '/', advance past '//'
|
|
self.advance(); // consume first '/'
|
|
self.advance(); // consume second '/'
|
|
|
|
// Skip until newline or EOF
|
|
while let Some(c) = self.current {
|
|
if c == '\n' {
|
|
self.line += 1;
|
|
self.advance();
|
|
break;
|
|
}
|
|
self.advance();
|
|
}
|
|
}
|
|
|
|
fn skip_block_comment(&mut self) -> Result<(), String> {
|
|
let start_line = self.line;
|
|
|
|
// We're at '/', advance past '/*'
|
|
self.advance(); // consume '/'
|
|
self.advance(); // consume '*'
|
|
|
|
// Look for closing '*/'
|
|
while let Some(c) = self.current {
|
|
if c == '\n' {
|
|
self.line += 1;
|
|
}
|
|
|
|
if c == '*' && self.peek() == Some('/') {
|
|
self.advance(); // consume '*'
|
|
self.advance(); // consume '/'
|
|
return Ok(());
|
|
}
|
|
|
|
self.advance();
|
|
}
|
|
|
|
Err(format!(
|
|
"Unterminated block comment starting at line {}",
|
|
start_line
|
|
))
|
|
}
|
|
|
|
fn skip_whitespace_and_comments(&mut self) {
|
|
loop {
|
|
self.skip_whitespace();
|
|
|
|
// Check for comments
|
|
if self.current == Some('/') {
|
|
match self.peek() {
|
|
Some('/') => {
|
|
self.skip_line_comment();
|
|
continue;
|
|
}
|
|
Some('*') => {
|
|
if let Err(e) = self.skip_block_comment() {
|
|
self.error(&e);
|
|
}
|
|
continue;
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Identifiers and Keywords
|
|
// ========================================================================
|
|
|
|
fn read_identifier(&mut self) -> String {
|
|
let mut ident = String::new();
|
|
|
|
// Include the current character (already validated as alphabetic or '_')
|
|
if let Some(c) = self.current {
|
|
ident.push(c);
|
|
}
|
|
|
|
// Read remaining alphanumeric or underscore characters
|
|
while let Some(c) = self.peek() {
|
|
if c.is_alphanumeric() || c == '_' {
|
|
self.advance();
|
|
ident.push(c);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
ident
|
|
}
|
|
|
|
fn scan_identifier_or_keyword(&mut self) -> Token {
|
|
let first_part = self.read_identifier();
|
|
|
|
// Check if it's a keyword (keywords cannot have namespaces)
|
|
if let Some(keyword) = self.match_keyword(&first_part) {
|
|
return keyword;
|
|
}
|
|
|
|
// Check for namespace separator '::'
|
|
if self.peek() == Some(':') && self.peek_second() == Some(':') {
|
|
// Consume '::'
|
|
self.advance(); // consume first ':'
|
|
self.advance(); // consume second ':'
|
|
self.advance(); // move to the first character of the next identifier
|
|
|
|
// Read the second part (the actual name)
|
|
let second_part = self.read_identifier();
|
|
|
|
return Token::Identifier(Name {
|
|
namespace: Some(first_part),
|
|
name: second_part,
|
|
});
|
|
}
|
|
|
|
// Plain identifier without namespace
|
|
Token::Identifier(Name {
|
|
namespace: None,
|
|
name: first_part,
|
|
})
|
|
}
|
|
|
|
fn match_keyword(&self, word: &str) -> Option<Token> {
|
|
match word {
|
|
"fn" => Some(Token::Fn),
|
|
"let" => Some(Token::Let),
|
|
"if" => Some(Token::If),
|
|
"else" => Some(Token::Else),
|
|
"loop" => Some(Token::Loop),
|
|
"while" => Some(Token::While),
|
|
"break" => Some(Token::Break),
|
|
"return" => Some(Token::Return),
|
|
"continue" => Some(Token::Continue),
|
|
"include" => Some(Token::Include),
|
|
"const" => Some(Token::Const),
|
|
"static" => Some(Token::Static),
|
|
"as" => Some(Token::As),
|
|
"sizeof" => Some(Token::SizeOf),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Numbers
|
|
// ========================================================================
|
|
|
|
fn scan_number(&mut self) -> Token {
|
|
match self.read_number() {
|
|
Ok(num) => Token::Integer(num),
|
|
Err(e) => {
|
|
self.error(&e);
|
|
// Skip the invalid number
|
|
while let Some(c) = self.peek() {
|
|
if !c.is_alphanumeric() && c != '_' {
|
|
break;
|
|
}
|
|
self.advance();
|
|
}
|
|
Token::Integer(0)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_number(&mut self) -> Result<u64, String> {
|
|
// Check for hex (0x) or binary (0b) prefix
|
|
if self.current == Some('0') {
|
|
match self.peek() {
|
|
Some('x') | Some('X') => {
|
|
self.advance(); // consume '0'
|
|
self.advance(); // consume 'x'
|
|
return self.read_hex_number();
|
|
}
|
|
Some('b') | Some('B') => {
|
|
self.advance(); // consume '0'
|
|
self.advance(); // consume 'b'
|
|
return self.read_binary_number();
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
// Read decimal number
|
|
self.read_decimal_number()
|
|
}
|
|
|
|
fn read_decimal_number(&mut self) -> Result<u64, String> {
|
|
let mut num_str = String::new();
|
|
|
|
if let Some(c) = self.current {
|
|
num_str.push(c);
|
|
}
|
|
|
|
while let Some(c) = self.peek() {
|
|
if c.is_ascii_digit() {
|
|
self.advance();
|
|
num_str.push(c);
|
|
} else if c == '_' {
|
|
// Allow underscores as separators (like Rust)
|
|
self.advance();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
num_str
|
|
.parse::<u64>()
|
|
.map_err(|_| format!("Invalid decimal number: {}", num_str))
|
|
}
|
|
|
|
fn read_hex_number(&mut self) -> Result<u64, String> {
|
|
let mut num_str = String::new();
|
|
|
|
// Read the first hex digit (current character)
|
|
if let Some(c) = self.current {
|
|
if c.is_ascii_hexdigit() {
|
|
num_str.push(c);
|
|
}
|
|
}
|
|
|
|
while let Some(c) = self.peek() {
|
|
if c.is_ascii_hexdigit() {
|
|
self.advance();
|
|
num_str.push(c);
|
|
} else if c == '_' {
|
|
self.advance(); // Allow underscores as separators
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if num_str.is_empty() {
|
|
return Err("Invalid hexadecimal number: no digits after 0x".to_string());
|
|
}
|
|
|
|
u64::from_str_radix(&num_str, 16)
|
|
.map_err(|_| format!("Invalid hexadecimal number: {}", num_str))
|
|
}
|
|
|
|
fn read_binary_number(&mut self) -> Result<u64, String> {
|
|
let mut num_str = String::new();
|
|
|
|
// Read the first binary digit (current character)
|
|
if let Some(c) = self.current {
|
|
if c == '0' || c == '1' {
|
|
num_str.push(c);
|
|
}
|
|
}
|
|
|
|
while let Some(c) = self.peek() {
|
|
if c == '0' || c == '1' {
|
|
self.advance();
|
|
num_str.push(c);
|
|
} else if c == '_' {
|
|
self.advance(); // Allow underscores as separators
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if num_str.is_empty() {
|
|
return Err("Invalid binary number: no digits after 0b".to_string());
|
|
}
|
|
|
|
u64::from_str_radix(&num_str, 2)
|
|
.map_err(|_| format!("Invalid binary number: {}", num_str))
|
|
}
|
|
|
|
// ========================================================================
|
|
// String and Character Literals
|
|
// ========================================================================
|
|
|
|
fn scan_string(&mut self) -> Token {
|
|
match self.read_string() {
|
|
Ok(s) => Token::String(s),
|
|
Err(e) => {
|
|
self.error(&e);
|
|
// Skip to the end of the string or newline
|
|
while let Some(c) = self.current {
|
|
if c == '"' || c == '\n' {
|
|
break;
|
|
}
|
|
self.advance();
|
|
}
|
|
Token::String(String::new())
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_string(&mut self) -> Result<String, String> {
|
|
self.advance(); // Skip the opening quote
|
|
let mut s = String::new();
|
|
|
|
while let Some(c) = self.current {
|
|
if c == '"' {
|
|
return Ok(s);
|
|
}
|
|
|
|
if c == '\n' {
|
|
return Err("Unterminated string literal (newline)".to_string());
|
|
}
|
|
|
|
// Handle escape sequences
|
|
if c == '\\' {
|
|
self.advance();
|
|
if let Some(escaped) = self.current {
|
|
let escaped_char = match escaped {
|
|
'n' => '\n',
|
|
't' => '\t',
|
|
'r' => '\r',
|
|
'\\' => '\\',
|
|
'"' => '"',
|
|
'\'' => '\'',
|
|
'0' => '\0',
|
|
_ => {
|
|
return Err(format!(
|
|
"Invalid escape sequence: \\{}",
|
|
escaped
|
|
));
|
|
}
|
|
};
|
|
s.push(escaped_char);
|
|
} else {
|
|
return Err("Unexpected end of string after escape".to_string());
|
|
}
|
|
} else {
|
|
s.push(c);
|
|
}
|
|
|
|
self.advance();
|
|
}
|
|
|
|
Err("Unterminated string literal".to_string())
|
|
}
|
|
|
|
fn scan_char(&mut self) -> Token {
|
|
match self.read_char() {
|
|
Ok(ch) => Token::Char(ch),
|
|
Err(e) => {
|
|
self.error(&e);
|
|
// Skip to the end of the char literal
|
|
while let Some(c) = self.current {
|
|
if c == '\'' || c == '\n' {
|
|
break;
|
|
}
|
|
self.advance();
|
|
}
|
|
Token::Char('\0')
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_char(&mut self) -> Result<char, String> {
|
|
self.advance(); // Skip opening quote
|
|
|
|
let ch = match self.current {
|
|
Some('\\') => {
|
|
// Handle escape sequences
|
|
self.advance();
|
|
match self.current {
|
|
Some('n') => '\n',
|
|
Some('t') => '\t',
|
|
Some('r') => '\r',
|
|
Some('\\') => '\\',
|
|
Some('\'') => '\'',
|
|
Some('"') => '"',
|
|
Some('0') => '\0',
|
|
Some(c) => return Err(format!("Invalid escape sequence: \\{}", c)),
|
|
None => {
|
|
return Err(
|
|
"Unexpected end after escape in char literal".to_string()
|
|
);
|
|
}
|
|
}
|
|
}
|
|
Some('\'') => return Err("Empty character literal".to_string()),
|
|
Some('\n') => return Err("Unterminated character literal".to_string()),
|
|
Some(c) => c,
|
|
None => return Err("Unterminated character literal".to_string()),
|
|
};
|
|
|
|
self.advance(); // Move to closing quote
|
|
|
|
if self.current != Some('\'') {
|
|
return Err(
|
|
"Character literal must contain exactly one character".to_string()
|
|
);
|
|
}
|
|
|
|
Ok(ch)
|
|
}
|
|
|
|
// ========================================================================
|
|
// Operators and Punctuation
|
|
// ========================================================================
|
|
|
|
fn scan_operator(&mut self, c: char) -> Token {
|
|
match c {
|
|
// Single-character tokens that can't be extended
|
|
'(' => Token::LeftParen,
|
|
')' => Token::RightParen,
|
|
'{' => Token::LeftBrace,
|
|
'}' => Token::RightBrace,
|
|
'[' => Token::LeftBracket,
|
|
']' => Token::RightBracket,
|
|
';' => Token::Semicolon,
|
|
',' => Token::Comma,
|
|
'.' => Token::Dot,
|
|
'~' => Token::Tilde,
|
|
':' => Token::Colon, // '::' is handled in identifier scanning
|
|
|
|
// Operators that may have compound forms
|
|
'+' => {
|
|
if self.match_next('+') {
|
|
Token::PlusPlus
|
|
} else if self.match_next('=') {
|
|
Token::PlusEqual
|
|
} else {
|
|
Token::Plus
|
|
}
|
|
}
|
|
|
|
'-' => {
|
|
if self.match_next('-') {
|
|
Token::MinusMinus
|
|
} else if self.match_next('>') {
|
|
Token::RightArrow
|
|
} else if self.match_next('=') {
|
|
Token::MinusEqual
|
|
} else {
|
|
Token::Minus
|
|
}
|
|
}
|
|
|
|
'*' => {
|
|
if self.match_next('=') {
|
|
Token::StarEqual
|
|
} else {
|
|
Token::Star
|
|
}
|
|
}
|
|
|
|
'/' => {
|
|
// Comments are handled in skip_whitespace_and_comments
|
|
if self.match_next('=') {
|
|
Token::SlashEqual
|
|
} else {
|
|
Token::Slash
|
|
}
|
|
}
|
|
|
|
'%' => {
|
|
if self.match_next('=') {
|
|
Token::PercentEqual
|
|
} else {
|
|
Token::Percent
|
|
}
|
|
}
|
|
|
|
'&' => {
|
|
if self.match_next('&') {
|
|
Token::LogicalAnd
|
|
} else if self.match_next('=') {
|
|
Token::AndEqual
|
|
} else {
|
|
Token::Ampersand
|
|
}
|
|
}
|
|
|
|
'|' => {
|
|
if self.match_next('|') {
|
|
Token::LogicalOr
|
|
} else if self.match_next('=') {
|
|
Token::OrEqual
|
|
} else {
|
|
Token::Pipe
|
|
}
|
|
}
|
|
|
|
'^' => {
|
|
if self.match_next('=') {
|
|
Token::XorEqual
|
|
} else {
|
|
Token::Caret
|
|
}
|
|
}
|
|
|
|
'!' => {
|
|
if self.match_next('=') {
|
|
Token::BangEqual
|
|
} else {
|
|
Token::Bang
|
|
}
|
|
}
|
|
|
|
'=' => {
|
|
if self.match_next('=') {
|
|
Token::EqualEqual
|
|
} else {
|
|
Token::Assign
|
|
}
|
|
}
|
|
|
|
'<' => {
|
|
if self.match_next('<') {
|
|
if self.match_next('=') {
|
|
Token::ShlEqual
|
|
} else {
|
|
Token::LeftShift
|
|
}
|
|
} else if self.match_next('=') {
|
|
Token::LessEqual
|
|
} else {
|
|
Token::Less
|
|
}
|
|
}
|
|
|
|
'>' => {
|
|
if self.match_next('>') {
|
|
if self.match_next('=') {
|
|
Token::ShrEqual
|
|
} else {
|
|
Token::RightShift
|
|
}
|
|
} else if self.match_next('=') {
|
|
Token::GreaterEqual
|
|
} else {
|
|
Token::Greater
|
|
}
|
|
}
|
|
|
|
_ => {
|
|
self.error(&format!("Unexpected character: '{}'", c));
|
|
Token::Eof // This shouldn't happen
|
|
}
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Main Token Scanning
|
|
// ========================================================================
|
|
|
|
pub fn next_token(&mut self) -> Token {
|
|
self.skip_whitespace_and_comments();
|
|
|
|
let Some(c) = self.current else {
|
|
return Token::Eof;
|
|
};
|
|
|
|
let token = match c {
|
|
// Identifiers and keywords
|
|
'a'..='z' | 'A'..='Z' | '_' => self.scan_identifier_or_keyword(),
|
|
|
|
// Numbers
|
|
'0'..='9' => self.scan_number(),
|
|
|
|
// String literals
|
|
'"' => self.scan_string(),
|
|
|
|
// Character literals
|
|
'\'' => self.scan_char(),
|
|
|
|
// Operators and punctuation
|
|
_ => self.scan_operator(c),
|
|
};
|
|
|
|
self.advance();
|
|
token
|
|
}
|
|
|
|
// ========================================================================
|
|
// Error Handling
|
|
// ========================================================================
|
|
|
|
fn error(&self, message: &str) {
|
|
eprintln!("Lexer error on line {}: {}", self.line, message);
|
|
}
|
|
}
|
|
|
|
// ========================================================================
|
|
// Iterator Implementation
|
|
// ========================================================================
|
|
|
|
impl<'a> Iterator for Lexer<'a> {
|
|
type Item = Token;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
match self.next_token() {
|
|
Token::Eof => None,
|
|
token => Some(token),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Every operator spelling lexes to its own token, longest match first.
    #[test]
    fn test_operators() {
        let mut lexer = Lexer::new(
            "+ ++ += - -- -= * *= / /= % %= & &= && | |= || ^ ^= ! != = == < <= << <<= > >= >> >>=",
        );

        let expected = [
            // + ++ +=
            Token::Plus,
            Token::PlusPlus,
            Token::PlusEqual,
            // - -- -=
            Token::Minus,
            Token::MinusMinus,
            Token::MinusEqual,
            // * *=
            Token::Star,
            Token::StarEqual,
            // / /=
            Token::Slash,
            Token::SlashEqual,
            // % %=
            Token::Percent,
            Token::PercentEqual,
            // & &= &&
            Token::Ampersand,
            Token::AndEqual,
            Token::LogicalAnd,
            // | |= ||
            Token::Pipe,
            Token::OrEqual,
            Token::LogicalOr,
            // ^ ^=
            Token::Caret,
            Token::XorEqual,
            // ! !=
            Token::Bang,
            Token::BangEqual,
            // = ==
            Token::Assign,
            Token::EqualEqual,
            // < <= << <<=
            Token::Less,
            Token::LessEqual,
            Token::LeftShift,
            Token::ShlEqual,
            // > >= >> >>=
            Token::Greater,
            Token::GreaterEqual,
            Token::RightShift,
            Token::ShrEqual,
        ];

        for expected_token in expected {
            assert_eq!(lexer.next_token(), expected_token);
        }
    }

    /// Decimal, hexadecimal, binary and `_`-separated literals all parse.
    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new("42 0x2A 0b101010 123_456");

        for value in [42, 42, 42, 123456] {
            assert_eq!(lexer.next_token(), Token::Integer(value));
        }
    }

    /// `namespace::name` is lexed as a single identifier token.
    #[test]
    fn test_namespaced_identifier() {
        let mut lexer = Lexer::new("print::println std::io::read");

        let Token::Identifier(name) = lexer.next_token() else {
            panic!("Expected namespaced identifier");
        };
        assert_eq!(name.namespace, Some("print".to_string()));
        assert_eq!(name.name, "println");
    }
}
|