use std::iter::Peekable;
use std::str::Chars;

/// A single lexical token produced by [`Lexer`].
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    // Keywords
    Fn,
    Let,
    If,
    Else,
    Loop,
    Break,
    Return,
    Continue,
    Include,

    // Identifiers and literals
    Identifier(String),
    String(String),
    Number(i64),

    // Symbols
    LeftParen,  // (
    RightParen, // )
    LeftBrace,  // {
    RightBrace, // }
    Semicolon,  // ;
    Colon,      // :
    Comma,      // ,
    // Pipe,    // | (not tokenized yet: `|` is skipped like any unknown character)

    // Operators
    Plus,         // +
    Minus,        // -
    Star,         // *
    Slash,        // /
    Assign,       // =
    EqualEqual,   // ==
    Bang,         // !
    BangEqual,    // !=
    Less,         // <
    LessEqual,    // <=
    Greater,      // >
    GreaterEqual, // >=
    RightArrow,   // ->

    // Special
    Eof,
}

impl Token {
    /// Returns the name of the token's variant, ignoring any payload.
    ///
    /// For example both `Token::Number(1)` and `Token::Number(2)` yield
    /// `"Number"`.
    pub fn tt(&self) -> &'static str {
        match self {
            Token::Include => "Include",
            Token::Fn => "Fn",
            Token::If => "If",
            Token::Let => "Let",
            Token::Else => "Else",
            Token::Loop => "Loop",
            Token::Break => "Break",
            Token::Return => "Return",
            Token::Continue => "Continue",
            Token::Identifier(_) => "Identifier",
            Token::String(_) => "String",
            Token::Number(_) => "Number",
            Token::LeftParen => "LeftParen",
            Token::RightParen => "RightParen",
            Token::LeftBrace => "LeftBrace",
            Token::RightBrace => "RightBrace",
            Token::Semicolon => "Semicolon",
            Token::Colon => "Colon",
            Token::Comma => "Comma",
            Token::RightArrow => "RightArrow",
            // Token::Pipe => "Pipe",
            Token::Plus => "Plus",
            Token::Minus => "Minus",
            Token::Star => "Star",
            Token::Slash => "Slash",
            Token::Assign => "Assign",
            Token::EqualEqual => "EqualEqual",
            Token::Bang => "Bang",
            Token::BangEqual => "BangEqual",
            Token::Less => "Less",
            Token::LessEqual => "LessEqual",
            Token::Greater => "Greater",
            Token::GreaterEqual => "GreaterEqual",
            Token::Eof => "Eof",
        }
    }
}

/// Converts source text into a stream of [`Token`]s.
///
/// The lexer keeps one character in `current` (the character being looked at)
/// while `chars` holds everything after it, which gives one extra character
/// of lookahead through [`Peekable::peek`].
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Characters that come after `current`.
    chars: Peekable<Chars<'a>>,
    /// The character currently under the cursor, or `None` at end of input.
    current: Option<char>,
    /// 1-based count of the line the cursor is on.
    line: usize,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned on the first character of `input`.
    pub fn new(input: &'a str) -> Self {
        let mut chars = input.chars().peekable();
        let current = chars.next();
        Lexer {
            chars,
            current,
            line: 1,
        }
    }

    /// Returns the 1-based line number the lexer has reached so far.
    ///
    /// The counter is bumped for every `'\n'` consumed while skipping
    /// whitespace or while reading a string literal.
    pub fn line(&self) -> usize {
        self.line
    }

    /// Moves the cursor one character forward and returns the new `current`.
    fn advance(&mut self) -> Option<char> {
        self.current = self.chars.next();
        self.current
    }

    /// Returns the character after `current` without consuming it.
    fn peek(&mut self) -> Option<&char> {
        self.chars.peek()
    }

    /// Advances past whitespace, counting newlines, and stops on the first
    /// non-whitespace character (or at end of input).
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.current {
            if !c.is_whitespace() {
                break;
            }
            if c == '\n' {
                self.line += 1;
            }
            self.advance();
        }
    }

    /// Collects `first` plus every following character accepted by `keep`.
    ///
    /// `first` must be the character in `self.current`. On return the cursor
    /// rests on the last character that was collected, which is the position
    /// `next_token` expects before its final `advance`.
    fn read_while(&mut self, first: char, keep: impl Fn(char) -> bool) -> String {
        let mut text = String::from(first);
        while let Some(&c) = self.peek() {
            if !keep(c) {
                break;
            }
            text.push(c);
            self.advance();
        }
        text
    }

    /// Reads an identifier that starts with `first` (the current character).
    fn read_identifier(&mut self, first: char) -> String {
        self.read_while(first, |c| c.is_alphanumeric() || c == '_')
    }

    /// Reads a decimal integer literal that starts with `first` (the current
    /// character).
    ///
    /// The text consists solely of ASCII digits, so parsing can only fail on
    /// overflow; a literal that does not fit in an `i64` yields `0`.
    fn read_number(&mut self, first: char) -> i64 {
        self.read_while(first, |c| c.is_ascii_digit())
            .parse()
            .unwrap_or(0)
    }

    /// Reads the body of a string literal.
    ///
    /// Must be called with the cursor on the opening `"`. On return the cursor
    /// rests on the closing `"`, or is at end of input if the literal is
    /// unterminated (the text read so far is still returned). No escape
    /// sequences are interpreted.
    fn read_string(&mut self) -> String {
        let mut text = String::new();
        while let Some(c) = self.advance() {
            if c == '"' {
                break;
            }
            if c == '\n' {
                self.line += 1;
            }
            text.push(c);
        }
        text
    }

    /// Consumes the next character if it equals `expected`.
    ///
    /// Returns `true` when the character was consumed; used to recognise
    /// two-character operators such as `==` and `->`.
    fn match_next(&mut self, expected: char) -> bool {
        match self.peek() {
            Some(&c) if c == expected => {
                self.advance();
                true
            }
            _ => false,
        }
    }

    /// Maps a scanned word to its keyword token, or wraps it as an identifier.
    fn keyword_or_identifier(ident: String) -> Token {
        match ident.as_str() {
            "fn" => Token::Fn,
            "let" => Token::Let,
            "if" => Token::If,
            "else" => Token::Else,
            "loop" => Token::Loop,
            "break" => Token::Break,
            "return" => Token::Return,
            "continue" => Token::Continue,
            "include" => Token::Include,
            _ => Token::Identifier(ident),
        }
    }

    /// Scans and returns the next token.
    ///
    /// Whitespace and characters the lexer does not recognise are skipped.
    /// Once the input is exhausted every call returns [`Token::Eof`].
    pub fn next_token(&mut self) -> Token {
        loop {
            self.skip_whitespace();

            let c = match self.current {
                Some(c) => c,
                None => return Token::Eof,
            };

            // Every arm leaves the cursor on the LAST character of its token;
            // the shared `advance` below then steps past it.
            let token = match c {
                '(' => Token::LeftParen,
                ')' => Token::RightParen,
                '{' => Token::LeftBrace,
                '}' => Token::RightBrace,
                ';' => Token::Semicolon,
                ':' => Token::Colon,
                ',' => Token::Comma,
                // '|' => Token::Pipe,
                '+' => Token::Plus,
                '*' => Token::Star,
                '/' => Token::Slash,
                '-' => {
                    if self.match_next('>') {
                        Token::RightArrow
                    } else {
                        Token::Minus
                    }
                }
                '!' => {
                    if self.match_next('=') {
                        Token::BangEqual
                    } else {
                        Token::Bang
                    }
                }
                '=' => {
                    if self.match_next('=') {
                        Token::EqualEqual
                    } else {
                        Token::Assign
                    }
                }
                '<' => {
                    if self.match_next('=') {
                        Token::LessEqual
                    } else {
                        Token::Less
                    }
                }
                '>' => {
                    if self.match_next('=') {
                        Token::GreaterEqual
                    } else {
                        Token::Greater
                    }
                }
                '"' => Token::String(self.read_string()),
                c if c.is_alphabetic() || c == '_' => {
                    Self::keyword_or_identifier(self.read_identifier(c))
                }
                c if c.is_ascii_digit() => Token::Number(self.read_number(c)),
                _ => {
                    // Skip unknown characters for now. Looping (rather than
                    // recursing) keeps stack use constant on long junk runs.
                    self.advance();
                    continue;
                }
            };

            self.advance();
            return token;
        }
    }
}

/// Iterates over the tokens of the input, ending (yielding `None`) at
/// [`Token::Eof`].
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    fn next(&mut self) -> Option<Token> {
        match self.next_token() {
            Token::Eof => None,
            token => Some(token),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_keywords() {
        let input = "fn let if else loop break return continue include";
        let mut lexer = Lexer::new(input);

        assert_eq!(lexer.next_token(), Token::Fn);
        assert_eq!(lexer.next_token(), Token::Let);
        assert_eq!(lexer.next_token(), Token::If);
        assert_eq!(lexer.next_token(), Token::Else);
        assert_eq!(lexer.next_token(), Token::Loop);
        assert_eq!(lexer.next_token(), Token::Break);
        assert_eq!(lexer.next_token(), Token::Return);
        assert_eq!(lexer.next_token(), Token::Continue);
        assert_eq!(lexer.next_token(), Token::Include);
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    #[test]
    fn test_identifiers_and_numbers() {
        let input = "x y42 _test 123 45";
        let mut lexer = Lexer::new(input);

        assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
        assert_eq!(lexer.next_token(), Token::Identifier("y42".to_string()));
        assert_eq!(lexer.next_token(), Token::Identifier("_test".to_string()));
        assert_eq!(lexer.next_token(), Token::Number(123));
        assert_eq!(lexer.next_token(), Token::Number(45));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    #[test]
    fn test_single_digit_number() {
        let mut lexer = Lexer::new("7;");

        assert_eq!(lexer.next_token(), Token::Number(7));
        assert_eq!(lexer.next_token(), Token::Semicolon);
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    #[test]
    fn test_operators() {
        let input = "= == ! != < <= > >=";
        let mut lexer = Lexer::new(input);

        assert_eq!(lexer.next_token(), Token::Assign);
        assert_eq!(lexer.next_token(), Token::EqualEqual);
        assert_eq!(lexer.next_token(), Token::Bang);
        assert_eq!(lexer.next_token(), Token::BangEqual);
        assert_eq!(lexer.next_token(), Token::Less);
        assert_eq!(lexer.next_token(), Token::LessEqual);
        assert_eq!(lexer.next_token(), Token::Greater);
        assert_eq!(lexer.next_token(), Token::GreaterEqual);
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    #[test]
    fn test_arrow_and_string() {
        let mut lexer = Lexer::new("-> - \"hi there\" \"\"");

        assert_eq!(lexer.next_token(), Token::RightArrow);
        assert_eq!(lexer.next_token(), Token::Minus);
        assert_eq!(lexer.next_token(), Token::String("hi there".to_string()));
        assert_eq!(lexer.next_token(), Token::String(String::new()));
        assert_eq!(lexer.next_token(), Token::Eof);
    }

    #[test]
    fn test_line_tracking() {
        let mut lexer = Lexer::new("a\nb\n\nc");

        assert_eq!(lexer.line(), 1);
        assert_eq!(lexer.next_token(), Token::Identifier("a".to_string()));
        assert_eq!(lexer.line(), 1);
        assert_eq!(lexer.next_token(), Token::Identifier("b".to_string()));
        assert_eq!(lexer.line(), 2);
        assert_eq!(lexer.next_token(), Token::Identifier("c".to_string()));
        assert_eq!(lexer.line(), 4);
    }

    #[test]
    fn test_example_syntax() {
        let input = r#"
            main: Func = | x: U32, y: U32 | {
                res = add(x, y);
                print(res);
                if res > 10 {
                    print("res is greater than 10");
                }
            }
        "#;

        let mut lexer = Lexer::new(input);

        // Skip whitespace and newlines
        while let Some(c) = lexer.current {
            if !c.is_whitespace() {
                break;
            }
            lexer.advance();
        }

        // Test the first few tokens
        assert_eq!(lexer.next_token(), Token::Identifier("main".to_string()));
        assert_eq!(lexer.next_token(), Token::Colon);
        assert_eq!(lexer.next_token(), Token::Identifier("Func".to_string()));
        assert_eq!(lexer.next_token(), Token::Assign);
        // `|` is not a token yet, so the lexer skips it:
        // assert_eq!(lexer.next_token(), Token::Pipe);
        assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
        assert_eq!(lexer.next_token(), Token::Colon);
        assert_eq!(lexer.next_token(), Token::Identifier("U32".to_string()));
        assert_eq!(lexer.next_token(), Token::Comma);
        // The rest of the tokens would be tested similarly
    }
}