// ============================================================================
// Token Types
// ============================================================================

/// Lexical category of a token, carrying the payload for identifiers and
/// literals.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
    // Keywords
    Int,
    If,
    Else,
    While,
    Return,
    Include,

    // Identifiers and literals
    Identifier(String),
    Number(i32),
    String(String),
    Char(char),

    // Operators
    Plus,
    Minus,
    Star,
    Slash,
    Assign,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,

    // Delimiters
    LParen,
    RParen,
    LBrace,
    RBrace,
    Semicolon,
    Comma,
    Colon,
    Namespace,

    /// End-of-input marker; always the last token produced by
    /// [`Lexer::tokenize`].
    Eof,
}

/// Primitive value types. Not referenced by the lexer in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Type {
    Int32,
    Int16,
    Int8,
    Uint32,
    Uint16,
    Uint8,
    Char,
}

/// A token together with the 1-based line and column of its first character.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    pub token_type: TokenType,
    pub line: usize,
    pub col: usize,
}

impl Token {
    /// Creates a token of `token_type` located at `line`/`col`.
    pub fn new(token_type: TokenType, line: usize, col: usize) -> Self {
        Self {
            token_type,
            line,
            col,
        }
    }
}

// ============================================================================
// Lexer
// ============================================================================

/// Converts source text into a flat list of [`Token`]s.
#[derive(Debug)]
pub struct Lexer {
    /// Source text as individual characters so that indexing is O(1).
    source: Vec<char>,
    /// Index into `source` of the next unread character.
    pos: usize,
    /// 1-based line of the next unread character.
    line: usize,
    /// 1-based column of the next unread character.
    col: usize,
}

impl Lexer {
    /// Creates a lexer positioned at the start of `source` (line 1, column 1).
    pub fn new(source: &str) -> Self {
        Self {
            source: source.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
        }
    }

    /// Formats `msg` as a lexer error located at `line`/`col`.
    fn error_at(line: usize, col: usize, msg: &str) -> String {
        format!("Lexer error at line {}, col {}: {}", line, col, msg)
    }

    /// Formats `msg` as a lexer error located at the current position.
    fn error(&self, msg: &str) -> String {
        Self::error_at(self.line, self.col, msg)
    }

    /// Returns the character `offset` positions ahead without consuming it,
    /// or `None` past the end of the input.
    fn peek(&self, offset: usize) -> Option<char> {
        self.source.get(self.pos + offset).copied()
    }

    /// Consumes and returns the next character, updating line/column.
    /// Returns `None` at the end of the input.
    fn advance(&mut self) -> Option<char> {
        let ch = self.source.get(self.pos).copied()?;
        self.pos += 1;
        if ch == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        Some(ch)
    }

    /// Consumes a run of whitespace characters (including newlines).
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek(0) {
            if !ch.is_whitespace() {
                break;
            }
            self.advance();
        }
    }

    /// If positioned at `//`, consumes up to (but not including) the end of
    /// the line. Does nothing otherwise.
    fn skip_comment(&mut self) {
        if self.peek(0) == Some('/') && self.peek(1) == Some('/') {
            while let Some(ch) = self.peek(0) {
                if ch == '\n' {
                    break;
                }
                self.advance();
            }
        }
    }

    /// Skips any interleaving of whitespace and `//` comments.
    ///
    /// A single whitespace-then-comment pass is not enough: `skip_comment`
    /// stops at the newline, which would otherwise reach the operator match
    /// and be rejected as an unexpected character.
    fn skip_trivia(&mut self) {
        loop {
            self.skip_whitespace();
            if self.peek(0) == Some('/') && self.peek(1) == Some('/') {
                self.skip_comment();
            } else {
                break;
            }
        }
    }

    /// Reads a run of ASCII digits as an `i32`.
    ///
    /// # Errors
    /// Returns an error located at the literal's first digit when the value
    /// does not fit in an `i32` (previously such literals silently became 0).
    fn read_number(&mut self) -> Result<i32, String> {
        let (line, col) = (self.line, self.col);
        let mut digits = String::new();
        while let Some(ch) = self.peek(0) {
            if !ch.is_ascii_digit() {
                break;
            }
            digits.push(ch);
            self.advance();
        }
        digits.parse::<i32>().map_err(|_| {
            Self::error_at(
                line,
                col,
                &format!("Integer literal out of range: {}", digits),
            )
        })
    }

    /// Reads a run of alphanumeric characters and underscores.
    fn read_identifier(&mut self) -> String {
        let mut ident = String::new();
        while let Some(ch) = self.peek(0) {
            if !(ch.is_alphanumeric() || ch == '_') {
                break;
            }
            ident.push(ch);
            self.advance();
        }
        ident
    }

    /// Maps the character following a backslash to the character it denotes.
    /// `n`, `t`, `r` and `0` are translated; any other character (including
    /// `\`, `"` and `'`) stands for itself.
    fn unescape(escaped: char) -> char {
        match escaped {
            'n' => '\n',
            't' => '\t',
            'r' => '\r',
            '0' => '\0',
            other => other,
        }
    }

    /// Reads a double-quoted string literal, starting at the opening quote,
    /// and returns its contents with escape sequences resolved.
    ///
    /// # Errors
    /// Returns an error located at the opening quote when the input ends
    /// before the closing quote.
    fn read_string(&mut self) -> Result<String, String> {
        let (line, col) = (self.line, self.col);
        let mut string = String::new();
        self.advance(); // Consume the opening quote

        while let Some(ch) = self.advance() {
            match ch {
                '"' => return Ok(string),
                '\\' => {
                    // A trailing backslash at EOF falls through to the
                    // unterminated-literal error below.
                    if let Some(escaped) = self.advance() {
                        string.push(Self::unescape(escaped));
                    }
                }
                other => string.push(other),
            }
        }
        Err(Self::error_at(
            line,
            col,
            "Unexpected EOF in string literal",
        ))
    }

    /// Reads a single-quoted character literal, starting at the opening
    /// quote. A backslash introduces an escape sequence.
    ///
    /// # Errors
    /// Returns an error when the input ends inside the literal or the
    /// closing quote is missing.
    fn read_char(&mut self) -> Result<char, String> {
        self.advance(); // Consume the opening quote

        let first = match self.advance() {
            Some(ch) => ch,
            None => return Err(self.error("expected character")),
        };
        let value = if first == '\\' {
            match self.advance() {
                Some(escaped) => Self::unescape(escaped),
                None => return Err(self.error("expected character")),
            }
        } else {
            first
        };

        if self.peek(0) == Some('\'') {
            self.advance();
            Ok(value)
        } else {
            Err(self.error("expected closing quote"))
        }
    }

    /// Classifies an identifier as a keyword token or a plain identifier.
    fn keyword_or_identifier(ident: String) -> TokenType {
        match ident.as_str() {
            "int" => TokenType::Int,
            "if" => TokenType::If,
            "else" => TokenType::Else,
            "while" => TokenType::While,
            "return" => TokenType::Return,
            "include" => TokenType::Include,
            _ => TokenType::Identifier(ident),
        }
    }

    /// Reads an operator or delimiter whose first character is `ch` (which
    /// must be the current, not yet consumed, character).
    ///
    /// # Errors
    /// Returns an error when `ch` does not start any known operator.
    fn read_operator(&mut self, ch: char) -> Result<TokenType, String> {
        // Two-character operators must win over their one-character prefixes.
        let two_char = match (ch, self.peek(1)) {
            (':', Some(':')) => Some(TokenType::Namespace),
            ('=', Some('=')) => Some(TokenType::Eq),
            ('!', Some('=')) => Some(TokenType::Ne),
            ('<', Some('=')) => Some(TokenType::Le),
            ('>', Some('=')) => Some(TokenType::Ge),
            _ => None,
        };
        if let Some(token_type) = two_char {
            self.advance();
            self.advance();
            return Ok(token_type);
        }

        let token_type = match ch {
            ':' => TokenType::Colon,
            '+' => TokenType::Plus,
            '-' => TokenType::Minus,
            '*' => TokenType::Star,
            '/' => TokenType::Slash,
            '=' => TokenType::Assign,
            '<' => TokenType::Lt,
            '>' => TokenType::Gt,
            '(' => TokenType::LParen,
            ')' => TokenType::RParen,
            '{' => TokenType::LBrace,
            '}' => TokenType::RBrace,
            ';' => TokenType::Semicolon,
            ',' => TokenType::Comma,
            _ => return Err(self.error(&format!("Unexpected character: {}", ch))),
        };
        self.advance();
        Ok(token_type)
    }

    /// Lexes the remaining input into tokens, ending with a
    /// [`TokenType::Eof`] token.
    ///
    /// # Errors
    /// Returns a message prefixed with the line and column on an unexpected
    /// character, an unterminated or malformed literal, or an integer
    /// literal that does not fit in an `i32`.
    pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
        let mut tokens = Vec::new();

        loop {
            self.skip_trivia();

            let ch = match self.peek(0) {
                Some(ch) => ch,
                None => break,
            };
            // Record the position before consuming so the token points at
            // its first character.
            let (line, col) = (self.line, self.col);

            let token_type = if ch.is_ascii_digit() {
                TokenType::Number(self.read_number()?)
            } else if ch == '"' {
                TokenType::String(self.read_string()?)
            } else if ch == '\'' {
                TokenType::Char(self.read_char()?)
            } else if ch.is_alphabetic() || ch == '_' {
                Self::keyword_or_identifier(self.read_identifier())
            } else {
                self.read_operator(ch)?
            };

            tokens.push(Token::new(token_type, line, col));
        }

        tokens.push(Token::new(TokenType::Eof, self.line, self.col));
        Ok(tokens)
    }
}