// Page-extraction header (kept for reference): 52ef7872f0 · simple conditionals · 336 lines · 8.8 KiB · Rust
// ============================================================================
// Token Types
// ============================================================================

/// Every kind of token the lexer can produce.
///
/// Variants with a payload carry the value read from the source text; all
/// other variants are fully described by their name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    // Keywords
    /// The `int` keyword.
    Int,
    /// The `if` keyword.
    If,
    /// The `else` keyword.
    Else,
    /// The `while` keyword.
    While,
    /// The `return` keyword.
    Return,
    /// The `include` keyword.
    Include,

    // Identifiers and literals
    /// Any word that is not one of the keywords above.
    Identifier(String),
    /// An integer literal written as a run of ASCII digits.
    Number(i32),
    /// The contents of a double-quoted string literal, without the quotes.
    String(String),
    /// The value of a single-quoted character literal.
    Char(char),

    // Operators
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `=`
    Assign,
    /// `==`
    Eq,
    /// `!=`
    Ne,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Le,
    /// `>=`
    Ge,

    // Delimiters
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `::`
    Namespace,

    /// Marks the end of the token stream; always the last token emitted.
    Eof,
}

/// Primitive type tags.
///
/// Nothing in the lexer below refers to this enum.
/// NOTE(review): the variant names suggest signed/unsigned integers of 32, 16
/// and 8 bits plus a character type — confirm against the code that consumes
/// it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Type {
    Int32,
    Int16,
    Int8,
    Uint32,
    Uint16,
    Uint8,
    Char,
}

#[derive(Debug, Clone)]
|
|
pub struct Token {
|
|
pub token_type: TokenType,
|
|
pub line: usize,
|
|
pub col: usize,
|
|
}
|
|
|
|
impl Token {
|
|
pub fn new(token_type: TokenType, line: usize, col: usize) -> Self {
|
|
Self {
|
|
token_type,
|
|
line,
|
|
col,
|
|
}
|
|
}
|
|
}
|
|
|
|
// ============================================================================
// Lexer
// ============================================================================

/// Cursor over the characters of one source text.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The whole input, stored as characters so it can be indexed by `pos`.
    source: Vec<char>,
    /// Index into `source` of the next character to read.
    pos: usize,
    /// Line of the next character to read (starts at 1).
    line: usize,
    /// Column of the next character to read (starts at 1).
    col: usize,
}

impl Lexer {
|
|
pub fn new(source: &str) -> Self {
|
|
Self {
|
|
source: source.chars().collect(),
|
|
pos: 0,
|
|
line: 1,
|
|
col: 1,
|
|
}
|
|
}
|
|
|
|
fn error(&self, msg: &str) -> String {
|
|
format!(
|
|
"Lexer error at line {}, col {}: {}",
|
|
self.line, self.col, msg
|
|
)
|
|
}
|
|
|
|
fn peek(&self, offset: usize) -> Option<char> {
|
|
self.source.get(self.pos + offset).copied()
|
|
}
|
|
|
|
fn advance(&mut self) -> Option<char> {
|
|
if self.pos >= self.source.len() {
|
|
return None;
|
|
}
|
|
let ch = self.source[self.pos];
|
|
self.pos += 1;
|
|
if ch == '\n' {
|
|
self.line += 1;
|
|
self.col = 1;
|
|
} else {
|
|
self.col += 1;
|
|
}
|
|
Some(ch)
|
|
}
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
while let Some(ch) = self.peek(0) {
|
|
if ch.is_whitespace() {
|
|
self.advance();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
fn skip_comment(&mut self) {
|
|
if self.peek(0) == Some('/') && self.peek(1) == Some('/') {
|
|
while let Some(ch) = self.peek(0) {
|
|
if ch == '\n' {
|
|
break;
|
|
}
|
|
self.advance();
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_number(&mut self) -> i32 {
|
|
let mut num_str = String::new();
|
|
while let Some(ch) = self.peek(0) {
|
|
if ch.is_ascii_digit() {
|
|
num_str.push(ch);
|
|
self.advance();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
num_str.parse().unwrap_or(0)
|
|
}
|
|
|
|
fn read_identifier(&mut self) -> String {
|
|
let mut ident = String::new();
|
|
while let Some(ch) = self.peek(0) {
|
|
if ch.is_alphanumeric() || ch == '_' {
|
|
ident.push(ch);
|
|
self.advance();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
ident
|
|
}
|
|
|
|
fn read_string(&mut self) -> Result<String, String> {
|
|
let mut string = String::new();
|
|
self.advance(); // Consume the opening quote
|
|
|
|
while let Some(ch) = self.peek(0) {
|
|
if ch == '"' {
|
|
self.advance(); // Consume the closing quote
|
|
return Ok(string);
|
|
} else if ch == '\\' {
|
|
self.advance(); // Consume the backslash
|
|
if let Some(escaped_char) = self.peek(0) {
|
|
string.push(escaped_char);
|
|
self.advance();
|
|
}
|
|
} else {
|
|
string.push(ch);
|
|
self.advance();
|
|
}
|
|
}
|
|
|
|
Err(String::from("Unexpected EOF"))
|
|
}
|
|
|
|
fn read_char(&mut self) -> Result<char, String> {
|
|
self.advance(); // Consume the opening quote
|
|
|
|
if let Some(ch) = self.peek(0) {
|
|
self.advance();
|
|
if self.peek(0) == Some('\'') {
|
|
self.advance();
|
|
return Ok(ch);
|
|
} else {
|
|
Err(String::from("expected closing quote"))
|
|
}
|
|
} else {
|
|
Err(String::from("expected character"))
|
|
}
|
|
}
|
|
|
|
pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
|
|
let mut tokens = Vec::new();
|
|
|
|
loop {
|
|
self.skip_whitespace();
|
|
self.skip_comment();
|
|
|
|
if self.pos >= self.source.len() {
|
|
break;
|
|
}
|
|
|
|
let line = self.line;
|
|
let col = self.col;
|
|
let ch = self.peek(0).unwrap();
|
|
|
|
let token_type = if ch.is_ascii_digit() {
|
|
let num = self.read_number();
|
|
TokenType::Number(num)
|
|
} else if ch == '"' {
|
|
let string = self.read_string()?;
|
|
TokenType::String(string)
|
|
} else if ch == '\'' {
|
|
let char = self.read_char()?;
|
|
TokenType::Char(char)
|
|
} else if ch.is_alphabetic() || ch == '_' {
|
|
let ident = self.read_identifier();
|
|
match ident.as_str() {
|
|
"int" => TokenType::Int,
|
|
"if" => TokenType::If,
|
|
"else" => TokenType::Else,
|
|
"while" => TokenType::While,
|
|
"return" => TokenType::Return,
|
|
"include" => TokenType::Include,
|
|
_ => TokenType::Identifier(ident),
|
|
}
|
|
} else {
|
|
match ch {
|
|
':' if self.peek(1) == Some(':') => {
|
|
self.advance();
|
|
self.advance();
|
|
TokenType::Namespace
|
|
}
|
|
':' => {
|
|
self.advance();
|
|
TokenType::Colon
|
|
}
|
|
'=' if self.peek(1) == Some('=') => {
|
|
self.advance();
|
|
self.advance();
|
|
TokenType::Eq
|
|
}
|
|
'!' if self.peek(1) == Some('=') => {
|
|
self.advance();
|
|
self.advance();
|
|
TokenType::Ne
|
|
}
|
|
'<' if self.peek(1) == Some('=') => {
|
|
self.advance();
|
|
self.advance();
|
|
TokenType::Le
|
|
}
|
|
'>' if self.peek(1) == Some('=') => {
|
|
self.advance();
|
|
self.advance();
|
|
TokenType::Ge
|
|
}
|
|
'+' => {
|
|
self.advance();
|
|
TokenType::Plus
|
|
}
|
|
'-' => {
|
|
self.advance();
|
|
TokenType::Minus
|
|
}
|
|
'*' => {
|
|
self.advance();
|
|
TokenType::Star
|
|
}
|
|
'/' => {
|
|
self.advance();
|
|
TokenType::Slash
|
|
}
|
|
'=' => {
|
|
self.advance();
|
|
TokenType::Assign
|
|
}
|
|
'<' => {
|
|
self.advance();
|
|
TokenType::Lt
|
|
}
|
|
'>' => {
|
|
self.advance();
|
|
TokenType::Gt
|
|
}
|
|
'(' => {
|
|
self.advance();
|
|
TokenType::LParen
|
|
}
|
|
')' => {
|
|
self.advance();
|
|
TokenType::RParen
|
|
}
|
|
'{' => {
|
|
self.advance();
|
|
TokenType::LBrace
|
|
}
|
|
'}' => {
|
|
self.advance();
|
|
TokenType::RBrace
|
|
}
|
|
';' => {
|
|
self.advance();
|
|
TokenType::Semicolon
|
|
}
|
|
',' => {
|
|
self.advance();
|
|
TokenType::Comma
|
|
}
|
|
_ => return Err(self.error(&format!("Unexpected character: {}", ch))),
|
|
}
|
|
};
|
|
|
|
tokens.push(Token::new(token_type, line, col));
|
|
}
|
|
|
|
tokens.push(Token::new(TokenType::Eof, self.line, self.col));
|
|
Ok(tokens)
|
|
}
|
|
}
|