From 2582ad10fa402e1d92bded52241b46c2d8656496 Mon Sep 17 00:00:00 2001
From: zxq5
Date: Mon, 30 Jun 2025 20:44:39 +0100
Subject: [PATCH] started work on compiler

---
Reviewer notes (not part of the commit message):
 * Restored the line structure and the generic arguments that were missing
   from the patch text; `Node::FunctionDef::params` and
   `Node::TypeDef::fields` are assumed to be `Vec<Node>` (TODO confirm).
 * Fixed `read_number` dropping the first digit, `expect_type!` evaluating
   its token expression more than once, and `parse_statement` not rewinding
   after a failed typed-variable attempt.
 * The blob ids on the `index` lines were not recomputed for edited files.

 Cargo.lock                |   4 +
 Cargo.toml                |   2 +-
 compiler/Cargo.toml       |   7 +
 compiler/src/lexer.rs     | 399 ++++++++++++++++++++++++++++++++++++++
 compiler/src/main.rs      |  42 ++++
 compiler/src/parser.rs    | 178 +++++++++++++++++
 resources/dsc/example.dsc |   8 +
 7 files changed, 639 insertions(+), 1 deletion(-)
 create mode 100644 compiler/Cargo.toml
 create mode 100644 compiler/src/lexer.rs
 create mode 100644 compiler/src/main.rs
 create mode 100644 compiler/src/parser.rs
 create mode 100644 resources/dsc/example.dsc

diff --git a/Cargo.lock b/Cargo.lock
index a84889a..51d0e25 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -642,6 +642,10 @@ dependencies = [
 name = "common"
 version = "0.2.0"
 
+[[package]]
+name = "compiler"
+version = "0.2.0"
+
 [[package]]
 name = "concurrent-queue"
 version = "2.5.0"
diff --git a/Cargo.toml b/Cargo.toml
index d5039da..9ae7c73 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 cargo-features = ["codegen-backend"]
 
 [workspace]
-members = ["emulator", "common", "assembler", "dsa_editor"]
+members = ["emulator", "common", "assembler", "dsa_editor", "compiler"]
 resolver = "3"
 
 [workspace.package]
diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml
new file mode 100644
index 0000000..15e1473
--- /dev/null
+++ b/compiler/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "compiler"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+
+[dependencies]
diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs
new file mode 100644
index 0000000..e616b93
--- /dev/null
+++ b/compiler/src/lexer.rs
@@ -0,0 +1,399 @@
+//! Lexer for the DSC language: turns source text into a stream of [`Token`]s.
+
+use std::iter::Peekable;
+use std::str::Chars;
+
+/// A single lexical token.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Token {
+    // Keywords
+    If,
+    Else,
+    Loop,
+    Break,
+    Return,
+    Continue,
+
+    // Identifiers and literals
+    Identifier(String),
+    String(String),
+    Number(i64),
+
+    // Symbols
+    LeftParen,  // (
+    RightParen, // )
+    LeftBrace,  // {
+    RightBrace, // }
+    Semicolon,  // ;
+    Colon,      // :
+    Comma,      // ,
+    Pipe,       // |
+
+    // Operators
+    Plus,         // +
+    Minus,        // -
+    Star,         // *
+    Slash,        // /
+    Assign,       // =
+    EqualEqual,   // ==
+    Bang,         // !
+    BangEqual,    // !=
+    Less,         // <
+    LessEqual,    // <=
+    Greater,      // >
+    GreaterEqual, // >=
+
+    // Special
+    Eof,
+}
+
+impl Token {
+    /// Returns the variant name of this token, ignoring any payload.
+    ///
+    /// `expect_type!` compares this string against `stringify!(Variant)`,
+    /// so every name must match its variant identifier exactly.
+    pub fn tt(&self) -> &'static str {
+        match self {
+            Token::If => "If",
+            Token::Else => "Else",
+            Token::Loop => "Loop",
+            Token::Break => "Break",
+            Token::Return => "Return",
+            Token::Continue => "Continue",
+            Token::Identifier(_) => "Identifier",
+            Token::String(_) => "String",
+            Token::Number(_) => "Number",
+            Token::LeftParen => "LeftParen",
+            Token::RightParen => "RightParen",
+            Token::LeftBrace => "LeftBrace",
+            Token::RightBrace => "RightBrace",
+            Token::Semicolon => "Semicolon",
+            Token::Colon => "Colon",
+            Token::Comma => "Comma",
+            Token::Pipe => "Pipe",
+            Token::Plus => "Plus",
+            Token::Minus => "Minus",
+            Token::Star => "Star",
+            Token::Slash => "Slash",
+            Token::Assign => "Assign",
+            Token::EqualEqual => "EqualEqual",
+            Token::Bang => "Bang",
+            Token::BangEqual => "BangEqual",
+            Token::Less => "Less",
+            Token::LessEqual => "LessEqual",
+            Token::Greater => "Greater",
+            Token::GreaterEqual => "GreaterEqual",
+            Token::Eof => "Eof",
+        }
+    }
+}
+
+/// Streaming lexer over a borrowed source string.
+///
+/// `current` holds the character under the cursor and `chars` the input after
+/// it, so `peek` always looks one character past `current`.
+#[derive(Debug)]
+pub struct Lexer<'a> {
+    chars: Peekable<Chars<'a>>,
+    current: Option<char>,
+    line: usize,
+}
+
+impl<'a> Lexer<'a> {
+    /// Creates a lexer positioned on the first character of `input`.
+    pub fn new(input: &'a str) -> Self {
+        let mut chars = input.chars().peekable();
+        let current = chars.next();
+
+        Lexer {
+            chars,
+            current,
+            line: 1,
+        }
+    }
+
+    /// Moves the cursor one character forward and returns the new `current`.
+    fn advance(&mut self) -> Option<char> {
+        self.current = self.chars.next();
+        self.current
+    }
+
+    /// Returns the character after `current` without consuming it.
+    fn peek(&mut self) -> Option<&char> {
+        self.chars.peek()
+    }
+
+    /// Skips whitespace starting at `current`, counting newlines in `line`.
+    fn skip_whitespace(&mut self) {
+        while let Some(c) = self.current {
+            if !c.is_whitespace() {
+                break;
+            }
+            if c == '\n' {
+                self.line += 1;
+            }
+            self.advance();
+        }
+    }
+
+    /// Reads the remainder of an identifier.
+    ///
+    /// The caller has already taken the first character from `current`; this
+    /// collects the following alphanumeric/underscore characters and leaves
+    /// `current` on the last character of the identifier.
+    fn read_identifier(&mut self) -> String {
+        let mut ident = String::new();
+        while let Some(&c) = self.peek() {
+            if c.is_alphanumeric() || c == '_' {
+                ident.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+        }
+        ident
+    }
+
+    /// Reads a decimal integer literal starting at `current`.
+    ///
+    /// Leaves `current` on the last digit. A literal too large for `i64`
+    /// saturates to `i64::MAX`.
+    fn read_number(&mut self) -> i64 {
+        let mut num_str = String::new();
+        // The leading digit is the character under the cursor; reading only
+        // from `peek()` would silently drop it (e.g. "123" lexed as 23).
+        if let Some(c) = self.current.filter(char::is_ascii_digit) {
+            num_str.push(c);
+        }
+        while let Some(&c) = self.peek() {
+            if c.is_ascii_digit() {
+                num_str.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+        }
+        // Only ASCII digits were collected, so parsing can fail only on
+        // overflow (saturate) or when no digit was present at all (zero).
+        match num_str.parse::<i64>() {
+            Ok(value) => value,
+            Err(_) if num_str.is_empty() => 0,
+            Err(_) => i64::MAX,
+        }
+    }
+
+    /// Consumes the next character if it equals `expected`.
+    fn match_next(&mut self, expected: char) -> bool {
+        match self.peek() {
+            Some(&c) if c == expected => {
+                self.advance();
+                true
+            }
+            _ => false,
+        }
+    }
+
+    /// Lexes and returns the next token, or [`Token::Eof`] at end of input.
+    ///
+    /// Unknown characters are skipped. An unterminated string literal runs
+    /// to the end of the input.
+    pub fn next_token(&mut self) -> Token {
+        self.skip_whitespace();
+
+        let token = match self.current {
+            Some('(') => Token::LeftParen,
+            Some(')') => Token::RightParen,
+            Some('{') => Token::LeftBrace,
+            Some('}') => Token::RightBrace,
+            Some(';') => Token::Semicolon,
+            Some(':') => Token::Colon,
+            Some(',') => Token::Comma,
+            Some('|') => Token::Pipe,
+            Some('+') => Token::Plus,
+            Some('-') => Token::Minus,
+            Some('*') => Token::Star,
+            Some('/') => Token::Slash,
+            Some('!') => {
+                if self.match_next('=') {
+                    Token::BangEqual
+                } else {
+                    Token::Bang
+                }
+            }
+            Some('=') => {
+                if self.match_next('=') {
+                    Token::EqualEqual
+                } else {
+                    Token::Assign
+                }
+            }
+            Some('<') => {
+                if self.match_next('=') {
+                    Token::LessEqual
+                } else {
+                    Token::Less
+                }
+            }
+            Some('>') => {
+                if self.match_next('=') {
+                    Token::GreaterEqual
+                } else {
+                    Token::Greater
+                }
+            }
+            Some('"') => {
+                self.advance(); // Skip the opening quote
+                let mut s = String::new();
+                while let Some(c) = self.current {
+                    if c == '"' {
+                        break;
+                    }
+                    // Keep the line counter correct across multi-line strings.
+                    if c == '\n' {
+                        self.line += 1;
+                    }
+                    s.push(c);
+                    self.advance();
+                }
+                Token::String(s)
+            }
+            Some(c) => {
+                if c.is_alphabetic() || c == '_' {
+                    let mut ident = c.to_string();
+                    ident.push_str(&self.read_identifier());
+                    match ident.as_str() {
+                        "if" => Token::If,
+                        "else" => Token::Else,
+                        "loop" => Token::Loop,
+                        "break" => Token::Break,
+                        "return" => Token::Return,
+                        "continue" => Token::Continue,
+                        _ => Token::Identifier(ident),
+                    }
+                } else if c.is_ascii_digit() {
+                    Token::Number(self.read_number())
+                } else {
+                    // Skip unknown characters for now
+                    self.advance();
+                    return self.next_token();
+                }
+            }
+            None => Token::Eof,
+        };
+
+        // Every branch above leaves `current` on the token's last character
+        // (or the closing quote of a string); step past it.
+        if token != Token::Eof {
+            self.advance();
+        }
+
+        token
+    }
+}
+
+impl<'a> Iterator for Lexer<'a> {
+    type Item = Token;
+
+    /// Yields tokens until end of input; [`Token::Eof`] itself is not yielded.
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.next_token() {
+            Token::Eof => None,
+            token => Some(token),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_keywords() {
+        let input = "if else loop break return continue";
+        let mut lexer = Lexer::new(input);
+
+        assert_eq!(lexer.next_token(), Token::If);
+        assert_eq!(lexer.next_token(), Token::Else);
+        assert_eq!(lexer.next_token(), Token::Loop);
+        assert_eq!(lexer.next_token(), Token::Break);
+        assert_eq!(lexer.next_token(), Token::Return);
+        assert_eq!(lexer.next_token(), Token::Continue);
+        assert_eq!(lexer.next_token(), Token::Eof);
+    }
+
+    #[test]
+    fn test_identifiers_and_numbers() {
+        let input = "x y42 _test 123 45";
+        let mut lexer = Lexer::new(input);
+
+        assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
+        assert_eq!(lexer.next_token(), Token::Identifier("y42".to_string()));
+        assert_eq!(lexer.next_token(), Token::Identifier("_test".to_string()));
+        assert_eq!(lexer.next_token(), Token::Number(123));
+        assert_eq!(lexer.next_token(), Token::Number(45));
+        assert_eq!(lexer.next_token(), Token::Eof);
+    }
+
+    #[test]
+    fn test_single_digit_and_string() {
+        let mut lexer = Lexer::new(r#"7 "hi" 0"#);
+
+        assert_eq!(lexer.next_token(), Token::Number(7));
+        assert_eq!(lexer.next_token(), Token::String("hi".to_string()));
+        assert_eq!(lexer.next_token(), Token::Number(0));
+        assert_eq!(lexer.next_token(), Token::Eof);
+    }
+
+    #[test]
+    fn test_operators() {
+        let input = "= == ! != < <= > >=";
+        let mut lexer = Lexer::new(input);
+
+        assert_eq!(lexer.next_token(), Token::Assign);
+        assert_eq!(lexer.next_token(), Token::EqualEqual);
+        assert_eq!(lexer.next_token(), Token::Bang);
+        assert_eq!(lexer.next_token(), Token::BangEqual);
+        assert_eq!(lexer.next_token(), Token::Less);
+        assert_eq!(lexer.next_token(), Token::LessEqual);
+        assert_eq!(lexer.next_token(), Token::Greater);
+        assert_eq!(lexer.next_token(), Token::GreaterEqual);
+        assert_eq!(lexer.next_token(), Token::Eof);
+    }
+
+    #[test]
+    fn test_example_syntax() {
+        let input = r#"
+            main: Func = | x: U32, y: U32 | {
+                res = add(x, y);
+                print(res);
+
+                if res > 10 {
+                    print("res is greater than 10");
+                }
+            }
+        "#;
+
+        let mut lexer = Lexer::new(input);
+
+        // Skip whitespace and newlines
+        while let Some(c) = lexer.current {
+            if !c.is_whitespace() {
+                break;
+            }
+            lexer.advance();
+        }
+
+        // Test the first few tokens
+        assert_eq!(lexer.next_token(), Token::Identifier("main".to_string()));
+        assert_eq!(lexer.next_token(), Token::Colon);
+        assert_eq!(lexer.next_token(), Token::Identifier("Func".to_string()));
+        assert_eq!(lexer.next_token(), Token::Assign);
+        assert_eq!(lexer.next_token(), Token::Pipe);
+        assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
+        assert_eq!(lexer.next_token(), Token::Colon);
+        assert_eq!(lexer.next_token(), Token::Identifier("U32".to_string()));
+        assert_eq!(lexer.next_token(), Token::Comma);
+
+        // The rest of the tokens would be tested similarly
+    }
+}
diff --git a/compiler/src/main.rs b/compiler/src/main.rs
new file mode 100644
index 0000000..031ecc8
--- /dev/null
+++ b/compiler/src/main.rs
@@ -0,0 +1,42 @@
+//! Entry point of the DSC compiler: lexes and parses a source file and
+//! prints the resulting tokens and syntax tree.
+
+use std::{env, fs, path::PathBuf, process::ExitCode};
+
+pub mod lexer;
+pub mod parser;
+
+/// Source file compiled when no path is given on the command line.
+const DEFAULT_SOURCE: &str = "../resources/dsc/example.dsc";
+
+fn main() -> ExitCode {
+    println!("Hello, world!");
+
+    // An optional first argument overrides the default example program.
+    let path = env::args_os()
+        .nth(1)
+        .map_or_else(|| PathBuf::from(DEFAULT_SOURCE), PathBuf::from);
+    let contents = match fs::read_to_string(&path) {
+        Ok(contents) => contents,
+        Err(e) => {
+            eprintln!("Failed to read file {}: {e}", path.display());
+            return ExitCode::FAILURE;
+        }
+    };
+
+    let lexer = lexer::Lexer::new(&contents);
+    let tokens = lexer.collect::<Vec<_>>();
+    println!("{tokens:?}");
+
+    let mut parser = parser::Parser::new(tokens);
+    let ast = match parser.parse() {
+        Ok(ast) => ast,
+        Err(e) => {
+            eprintln!("Error: {e:?}");
+            return ExitCode::FAILURE;
+        }
+    };
+    println!("{ast:?}");
+
+    ExitCode::SUCCESS
+}
diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs
new file mode 100644
index 0000000..e21e049
--- /dev/null
+++ b/compiler/src/parser.rs
@@ -0,0 +1,178 @@
+//! Parser for the DSC language: builds a [`Node`] tree from lexer tokens.
+
+use crate::expect_type;
+use crate::lexer::Token;
+
+/// Hand-written parser that consumes a token vector front to back.
+pub struct Parser {
+    // NOTE(review): `ast` is initialised in `new` but never read in this file.
+    ast: Node,
+    /// Index into `tokens` of the next token to consume.
+    idx: usize,
+    tokens: Vec<Token>,
+}
+
+impl Parser {
+    /// Creates a parser positioned at the first of `tokens`.
+    pub fn new(tokens: Vec<Token>) -> Self {
+        Self {
+            ast: Node::Scope {
+                children: Vec::new(),
+            },
+            idx: 0,
+            tokens,
+        }
+    }
+
+    /// Parses statements until the tokens run out.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first [`CompileError`] produced by a statement.
+    pub fn parse(&mut self) -> Result<Node, CompileError> {
+        let mut statements = Vec::new();
+
+        while self.peek_next().is_some() {
+            statements.push(self.parse_statement()?);
+        }
+
+        Ok(Node::Scope {
+            children: statements,
+        })
+    }
+
+    /// Parses `target = expression`, where `target` is either a typed
+    /// variable (`name: Type`) or a bare identifier.
+    fn parse_statement(&mut self) -> Result<Node, CompileError> {
+        // first token in a statement is always an identifier
+        let start = self.idx;
+        let left = match self.parse_typed_var() {
+            Ok(typed_var) => Box::new(typed_var),
+            Err(_) => {
+                // The failed attempt may have consumed tokens; rewind so the
+                // fallback sees the statement from its first token.
+                self.idx = start;
+                let tok = expect_type!(self.next()?, Identifier)?;
+                Box::new(Node::Terminal { value: tok })
+            }
+        };
+
+        let _ = expect_type!(self.next()?, Assign)?;
+
+        let right = Box::new(self.parse_expression()?);
+
+        Ok(Node::Statement { left, right })
+    }
+
+    /// Parses `name: Type` into a [`Node::TypedVar`].
+    ///
+    /// On error the tokens read so far stay consumed; callers that want to
+    /// try an alternative must restore `idx` themselves.
+    fn parse_typed_var(&mut self) -> Result<Node, CompileError> {
+        let name = expect_type!(self.next()?, Identifier)?;
+        let _ = expect_type!(self.next()?, Colon)?;
+        let type_ = expect_type!(self.next()?, Identifier)?;
+
+        Ok(Node::TypedVar { name, type_ })
+    }
+
+    /// Placeholder: expression parsing is not implemented yet, so this
+    /// always fails with [`CompileError::Generic`].
+    fn parse_expression(&mut self) -> Result<Node, CompileError> {
+        Err(CompileError::Generic)
+    }
+
+    /// Consumes and returns the next token.
+    fn next(&mut self) -> Result<Token, CompileError> {
+        let token = self
+            .tokens
+            .get(self.idx)
+            .cloned()
+            .ok_or(CompileError::UnexpectedEOF)?;
+        self.idx += 1;
+
+        Ok(token)
+    }
+
+    /// Returns a copy of the next token without consuming it.
+    fn peek_next(&mut self) -> Option<Token> {
+        self.tokens.get(self.idx).cloned()
+    }
+}
+
+/// A node of the syntax tree.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Node {
+    Scope {
+        children: Vec<Node>,
+    },
+    Terminal {
+        value: Token,
+    },
+    UnaryOp {
+        op: Token,
+        right: Box<Node>,
+    },
+    BinaryOp {
+        left: Box<Node>,
+        op: Token,
+        right: Box<Node>,
+    },
+    Statement {
+        left: Box<Node>,
+        right: Box<Node>,
+    },
+    If {
+        condition: Box<Node>,
+        then_branch: Box<Node>,
+        else_branch: Option<Box<Node>>,
+    },
+    FunctionDef {
+        params: Vec<Node>,
+        body: Box<Node>,
+    },
+    TypedVar {
+        name: Token,
+        type_: Token,
+    },
+    TypeDef {
+        name: Token,
+        fields: Vec<Node>,
+    },
+}
+
+/// Errors reported by the parser.
+#[derive(Debug)]
+pub enum CompileError {
+    Generic,
+    ExpectedToken { expected: String, found: Token },
+    UnexpectedEOF,
+}
+
+/// Checks that a token is one of the listed `Token` variants.
+///
+/// Evaluates to `Ok` with a clone of the token on a match and to
+/// `Err(CompileError::ExpectedToken { .. })` otherwise. The token expression
+/// is evaluated exactly once, so passing `self.next()?` consumes one token.
+#[macro_export]
+macro_rules! expect_type {
+    ($token:expr, $($variant:ident),+) => {{
+        // Bind first: `$token` is usually `self.next()?`, and expanding it
+        // at every use would consume a token each time.
+        let token = $token;
+        match token.tt() {
+            $(
+                stringify!($variant) => Ok(token.clone()),
+            )+
+            _ => {
+                // return an expected token error
+                let expected = format!("[{}]", [$(stringify!($variant)),+].join(" | "));
+
+                Err($crate::parser::CompileError::ExpectedToken {
+                    expected,
+                    found: token.clone(),
+                })
+            }
+        }
+    }};
+}
diff --git a/resources/dsc/example.dsc b/resources/dsc/example.dsc
new file mode 100644
index 0000000..550a707
--- /dev/null
+++ b/resources/dsc/example.dsc
@@ -0,0 +1,8 @@
+main: Func = | x: U32, y: U32 | {
+    res = add(x, y);
+    print(res);
+
+    if res > 10 {
+        print("res is greater than 10");
+    }
+}