started work on compiler

This commit is contained in:
2025-06-30 20:44:39 +01:00
parent ae92510fb8
commit 2582ad10fa
7 changed files with 533 additions and 1 deletions
Generated
+4
View File
@@ -642,6 +642,10 @@ dependencies = [
name = "common"
version = "0.2.0"
[[package]]
name = "compiler"
version = "0.2.0"
[[package]]
name = "concurrent-queue"
version = "2.5.0"
+1 -1
View File
@@ -1,7 +1,7 @@
cargo-features = ["codegen-backend"]
[workspace]
members = ["emulator", "common", "assembler", "dsa_editor"]
members = ["emulator", "common", "assembler", "dsa_editor", "compiler"]
resolver = "3"
[workspace.package]
+7
View File
@@ -0,0 +1,7 @@
[package]
name = "compiler"
version.workspace = true
edition.workspace = true
authors.workspace = true
[dependencies]
+342
View File
@@ -0,0 +1,342 @@
use std::iter::Peekable;
use std::str::Chars;
/// One lexical unit produced by the lexer.
///
/// Variants fall into four groups: keywords, value-carrying tokens
/// (identifiers and literals), punctuation/operators, and the end-of-input
/// marker.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    /// Keyword `if`.
    If,
    /// Keyword `else`.
    Else,
    /// Keyword `loop`.
    Loop,
    /// Keyword `break`.
    Break,
    /// Keyword `return`.
    Return,
    /// Keyword `continue`.
    Continue,

    /// A name that is not a keyword.
    Identifier(String),
    /// A string literal, stored without its surrounding quotes.
    String(String),
    /// An integer literal.
    Number(i64),

    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `|`
    Pipe,

    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `=`
    Assign,
    /// `==`
    EqualEqual,
    /// `!`
    Bang,
    /// `!=`
    BangEqual,
    /// `<`
    Less,
    /// `<=`
    LessEqual,
    /// `>`
    Greater,
    /// `>=`
    GreaterEqual,

    /// End of input.
    Eof,
}

impl Token {
    /// Returns the variant's name as text, ignoring any payload.
    ///
    /// The returned string is spelled exactly like the variant identifier
    /// (for example `"Identifier"` or `"LeftParen"`), which lets callers
    /// compare it against `stringify!`-ed variant names.
    pub fn tt(&self) -> &str {
        match self {
            // Keywords
            Self::If => "If",
            Self::Else => "Else",
            Self::Loop => "Loop",
            Self::Break => "Break",
            Self::Return => "Return",
            Self::Continue => "Continue",
            // Value-carrying tokens
            Self::Identifier(..) => "Identifier",
            Self::String(..) => "String",
            Self::Number(..) => "Number",
            // Punctuation
            Self::LeftParen => "LeftParen",
            Self::RightParen => "RightParen",
            Self::LeftBrace => "LeftBrace",
            Self::RightBrace => "RightBrace",
            Self::Semicolon => "Semicolon",
            Self::Colon => "Colon",
            Self::Comma => "Comma",
            Self::Pipe => "Pipe",
            // Operators
            Self::Plus => "Plus",
            Self::Minus => "Minus",
            Self::Star => "Star",
            Self::Slash => "Slash",
            Self::Assign => "Assign",
            Self::EqualEqual => "EqualEqual",
            Self::Bang => "Bang",
            Self::BangEqual => "BangEqual",
            Self::Less => "Less",
            Self::LessEqual => "LessEqual",
            Self::Greater => "Greater",
            Self::GreaterEqual => "GreaterEqual",
            // End of input
            Self::Eof => "Eof",
        }
    }
}
/// Hand-written lexer that turns source text into `Token`s.
///
/// `current` is the character the lexer is positioned on; `chars` yields the
/// characters that follow it, so `peek` looks one character past `current`.
#[derive(Debug)]
pub struct Lexer<'a> {
    chars: Peekable<Chars<'a>>,
    current: Option<char>,
    /// Line counter, starting at 1 and bumped for every `'\n'` consumed.
    line: usize,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned on the first character of `input`.
    pub fn new(input: &'a str) -> Self {
        let mut chars = input.chars().peekable();
        let current = chars.next();
        Lexer {
            chars,
            current,
            line: 1,
        }
    }

    /// Moves to the next character and returns it (`None` at end of input).
    fn advance(&mut self) -> Option<char> {
        self.current = self.chars.next();
        self.current
    }

    /// Looks at the character after `current` without consuming it.
    fn peek(&mut self) -> Option<&char> {
        self.chars.peek()
    }

    /// Skips whitespace starting at `current`, counting newlines.
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.current {
            if !c.is_whitespace() {
                break;
            }
            if c == '\n' {
                self.line += 1;
            }
            self.advance();
        }
    }

    /// Consumes the identifier characters (alphanumeric or `_`) that follow
    /// `current` and returns them.
    ///
    /// `current` itself is not part of the returned string; the caller has
    /// already taken it. On return `current` is the last consumed character.
    fn read_identifier(&mut self) -> String {
        let mut ident = String::new();
        while let Some(&c) = self.peek() {
            if c.is_alphanumeric() || c == '_' {
                ident.push(c);
                self.advance();
            } else {
                break;
            }
        }
        ident
    }

    /// Reads a run of ASCII digits beginning at `current` and parses it.
    ///
    /// On return `current` is the last digit of the number. A literal that
    /// does not fit in `i64` yields `0`.
    fn read_number(&mut self) -> i64 {
        let mut num_str = String::new();
        // The caller dispatches here while `current` is still the first
        // digit, so it has to be collected before peeking at the rest;
        // otherwise "123" would be read as 23.
        if let Some(first) = self.current {
            if first.is_ascii_digit() {
                num_str.push(first);
            }
        }
        while let Some(&c) = self.peek() {
            if c.is_ascii_digit() {
                num_str.push(c);
                self.advance();
            } else {
                break;
            }
        }
        num_str.parse().unwrap_or(0)
    }

    /// Consumes the next character if it equals `expected`.
    fn match_next(&mut self, expected: char) -> bool {
        match self.peek() {
            Some(&c) if c == expected => {
                self.advance();
                true
            }
            _ => false,
        }
    }

    /// Picks between a two-character operator and its one-character prefix.
    ///
    /// Returns `double` (consuming `second`) when the next character is
    /// `second`, otherwise `single`.
    fn one_or_two(&mut self, second: char, double: Token, single: Token) -> Token {
        if self.match_next(second) {
            double
        } else {
            single
        }
    }

    /// Reads a string literal whose opening quote is `current`.
    ///
    /// Stops on the closing quote (left in `current` for the caller to step
    /// over) or at end of input for an unterminated literal.
    fn read_string(&mut self) -> Token {
        let mut text = String::new();
        // The first `advance` steps past the opening quote.
        while let Some(c) = self.advance() {
            if c == '"' {
                break;
            }
            if c == '\n' {
                // Keep the line counter accurate across multi-line strings.
                self.line += 1;
            }
            text.push(c);
        }
        Token::String(text)
    }

    /// Maps a scanned word to its keyword token, or to an identifier.
    fn keyword_or_identifier(word: String) -> Token {
        match word.as_str() {
            "if" => Token::If,
            "else" => Token::Else,
            "loop" => Token::Loop,
            "break" => Token::Break,
            "return" => Token::Return,
            "continue" => Token::Continue,
            _ => Token::Identifier(word),
        }
    }

    /// Returns the next token, or `Token::Eof` once the input is exhausted.
    ///
    /// Whitespace is skipped. Characters that start no known token are
    /// silently skipped as well.
    pub fn next_token(&mut self) -> Token {
        loop {
            self.skip_whitespace();
            let c = match self.current {
                Some(c) => c,
                None => return Token::Eof,
            };
            let token = match c {
                '(' => Token::LeftParen,
                ')' => Token::RightParen,
                '{' => Token::LeftBrace,
                '}' => Token::RightBrace,
                ';' => Token::Semicolon,
                ':' => Token::Colon,
                ',' => Token::Comma,
                '|' => Token::Pipe,
                '+' => Token::Plus,
                '-' => Token::Minus,
                '*' => Token::Star,
                '/' => Token::Slash,
                '!' => self.one_or_two('=', Token::BangEqual, Token::Bang),
                '=' => self.one_or_two('=', Token::EqualEqual, Token::Assign),
                '<' => self.one_or_two('=', Token::LessEqual, Token::Less),
                '>' => self.one_or_two('=', Token::GreaterEqual, Token::Greater),
                '"' => self.read_string(),
                c if c.is_alphabetic() || c == '_' => {
                    let mut word = c.to_string();
                    word.push_str(&self.read_identifier());
                    Self::keyword_or_identifier(word)
                }
                c if c.is_ascii_digit() => Token::Number(self.read_number()),
                _ => {
                    // Skip unknown characters for now. Looping (rather than
                    // recursing) keeps stack use constant on long runs.
                    self.advance();
                    continue;
                }
            };
            // Every branch above leaves `current` on the token's last
            // character; step past it so the next call starts fresh.
            self.advance();
            return token;
        }
    }
}

/// Iterates over the tokens of the input, ending before `Token::Eof`.
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        match self.next_token() {
            Token::Eof => None,
            token => Some(token),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// Pulls one token per entry of `expected` with `next_token` and checks
    /// that they match in order.
    fn expect_tokens(lexer: &mut Lexer<'_>, expected: &[Token]) {
        for want in expected {
            assert_eq!(lexer.next_token(), *want);
        }
    }

    #[test]
    fn test_keywords() {
        let mut lexer = Lexer::new("if else loop break return continue");
        expect_tokens(
            &mut lexer,
            &[
                Token::If,
                Token::Else,
                Token::Loop,
                Token::Break,
                Token::Return,
                Token::Continue,
                Token::Eof,
            ],
        );
    }

    #[test]
    fn test_identifiers_and_numbers() {
        let mut lexer = Lexer::new("x y42 _test 123 45");
        expect_tokens(
            &mut lexer,
            &[
                ident("x"),
                ident("y42"),
                ident("_test"),
                Token::Number(123),
                Token::Number(45),
                Token::Eof,
            ],
        );
    }

    #[test]
    fn test_operators() {
        let mut lexer = Lexer::new("= == ! != < <= > >=");
        expect_tokens(
            &mut lexer,
            &[
                Token::Assign,
                Token::EqualEqual,
                Token::Bang,
                Token::BangEqual,
                Token::Less,
                Token::LessEqual,
                Token::Greater,
                Token::GreaterEqual,
                Token::Eof,
            ],
        );
    }

    #[test]
    fn test_example_syntax() {
        let source = r#"
main: Func = | x: U32, y: U32 | {
res = add(x, y);
print(res);
if res > 10 {
print("res is greater than 10");
}
}
"#;
        let mut lexer = Lexer::new(source);

        // Step over the leading whitespace and newlines by hand.
        while lexer.current.map_or(false, char::is_whitespace) {
            lexer.advance();
        }

        // Only the head of the token stream is checked here; the remaining
        // tokens would be verified the same way.
        expect_tokens(
            &mut lexer,
            &[
                ident("main"),
                Token::Colon,
                ident("Func"),
                Token::Assign,
                Token::Pipe,
                ident("x"),
                Token::Colon,
                ident("U32"),
                Token::Comma,
            ],
        );
    }
}
+25
View File
@@ -0,0 +1,25 @@
use std::{fs, path::Path};
pub mod lexer;
pub mod parser;
/// Lexes and parses a source file, printing the token list and the AST.
///
/// The file to compile is taken from the first command-line argument; when
/// none is given the bundled example path is used. If the file cannot be
/// read or parsing fails, the error is written to stderr and the process
/// exits with status 1.
fn main() {
    println!("Hello, world!");

    // An optional first argument overrides the default example file.
    let path = std::env::args_os()
        .nth(1)
        .map(std::path::PathBuf::from)
        .unwrap_or_else(|| Path::new("../resources/dsc/example.dsc").to_path_buf());

    // An unreadable input is a user-facing condition, not a bug: report it
    // instead of panicking.
    let contents = match fs::read_to_string(&path) {
        Ok(contents) => contents,
        Err(e) => {
            eprintln!("Failed to read file {}: {e}", path.display());
            std::process::exit(1);
        }
    };

    let tokens = lexer::Lexer::new(&contents).collect::<Vec<_>>();
    println!("{tokens:?}");

    let mut parser = parser::Parser::new(tokens);
    match parser.parse() {
        Ok(ast) => println!("{ast:?}"),
        Err(e) => {
            eprintln!("Error: {e:?}");
            // Signal the failure to the caller (shell, build script, ...).
            std::process::exit(1);
        }
    }
}
+146
View File
@@ -0,0 +1,146 @@
use crate::expect_type;
use crate::lexer::Token;
/// Parser that consumes a token list and builds a [`Node`] tree.
pub struct Parser {
    /// Initialised to an empty scope by `new`; no visible method reads it.
    ast: Node,
    /// Index of the next token to hand out.
    idx: usize,
    tokens: Vec<Token>,
}

impl Parser {
    /// Creates a parser positioned at the first of `tokens`.
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            ast: Node::Scope {
                children: Vec::new(),
            },
            idx: 0,
            tokens,
        }
    }

    /// Parses statements until the tokens run out.
    ///
    /// Returns a `Node::Scope` holding the statements in source order, or
    /// the first error encountered.
    pub fn parse(&mut self) -> Result<Node, CompileError> {
        let mut statements = Vec::new();
        while self.peek_next().is_some() {
            statements.push(self.parse_statement()?);
        }
        Ok(Node::Scope {
            children: statements,
        })
    }

    /// Parses `<target> = <expression>`, where the target is either a typed
    /// variable (`name: Type`) or a bare identifier.
    fn parse_statement(&mut self) -> Result<Node, CompileError> {
        // The typed-variable attempt may consume tokens before it fails, so
        // remember where the statement starts and rewind before falling back
        // to a bare identifier.
        let start = self.idx;
        let left = match self.parse_typed_var() {
            Ok(typed_var) => Box::new(typed_var),
            Err(_) => {
                self.idx = start;
                let tok = self.next()?;
                let tok = expect_type!(tok, Identifier)?;
                Box::new(Node::Terminal { value: tok })
            }
        };
        let assign = self.next()?;
        expect_type!(assign, Assign)?;
        let right = Box::new(self.parse_expression()?);
        Ok(Node::Statement { left, right })
    }

    /// Parses `name: Type` into a `Node::TypedVar`.
    ///
    /// On error, tokens consumed so far are not given back; callers that
    /// want to try an alternative must restore `idx` themselves.
    fn parse_typed_var(&mut self) -> Result<Node, CompileError> {
        // Each token is bound to a local before being checked so that it is
        // pulled from the stream exactly once.
        let name = self.next()?;
        let name = expect_type!(name, Identifier)?;
        let colon = self.next()?;
        expect_type!(colon, Colon)?;
        let type_ = self.next()?;
        let type_ = expect_type!(type_, Identifier)?;
        Ok(Node::TypedVar { name, type_ })
    }

    /// Expression parsing is not implemented yet; always fails with
    /// `CompileError::Generic`.
    fn parse_expression(&mut self) -> Result<Node, CompileError> {
        Err(CompileError::Generic)
    }

    /// Returns the next token and moves past it.
    ///
    /// Fails with `CompileError::UnexpectedEOF` when no tokens remain.
    fn next(&mut self) -> Result<Token, CompileError> {
        let token = self
            .tokens
            .get(self.idx)
            .cloned()
            .ok_or(CompileError::UnexpectedEOF)?;
        self.idx += 1;
        Ok(token)
    }

    /// Returns a copy of the next token without consuming it, or `None` when
    /// no tokens remain.
    fn peek_next(&mut self) -> Option<Token> {
        self.tokens.get(self.idx).cloned()
    }
}
/// A node of the syntax tree produced by `Parser`.
///
/// NOTE(review): in the visible parser only `Scope`, `Terminal`, `Statement`
/// and `TypedVar` are constructed; the notes on the other variants are read
/// off their field names and should be confirmed once they are produced.
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
/// A sequence of child nodes; `Parser::parse` returns the parsed
/// statements wrapped in one of these.
Scope {
children: Vec<Node>,
},
/// A leaf holding a single token.
Terminal {
value: Token,
},
/// An operator token with one operand (presumably a prefix operator).
UnaryOp {
op: Token,
right: Box<Node>,
},
/// An operator token with a left and a right operand.
BinaryOp {
left: Box<Node>,
op: Token,
right: Box<Node>,
},
/// An assignment-style statement: `left` is the target parsed before the
/// `=` token, `right` the expression parsed after it.
Statement {
left: Box<Node>,
right: Box<Node>,
},
/// A conditional with an optional else branch.
If {
condition: Box<Node>,
then_branch: Box<Node>,
else_branch: Option<Box<Node>>,
},
/// A function definition: parameter nodes and a body node.
FunctionDef {
params: Vec<Node>,
body: Box<Node>,
},
/// A `name: Type` pair; both fields hold identifier tokens.
TypedVar {
name: Token,
type_: Token,
},
/// A named type with a list of field nodes.
TypeDef {
name: Token,
fields: Vec<Node>,
},
}
/// Errors reported while parsing.
#[derive(Debug)]
pub enum CompileError {
/// Unspecific failure; currently returned by the unimplemented
/// expression parser.
Generic,
/// A token of a different kind was required. `expected` is the bracketed,
/// `|`-separated list of accepted variant names built by `expect_type!`;
/// `found` is the token that was seen instead.
ExpectedToken { expected: String, found: Token },
/// A token was requested after the end of the token list.
UnexpectedEOF,
}
/// Checks that a token is one of the listed variants.
///
/// `expect_type!(token, A, B, ...)` compares `token.tt()` against the names
/// of the given variants. It evaluates to `Ok(token)` (a clone of it) on a
/// match and otherwise to `Err(CompileError::ExpectedToken { .. })`, where
/// `expected` is rendered as `"[A | B]"` and `found` is the rejected token.
///
/// `CompileError` must be in scope at the call site.
#[macro_export]
macro_rules! expect_type {
    ($token:expr, $($variant:ident),+) => {{
        // Evaluate the token expression exactly once. Call sites pass
        // side-effecting expressions such as `self.next()?`; expanding the
        // expression again in the arms would check one token but hand back
        // (or report) the one after it.
        let token = $token;
        match token.tt() {
            $(
                stringify!($variant) => Ok(token.clone()),
            )+
            _ => {
                // Report every accepted variant in the error.
                let expected = format!("[{}]", [$(stringify!($variant)),+].join(" | "));
                Err(CompileError::ExpectedToken {
                    expected,
                    found: token.clone(),
                })
            }
        }
    }};
}
+8
View File
@@ -0,0 +1,8 @@
main: Func = | x: U32, y: U32 | {
res = add(x, y);
print(res);
if res > 10 {
print("res is greater than 10");
}
}