added a (very incomplete) C frontend for DSAC

This commit is contained in:
2026-02-05 01:10:47 +00:00
parent 2f91c4127c
commit c2bf9f6667
3 changed files with 832 additions and 0 deletions
+336
View File
@@ -0,0 +1,336 @@
// ============================================================================
// Token Types
// ============================================================================
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// Keywords
Int,
If,
Else,
While,
Return,
Include,
// Identifiers and literals
Identifier(String),
Number(i32),
String(String),
Char(char),
// Operators
Plus,
Minus,
Star,
Slash,
Assign,
Eq,
Ne,
Lt,
Gt,
Le,
Ge,
// Delimiters
LParen,
RParen,
LBrace,
RBrace,
Semicolon,
Comma,
Colon,
Namespace,
Eof,
}
#[allow(unused)]
pub enum Type {
Int32,
Int16,
Int8,
Uint32,
Uint16,
Uint8,
Char,
}
#[derive(Debug, Clone)]
pub struct Token {
pub token_type: TokenType,
pub line: usize,
pub col: usize,
}
impl Token {
pub fn new(token_type: TokenType, line: usize, col: usize) -> Self {
Self {
token_type,
line,
col,
}
}
}
// ============================================================================
// Lexer
// ============================================================================
pub struct Lexer {
source: Vec<char>,
pos: usize,
line: usize,
col: usize,
}
impl Lexer {
pub fn new(source: &str) -> Self {
Self {
source: source.chars().collect(),
pos: 0,
line: 1,
col: 1,
}
}
fn error(&self, msg: &str) -> String {
format!(
"Lexer error at line {}, col {}: {}",
self.line, self.col, msg
)
}
fn peek(&self, offset: usize) -> Option<char> {
self.source.get(self.pos + offset).copied()
}
fn advance(&mut self) -> Option<char> {
if self.pos >= self.source.len() {
return None;
}
let ch = self.source[self.pos];
self.pos += 1;
if ch == '\n' {
self.line += 1;
self.col = 1;
} else {
self.col += 1;
}
Some(ch)
}
fn skip_whitespace(&mut self) {
while let Some(ch) = self.peek(0) {
if ch.is_whitespace() {
self.advance();
} else {
break;
}
}
}
fn skip_comment(&mut self) {
if self.peek(0) == Some('/') && self.peek(1) == Some('/') {
while let Some(ch) = self.peek(0) {
if ch == '\n' {
break;
}
self.advance();
}
}
}
fn read_number(&mut self) -> i32 {
let mut num_str = String::new();
while let Some(ch) = self.peek(0) {
if ch.is_ascii_digit() {
num_str.push(ch);
self.advance();
} else {
break;
}
}
num_str.parse().unwrap_or(0)
}
fn read_identifier(&mut self) -> String {
let mut ident = String::new();
while let Some(ch) = self.peek(0) {
if ch.is_alphanumeric() || ch == '_' {
ident.push(ch);
self.advance();
} else {
break;
}
}
ident
}
fn read_string(&mut self) -> Result<String, String> {
let mut string = String::new();
self.advance(); // Consume the opening quote
while let Some(ch) = self.peek(0) {
if ch == '"' {
self.advance(); // Consume the closing quote
return Ok(string);
} else if ch == '\\' {
self.advance(); // Consume the backslash
if let Some(escaped_char) = self.peek(0) {
string.push(escaped_char);
self.advance();
}
} else {
string.push(ch);
self.advance();
}
}
Err(String::from("Unexpected EOF"))
}
fn read_char(&mut self) -> Result<char, String> {
self.advance(); // Consume the opening quote
if let Some(ch) = self.peek(0) {
self.advance();
if self.peek(0) == Some('\'') {
self.advance();
return Ok(ch);
} else {
Err(String::from("expected closing quote"))
}
} else {
Err(String::from("expected character"))
}
}
pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
let mut tokens = Vec::new();
loop {
self.skip_whitespace();
self.skip_comment();
if self.pos >= self.source.len() {
break;
}
let line = self.line;
let col = self.col;
let ch = self.peek(0).unwrap();
let token_type = if ch.is_ascii_digit() {
let num = self.read_number();
TokenType::Number(num)
} else if ch == '"' {
let string = self.read_string()?;
TokenType::String(string)
} else if ch == '\'' {
let char = self.read_char()?;
TokenType::Char(char)
} else if ch.is_alphabetic() || ch == '_' {
let ident = self.read_identifier();
match ident.as_str() {
"int" => TokenType::Int,
"if" => TokenType::If,
"else" => TokenType::Else,
"while" => TokenType::While,
"return" => TokenType::Return,
"include" => TokenType::Include,
_ => TokenType::Identifier(ident),
}
} else {
match ch {
':' if self.peek(1) == Some(':') => {
self.advance();
self.advance();
TokenType::Namespace
}
':' => {
self.advance();
TokenType::Colon
}
'=' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Eq
}
'!' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Ne
}
'<' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Le
}
'>' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Ge
}
'+' => {
self.advance();
TokenType::Plus
}
'-' => {
self.advance();
TokenType::Minus
}
'*' => {
self.advance();
TokenType::Star
}
'/' => {
self.advance();
TokenType::Slash
}
'=' => {
self.advance();
TokenType::Assign
}
'<' => {
self.advance();
TokenType::Lt
}
'>' => {
self.advance();
TokenType::Gt
}
'(' => {
self.advance();
TokenType::LParen
}
')' => {
self.advance();
TokenType::RParen
}
'{' => {
self.advance();
TokenType::LBrace
}
'}' => {
self.advance();
TokenType::RBrace
}
';' => {
self.advance();
TokenType::Semicolon
}
',' => {
self.advance();
TokenType::Comma
}
_ => return Err(self.error(&format!("Unexpected character: {}", ch))),
}
};
tokens.push(Token::new(token_type, line, col));
}
tokens.push(Token::new(TokenType::Eof, self.line, self.col));
Ok(tokens)
}
}
+25
View File
@@ -0,0 +1,25 @@
use common::logging::log;
use crate::model::{CompilerError, Program};
use parser::Parser;
pub mod lexer;
pub mod parser;
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
log("Tokenising Input...");
let mut lexer = lexer::Lexer::new(&input);
let tokens = lexer.tokenize().map_err(|e| CompilerError::Generic(e))?;
// println!("{tokens:?}");
log(&format!("Parsing {} Tokens...", tokens.len()));
let mut parser = Parser::new(tokens);
let ast = match parser.parse() {
Ok(ast) => ast,
Err(e) => return Err(CompilerError::Generic(e)),
};
Ok(ast)
}
+471
View File
@@ -0,0 +1,471 @@
// ============================================================================
// AST Node Types
// ============================================================================
use crate::model::{
BinaryOperator, Block, ConstExpr, Declaration, Dependency, Expression, Name, Program,
Statement, TypeId, UnaryOperator, Variable,
};
use super::lexer::{Token, TokenType};
// ============================================================================
// Parser
// ============================================================================
pub struct Parser {
tokens: Vec<Token>,
pos: usize,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, pos: 0 }
}
fn error(&self, msg: &str) -> String {
let token = self.current();
format!(
"Parser error at line {}, col {}: {}",
token.line, token.col, msg
)
}
fn current(&self) -> &Token {
self.tokens
.get(self.pos)
.unwrap_or_else(|| self.tokens.last().unwrap())
}
fn peek(&self, offset: usize) -> &Token {
self.tokens
.get(self.pos + offset)
.unwrap_or_else(|| self.tokens.last().unwrap())
}
fn advance(&mut self) -> &Token {
if self.pos < self.tokens.len() - 1 {
self.pos += 1;
}
self.current()
}
fn expect(&mut self, expected: TokenType) -> Result<Token, String> {
let token = self.current().clone();
if std::mem::discriminant(&token.token_type) != std::mem::discriminant(&expected)
{
return Err(self.error(&format!(
"Expected {:?}, got {:?}",
expected, token.token_type
)));
}
self.advance();
Ok(token)
}
pub fn parse(&mut self) -> Result<Program, String> {
let mut declarations = Vec::new();
while !matches!(self.current().token_type, TokenType::Eof) {
declarations.push(self.parse_declaration()?);
}
Ok(Program { declarations })
}
fn parse_declaration(&mut self) -> Result<Declaration, String> {
// check for an import
if let TokenType::Include = self.current().token_type {
self.advance();
let name =
if let TokenType::Identifier(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected identifier"))?;
self.advance();
self.expect(TokenType::Colon)?;
let path = if let TokenType::String(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected string literal"))?;
self.advance();
return Ok(Declaration::Dependency(Dependency { name, path }));
}
self.expect(TokenType::Int)?;
let name = match &self.current().token_type {
TokenType::Identifier(s) => s.clone(),
_ => return Err(self.error("Expected identifier")),
};
self.advance();
match &self.current().token_type {
TokenType::LParen => {
// Function declaration
self.advance();
let mut params = Vec::<Variable>::new();
if !matches!(self.current().token_type, TokenType::RParen) {
self.expect(TokenType::Int)?;
match &self.current().token_type {
TokenType::Identifier(s) => {
params.push(Variable {
name: s.clone(),
type_id: TypeId::U32,
});
self.advance();
}
_ => return Err(self.error("Expected parameter name")),
}
while matches!(self.current().token_type, TokenType::Comma) {
self.advance();
self.expect(TokenType::Int)?;
match &self.current().token_type {
TokenType::Identifier(s) => {
params.push(Variable {
name: s.clone(),
type_id: TypeId::U32,
});
self.advance();
}
_ => return Err(self.error("Expected parameter name")),
}
}
}
self.expect(TokenType::RParen)?;
let body = self.parse_block()?;
Ok(Declaration::Function {
name,
params,
body,
return_type: TypeId::U32,
})
}
_ => {
// Variable declaration
let init = if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
if let TokenType::Number(n) = self.current().token_type {
self.advance();
Some(ConstExpr::Number(n))
} else {
return Err(self
.error("Expected constant in global variable declaration"));
}
} else {
None
};
self.expect(TokenType::Semicolon)?;
Ok(Declaration::Variable {
var: Variable {
name,
type_id: TypeId::U32,
},
init,
is_const: false,
})
}
}
}
fn parse_block(&mut self) -> Result<Block, String> {
self.expect(TokenType::LBrace)?;
let mut statements = Vec::new();
while !matches!(self.current().token_type, TokenType::RBrace) {
statements.push(self.parse_statement()?);
}
self.expect(TokenType::RBrace)?;
Ok(statements)
}
fn parse_statement(&mut self) -> Result<Statement, String> {
match &self.current().token_type {
TokenType::LBrace => Ok(Statement::Block(self.parse_block()?)),
TokenType::If => self.parse_if_stmt(),
TokenType::While => self.parse_while_stmt(),
TokenType::Return => self.parse_return_stmt(),
TokenType::Identifier(name) => {
let name = name.clone();
// peek ahead for open paren (func call expr)
if matches!(self.peek(1).token_type, TokenType::LParen) {
let expr = self.parse_expression()?; // a function call expr
self.expect(TokenType::Semicolon)?;
return Ok(Statement::Expression { expr });
}
self.advance(); // advance past identifier
// assignment expression
if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
let expr = self.parse_expression()?;
self.expect(TokenType::Semicolon)?;
Ok(Statement::Assign {
varname: name,
value: expr,
})
}
// var expression
else {
self.expect(TokenType::Semicolon)?;
Ok(Statement::Expression {
expr: Expression::Variable {
name: Name {
name,
namespace: None,
},
expr_type: None,
},
})
}
}
TokenType::Int => {
// Local variable declaration
self.advance();
let name = match &self.current().token_type {
TokenType::Identifier(s) => s.clone(),
_ => return Err(self.error("Expected variable name")),
};
self.advance();
let init = if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::Semicolon)?;
// Convert to assignment expression statement
let expr = if let Some(init_expr) = init {
Statement::Assign {
varname: name,
value: init_expr,
}
} else {
Statement::Assign {
varname: name,
value: Expression::Empty,
}
};
Ok(expr)
}
_ => {
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
Expression::Empty
} else {
self.parse_expression()?
};
self.expect(TokenType::Semicolon)?;
Ok(Statement::Expression { expr })
}
}
}
fn parse_if_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::If)?;
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let then_stmt = self.parse_block()?;
let else_stmt = if matches!(self.current().token_type, TokenType::Else) {
self.advance();
self.parse_block()?
} else {
Vec::new()
};
Ok(Statement::If {
condition,
then_stmt,
else_stmt,
})
}
fn parse_while_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::While)?;
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let body = self.parse_block()?;
Ok(Statement::While { condition, body })
}
fn parse_return_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::Return)?;
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
None
} else {
Some(self.parse_expression()?)
};
self.expect(TokenType::Semicolon)?;
Ok(Statement::Return(expr))
}
fn parse_expression(&mut self) -> Result<Expression, String> {
self.parse_comparison()
}
fn parse_comparison(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_additive()?;
while let Some(op) = match &self.current().token_type {
TokenType::Eq => Some(BinaryOperator::Eq),
TokenType::Ne => Some(BinaryOperator::Ne),
TokenType::Lt => Some(BinaryOperator::Lt),
TokenType::Gt => Some(BinaryOperator::Gt),
TokenType::Le => Some(BinaryOperator::Le),
TokenType::Ge => Some(BinaryOperator::Ge),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_additive()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
};
}
Ok(expr)
}
fn parse_additive(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_multiplicative()?;
while let Some(op) = match &self.current().token_type {
TokenType::Plus => Some(BinaryOperator::Add),
TokenType::Minus => Some(BinaryOperator::Sub),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_multiplicative()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
};
}
Ok(expr)
}
fn parse_multiplicative(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_unary()?;
while let Some(op) = match &self.current().token_type {
TokenType::Star => Some(BinaryOperator::Mul),
TokenType::Slash => Some(BinaryOperator::Div),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_unary()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
};
}
Ok(expr)
}
fn parse_unary(&mut self) -> Result<Expression, String> {
let op = match &self.current().token_type {
TokenType::Plus => Some(UnaryOperator::Plus),
TokenType::Minus => Some(UnaryOperator::Minus),
_ => None,
};
if let Some(op) = op {
self.advance();
let operand = Box::new(self.parse_unary()?);
return Ok(Expression::Unary { op, operand });
}
self.parse_primary()
}
fn parse_primary(&mut self) -> Result<Expression, String> {
match &self.current().token_type.clone() {
TokenType::Number(n) => {
let value = *n;
self.advance();
Ok(Expression::Number(value as isize))
}
TokenType::Identifier(name) => {
let name = name.clone();
self.advance();
if matches!(self.current().token_type, TokenType::LParen) {
// Function call
self.advance();
let mut args = Vec::new();
if !matches!(self.current().token_type, TokenType::RParen) {
args.push(self.parse_expression()?);
while matches!(self.current().token_type, TokenType::Comma) {
self.advance();
args.push(self.parse_expression()?);
}
}
self.expect(TokenType::RParen)?;
Ok(Expression::Call {
name: Name {
name,
namespace: None,
},
args,
})
} else {
Ok(Expression::Variable {
name: Name {
name,
namespace: None,
},
expr_type: None,
})
}
}
TokenType::LParen => {
self.advance();
let expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Ok(expr)
}
_ => Err(self.error(&format!(
"Unexpected token: {:?}",
self.current().token_type
))),
}
}
}