Added some documentation and started on a compiler for a custom language (not C), based on previous prototypes. Currently in a pretty broken state.

This commit is contained in:
2026-02-01 22:16:09 +00:00
parent 52ef7872f0
commit 8f7163c459
9 changed files with 1750 additions and 17 deletions
+21 -5
View File
@@ -4,12 +4,15 @@ use std::str::Chars;
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
// Keywords
Fn,
Let,
If,
Else,
Loop,
Break,
Return,
Continue,
Include,
// Identifiers and literals
Identifier(String),
@@ -24,7 +27,7 @@ pub enum Token {
Semicolon, // ;
Colon, // :
Comma, // ,
Pipe, // |
// Pipe, // |
// Operators
Plus, // +
@@ -39,6 +42,7 @@ pub enum Token {
LessEqual, // <=
Greater, // >
GreaterEqual, // >=
RightArrow, // ->
// Special
Eof,
@@ -47,7 +51,10 @@ pub enum Token {
impl Token {
pub fn tt(&self) -> &str {
match self {
Token::Include => "Include",
Token::Fn => "Fn",
Token::If => "If",
Token::Let => "Let",
Token::Else => "Else",
Token::Loop => "Loop",
Token::Break => "Break",
@@ -63,7 +70,8 @@ impl Token {
Token::Semicolon => "Semicolon",
Token::Colon => "Colon",
Token::Comma => "Comma",
Token::Pipe => "Pipe",
Token::RightArrow => "RightArrow",
// Token::Pipe => "Pipe",
Token::Plus => "Plus",
Token::Minus => "Minus",
Token::Star => "Star",
@@ -168,11 +176,17 @@ impl<'a> Lexer<'a> {
Some(';') => Token::Semicolon,
Some(':') => Token::Colon,
Some(',') => Token::Comma,
Some('|') => Token::Pipe,
// Some('|') => Token::Pipe,
Some('+') => Token::Plus,
Some('-') => Token::Minus,
Some('*') => Token::Star,
Some('/') => Token::Slash,
Some('-') => {
if self.match_next('>') {
Token::RightArrow
} else {
Token::Minus
}
}
Some('!') => {
if self.match_next('=') {
Token::BangEqual
@@ -218,12 +232,14 @@ impl<'a> Lexer<'a> {
let mut ident = c.to_string();
ident.push_str(&self.read_identifier());
match ident.as_str() {
"fn" => Token::Fn,
"if" => Token::If,
"else" => Token::Else,
"loop" => Token::Loop,
"break" => Token::Break,
"return" => Token::Return,
"continue" => Token::Continue,
"include" => Token::Include,
_ => Token::Identifier(ident),
}
} else if c.is_ascii_digit() {
@@ -331,7 +347,7 @@ mod tests {
assert_eq!(lexer.next_token(), Token::Colon);
assert_eq!(lexer.next_token(), Token::Identifier("Func".to_string()));
assert_eq!(lexer.next_token(), Token::Assign);
assert_eq!(lexer.next_token(), Token::Pipe);
// assert_eq!(lexer.next_token(), Token::Pipe);
assert_eq!(lexer.next_token(), Token::Identifier("x".to_string()));
assert_eq!(lexer.next_token(), Token::Colon);
assert_eq!(lexer.next_token(), Token::Identifier("U32".to_string()));
+12 -4
View File
@@ -1,7 +1,12 @@
#![feature(try_trait_v2)]
use std::{fs, path::Path};
pub mod lexer;
pub mod parser;
pub mod parserprototype;
use parserprototype::Parser;
use crate::parserprototype::ParseResult;
fn main() {
println!("Hello, world!");
@@ -13,13 +18,16 @@ fn main() {
let tokens = lexer.collect::<Vec<_>>();
println!("{tokens:?}");
let mut parser = parser::Parser::new(tokens);
let mut parser = Parser::new(tokens);
let ast = match parser.parse() {
Ok(ast) => ast,
Err(e) => {
ParseResult::Accept(ast) => ast,
ParseResult::Reject(e) => {
eprintln!("Error: {e:?}");
return;
}
ParseResult::Deny => {
panic!("Parser denied parsing")
}
};
println!("{ast:?}");
}
+435
View File
@@ -0,0 +1,435 @@
use crate::lexer::Token;
use crate::{expect_tt, expect_value};
use core::fmt;
use std::ops::{ControlFlow, FromResidual, Try};
/// Three-way outcome returned by the parser routines in this file.
///
/// It works with the `?` operator through the `Try`/`FromResidual` impls
/// further down: `Accept` yields its value, while `Deny` and `Reject` make the
/// enclosing function return early with the same variant.
#[derive(Debug, Clone)]
pub enum ParseResult<T, E> {
/// Parsing succeeded and produced a value.
Accept(T),
/// Neither a value nor an error payload.
// NOTE(review): nothing in this file constructs `Deny`; presumably it means
// "this rule does not apply here" as opposed to a hard error — confirm.
Deny,
/// Parsing failed with the given error.
Reject(E),
}
/// Errors carried by `ParseResult::Reject`.
#[derive(Debug, Clone)]
pub enum CompilerError {
/// A token of a different kind than required was found; carries that token.
UnexpectedToken(Token),
/// The token list was exhausted while another token was still needed.
UnexpectedEndOfInput,
// NOTE(review): the three variants below are not constructed anywhere in
// this file; presumably they are meant for the lexer and later compiler
// stages — confirm.
/// An unexpected character.
UnexpectedCharacter(char),
/// A syntax error described by a message.
InvalidSyntax(String),
/// Any other error described by a message.
Generic(String),
}
/// Parser state: the tokens to parse plus a cursor into them.
pub struct Parser {
/// Tokens to be parsed.
tokens: Vec<Token>,
/// Index into `tokens` of the token that `next`/`peek_next` return next.
idx: usize,
}
impl Parser {
    /// Creates a parser that starts at the first token of `tokens`.
    pub fn new(tokens: Vec<Token>) -> Self {
        Self { tokens, idx: 0 }
    }

    /// Parses the whole token list into a [`Program`].
    ///
    /// Declarations are parsed until the tokens run out or a `Token::Eof` is
    /// reached. The first declaration that fails aborts the parse and its
    /// `Reject`/`Deny` is returned unchanged. `imports` is always left empty.
    pub fn parse(&mut self) -> ParseResult<Program, CompilerError> {
        let mut declarations = Vec::new();
        while let ParseResult::Accept(token) = self.peek_next() {
            // An explicit end-of-file marker is not a declaration; stop here
            // instead of reporting it as an unexpected token.
            if matches!(token, Token::Eof) {
                break;
            }
            declarations.push(self.parse_declaration()?);
        }
        ParseResult::Accept(Program {
            imports: vec![],
            declarations,
        })
    }

    /// Parses one top-level declaration.
    ///
    /// Only functions (introduced by `fn`) are supported; any other token is
    /// rejected with `UnexpectedToken` carrying that token.
    fn parse_declaration(&mut self) -> ParseResult<Declaration, CompilerError> {
        match self.peek_next()? {
            Token::Fn => self.parse_func(),
            other => ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        }
    }

    /// Parses `fn <name> ( <params> ) [-> <type>] { }`.
    ///
    /// Parameters are a run of `name : type` declarations. The return type is
    /// `TypeId::Void` when no `->` follows the parameter list. Every required
    /// token is checked, and the first mismatch is returned as a `Reject`.
    fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> {
        // Each token is bound to a local before it is handed to `expect_tt!`,
        // so the macro never receives an expression with side effects.
        let keyword = self.next()?;
        expect_tt!(keyword, Fn)?;

        let name = match self.next()? {
            Token::Identifier(name) => name,
            other => return ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        };

        let open_paren = self.next()?;
        expect_tt!(open_paren, LParen)?;

        let mut params = Vec::new();
        while matches!(self.peek_next()?, Token::Identifier(_)) {
            params.push(self.parse_var_decl()?);
        }

        let close_paren = self.next()?;
        expect_tt!(close_paren, RParen)?;

        // Optional `-> type` suffix.
        let return_type = if matches!(self.peek_next()?, Token::RightArrow) {
            self.next()?;
            self.parse_type()?
        } else {
            TypeId::Void
        };

        let open_brace = self.next()?;
        expect_tt!(open_brace, LBrace)?;

        // TODO: statements are not parsed yet, so only an empty body is accepted.
        let body = Vec::new();

        let close_brace = self.next()?;
        expect_tt!(close_brace, RBrace)?;

        ParseResult::Accept(Declaration::Function {
            name,
            params,
            return_type,
            body,
        })
    }

    /// Parses `<name> : <type>` into a [`Variable`] whose `param_type` is set.
    fn parse_var_decl(&mut self) -> ParseResult<Variable, CompilerError> {
        let name = match self.next()? {
            Token::Identifier(name) => name,
            other => return ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        };

        let separator = self.next()?;
        expect_tt!(separator, Colon)?;

        let type_ = self.parse_type()?;
        ParseResult::Accept(Variable {
            name,
            param_type: Some(type_),
        })
    }

    /// Parses a type name and maps it onto a [`TypeId`].
    ///
    /// Only the un-namespaced built-in names are recognised. Anything else is
    /// rejected with `InvalidSyntax` instead of panicking, because the name
    /// comes straight from the program being compiled.
    fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> {
        let typename = self.parse_identifier()?;
        let type_id = match (typename.namespace.as_deref(), typename.name.as_str()) {
            (None, "u32") => TypeId::U32,
            (None, "u16") => TypeId::U16,
            (None, "u8") => TypeId::U8,
            (None, "i32") => TypeId::I32,
            (None, "i16") => TypeId::I16,
            (None, "i8") => TypeId::I8,
            (None, "void") => TypeId::Void,
            (None, "char") => TypeId::Char,
            // TODO: resolve user-defined and namespaced types.
            _ => {
                let shown = match &typename.namespace {
                    Some(namespace) => format!("{namespace}::{}", typename.name),
                    None => typename.name.clone(),
                };
                return ParseResult::Reject(CompilerError::InvalidSyntax(format!(
                    "unsupported type `{shown}`"
                )));
            }
        };
        ParseResult::Accept(type_id)
    }

    /// Parses `<name>` or `<namespace> :: <name>` into a [`Name`].
    ///
    /// A `Colon` directly after the first identifier is taken as the start of
    /// `::`, so a second `Colon` and another identifier are then required.
    fn parse_identifier(&mut self) -> ParseResult<Name, CompilerError> {
        let primary = match self.next()? {
            Token::Identifier(text) => text,
            other => return ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        };

        if !matches!(self.peek_next()?, Token::Colon) {
            return ParseResult::Accept(Name {
                namespace: None,
                name: primary,
            });
        }

        // `::` arrives as two separate `Colon` tokens; both must be present.
        for _ in 0..2 {
            let colon = self.next()?;
            expect_tt!(colon, Colon)?;
        }

        let secondary = match self.next()? {
            Token::Identifier(text) => text,
            other => return ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        };
        ParseResult::Accept(Name {
            namespace: Some(primary),
            name: secondary,
        })
    }

    /// Returns a clone of the current token and advances the cursor.
    ///
    /// Rejects with `UnexpectedEndOfInput` once every token has been consumed.
    fn next(&mut self) -> ParseResult<Token, CompilerError> {
        match self.tokens.get(self.idx) {
            Some(token) => {
                let token = token.clone();
                self.idx += 1;
                ParseResult::Accept(token)
            }
            None => ParseResult::Reject(CompilerError::UnexpectedEndOfInput),
        }
    }

    /// Returns a clone of the current token without advancing the cursor.
    ///
    /// Rejects with `UnexpectedEndOfInput` once every token has been consumed.
    fn peek_next(&self) -> ParseResult<Token, CompilerError> {
        match self.tokens.get(self.idx) {
            Some(token) => ParseResult::Accept(token.clone()),
            None => ParseResult::Reject(CompilerError::UnexpectedEndOfInput),
        }
    }
}
/// Root of the parsed output; this is what `Parser::parse` returns on success.
#[derive(Debug, Clone)]
pub struct Program {
/// Dependencies of the program. `Parser::parse` currently leaves this empty.
pub imports: Vec<Dependency>,
/// Top-level declarations in the order they were parsed.
pub declarations: Vec<Declaration>,
}
/// A top-level item of a [`Program`].
#[derive(Debug, Clone)]
pub enum Declaration {
/// A function definition.
Function {
/// Function name.
name: String,
/// Return type; the parser uses `TypeId::Void` when no `->` follows the
/// parameter list.
return_type: TypeId,
/// Parameters in the order they were parsed.
params: Vec<Variable>,
/// Statements of the function body.
body: Block,
},
/// A named variable with an optional constant initializer.
// NOTE(review): the parser in this file never produces this variant yet.
Variable {
name: String,
init: Option<ConstExpr>,
},
}
/// An entry of `Program::imports`, identified by a name and a path.
// NOTE(review): never constructed in this file; presumably this will describe
// an `include` of another source file — confirm.
#[derive(Debug, Clone)]
pub struct Dependency {
pub name: String,
pub path: String,
}
/// A named variable with an optional type; used for function parameters and
/// as the target of `Statement::Assign`.
#[derive(Debug, Clone)]
pub struct Variable {
/// Variable name.
pub name: String,
/// Type of the variable. `parse_var_decl` always fills this in; `None` is
/// only possible for values built elsewhere.
pub param_type: Option<TypeId>,
}
/// An identifier with an optional namespace, as produced by
/// `parse_identifier` from `name` or `namespace :: name`.
#[derive(Debug, Clone)]
pub struct Name {
/// The identifier itself (the part after `::` when a namespace is present).
pub name: String,
/// The part before `::`, or `None` when the identifier stands alone.
pub namespace: Option<String>,
}
/// Types known to the parser.
///
/// `parse_type` maps the names `u8`, `u16`, `u32`, `i8`, `i16`, `i32`, `char`
/// and `void` onto the unit variants of the same name. The composite variants
/// (`Ptr`, `Ref`, `Array`, `Struct`) are not produced anywhere in this file.
#[derive(Debug, Clone)]
pub enum TypeId {
U8,
U16,
U32,
I8,
I16,
I32,
Char,
Void,
/// Wraps another type; presumably a pointer to it — confirm.
Ptr(Box<TypeId>),
/// Wraps another type; presumably a reference to it — confirm.
Ref(Box<TypeId>),
/// A type plus a `usize`; presumably element type and length — confirm.
Array(Box<TypeId>, usize),
/// A named type with a list of `(field name, field type)` pairs.
Struct {
name: Name,
fields: Vec<(String, TypeId)>,
},
}
/// A sequence of statements; used for function bodies and nested blocks.
pub type Block = Vec<Statement>;
/// A statement inside a [`Block`].
// NOTE(review): the parser in this file does not construct statements yet, so
// the per-variant notes below describe the shape of the data only.
#[derive(Debug, Clone)]
pub enum Statement {
/// A nested block of statements.
Block(Block),
/// A variable together with an optional value expression.
Assign {
var: Variable,
value: Option<Box<Expression>>,
},
/// A bare expression used as a statement.
Expression {
expr: Expression,
},
/// A condition with a block for each branch.
If {
condition: Expression,
then_stmt: Block,
else_stmt: Block,
},
/// A condition with a body.
While {
condition: Expression,
body: Vec<Statement>,
},
/// A body with no condition attached.
Loop(Block),
Break,
Continue,
/// A return with an optional value expression.
Return(Option<Expression>),
}
/// A constant value: either an `i32` number or a string.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// Integer constant.
    Number(i32),
    /// String constant; `Display` wraps it in double quotes.
    String(String),
}

/// Renders numbers in decimal and strings surrounded by double quotes.
/// The string content is written as-is, without escaping.
impl fmt::Display for ConstExpr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::String(text) => write!(f, "\"{text}\""),
            Self::Number(value) => write!(f, "{value}"),
        }
    }
}
/// An expression tree node.
// NOTE(review): the parser in this file does not construct expressions yet, so
// the per-variant notes below describe the shape of the data only.
#[derive(Debug, Clone)]
pub enum Expression {
/// An expression with no content.
Empty,
/// A binary operator applied to a left and a right operand.
Binary {
op: BinaryOperator,
left: Box<Expression>,
right: Box<Expression>,
},
/// A unary operator applied to a single operand.
Unary {
op: UnaryOperator,
operand: Box<Expression>,
},
/// A reference to a name, with an optional type attached.
Variable {
name: Name,
expr_type: Option<TypeId>,
},
/// An `i32` literal.
Number {
value: i32,
},
/// A name applied to a list of argument expressions.
Call {
name: Name,
args: Vec<Expression>,
},
}
/// Operators that take two operands: arithmetic and comparison.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    Add,
    Sub,
    Mul,
    Div,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

/// Writes the operator's source spelling (`+`, `==`, `<=`, ...).
impl fmt::Display for BinaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::Eq => "==",
            Self::Ne => "!=",
            Self::Lt => "<",
            Self::Gt => ">",
            Self::Le => "<=",
            Self::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
/// Operators that take a single operand.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
}

/// Writes the operator's source spelling (`+` or `-`).
impl fmt::Display for UnaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Minus => "-",
            Self::Plus => "+",
        };
        f.write_str(symbol)
    }
}
impl<T, E> ParseResult<T, E> {
pub fn accepted(&self) -> bool {
matches!(self, ParseResult::Accept(_))
}
}
/// The non-success half of a `ParseResult`, used as its `Try::Residual` type.
///
/// `Try::branch` produces it when `?` hits a `Deny` or `Reject`, and
/// `FromResidual::from_residual` turns it back into the same variant of the
/// enclosing function's `ParseResult`.
pub enum ParseResultResidual<T> {
/// Mirrors `ParseResult::Deny`.
Deny,
/// Mirrors `ParseResult::Reject`, carrying the error value.
Reject(T),
}
impl<T, E> Try for ParseResult<T, E> {
type Output = T;
type Residual = ParseResultResidual<E>;
fn from_output(output: T) -> Self {
ParseResult::Accept(output)
}
fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
match self {
ParseResult::Accept(v) => ControlFlow::Continue(v),
ParseResult::Deny => ControlFlow::Break(ParseResultResidual::Deny),
ParseResult::Reject(e) => ControlFlow::Break(ParseResultResidual::Reject(e)),
}
}
}
impl<T, E> FromResidual for ParseResult<T, E> {
fn from_residual(residual: ParseResultResidual<E>) -> Self {
match residual {
ParseResultResidual::Deny => ParseResult::Deny,
ParseResultResidual::Reject(e) => ParseResult::Reject(e),
}
}
}
/// Checks that a token's kind name is one of the listed variant names.
///
/// `$token` must provide `tt()` (its kind name as a string) and `clone()`.
/// The macro evaluates to `ParseResult::Accept(<clone of the token>)` when
/// `tt()` equals the spelling of any `$variant`, and otherwise to
/// `ParseResult::Reject(CompilerError::UnexpectedToken(<clone of the token>))`.
///
/// `$token` is evaluated exactly once, so passing a side-effecting expression
/// such as `self.next()?` consumes a single token. `ParseResult` and
/// `CompilerError` are resolved at the call site and must be in scope there.
#[macro_export]
macro_rules! expect_tt {
    ($token:expr, $($variant:ident),+) => {{
        // Borrowing inside a `match` evaluates the argument once and keeps any
        // temporary alive for the whole check.
        match &$token {
            token => match token.tt() {
                $(
                    stringify!($variant) => ParseResult::Accept(token.clone()),
                )+
                _ => ParseResult::Reject(CompilerError::UnexpectedToken(token.clone())),
            },
        }
    }};
}
/// Extracts the payload of a single-field tuple variant from a token.
///
/// `$variant` is the path of the variant, e.g. `Token::Identifier`. The macro
/// evaluates to `ParseResult::Accept(<payload>)` when `$token` is that variant
/// and otherwise to `ParseResult::Reject(CompilerError::UnexpectedToken(..))`
/// carrying the token that was found.
///
/// `$token` is taken by value and evaluated exactly once. `ParseResult` and
/// `CompilerError` are resolved at the call site and must be in scope there.
#[macro_export]
macro_rules! expect_value {
    // `path` (not `expr`) so that `$variant(..)` is valid in pattern position.
    ($token:expr, $variant:path) => {{
        match $token {
            $variant(value) => ParseResult::Accept(value),
            other => ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        }
    }};
}