tokeniser: bugfixes to comma handling, regexes
TODO: Verify that the output is as expected. Perhaps I can dump the token stream to a file and compare it with a known-valid one? I will also add some extra tests, of course!
This commit is contained in:
@@ -58,7 +58,7 @@ impl Tokeniser {
|
||||
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
|
||||
.expect("Failed to compile label regex pattern"),
|
||||
register_regex: Regex::new(
|
||||
r"^(rg([0-9]|[a-f])|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
|
||||
r"^(rg[0-9a-f]+|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
|
||||
)
|
||||
.expect("Failed to compile register regex pattern"),
|
||||
immediate_regex: Regex::new(
|
||||
@@ -71,7 +71,7 @@ impl Tokeniser {
|
||||
r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b",
|
||||
)
|
||||
.expect("Failed to compile instruction regex pattern"),
|
||||
symbol_regex: Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*:{2}[a-zA-Z0-9_]*|[a-zA-Z_][a-zA-Z_0-9]*")
|
||||
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)::{2}([a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)")
|
||||
.expect("Failed to compile symbol regex pattern"),
|
||||
comment_regex: Regex::new("^//.*")
|
||||
.expect("Failed to compile comment regex pattern"),
|
||||
@@ -217,16 +217,11 @@ impl Tokeniser {
|
||||
|
||||
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.register_regex.captures(input)?;
|
||||
let reg = caps.get(1)?.as_str();
|
||||
|
||||
let captured_group = caps.get(1)?.as_str();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
let reg = match Register::try_from(reg) {
|
||||
Ok(reg) => reg,
|
||||
Err(_why) => {
|
||||
// Probably ignore the error.
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let reg = Register::try_from(captured_group).ok()?;
|
||||
|
||||
Some((TokenType::Register(RegisterToken { reg }), len))
|
||||
}
|
||||
@@ -274,9 +269,18 @@ impl Tokeniser {
|
||||
|
||||
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||
let caps = self.symbol_regex.captures(input)?;
|
||||
let name = caps.get(1)?.as_str().to_string();
|
||||
let len = caps.get(0)?.len();
|
||||
|
||||
// Check which capture group matched.
|
||||
let name = if let Some(scoped_name) = caps.get(1) {
|
||||
// Matched the scoped symbol pattern (name::scope).
|
||||
format!("{}::{}", scoped_name.as_str(), caps.get(2)?.as_str())
|
||||
} else if let Some(simple_name) = caps.get(3) {
|
||||
simple_name.as_str().to_string()
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some((TokenType::Symbol(SymbolToken { name }), len))
|
||||
}
|
||||
|
||||
@@ -357,6 +361,12 @@ impl Tokeniser {
|
||||
line_number: usize,
|
||||
column: usize,
|
||||
) -> Result<(TokenType, usize), AssembleError> {
|
||||
dbg!(input);
|
||||
|
||||
if input.starts_with(',') {
|
||||
return Ok((TokenType::Comma, 1));
|
||||
}
|
||||
|
||||
// Check for string first (including multiline continuations).
|
||||
if let Some(m) = self.try_match_string(input, line_number, column) {
|
||||
return Ok(m);
|
||||
@@ -390,43 +400,26 @@ impl Tokeniser {
|
||||
return Ok(m);
|
||||
}
|
||||
|
||||
let mut idx_iter = 0..;
|
||||
let mut idx_iter = (column + 1)..;
|
||||
let Some(idx) = idx_iter.next() else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let source = SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
||||
|
||||
// Handle miscellaneous characters.
|
||||
match input.chars().next() {
|
||||
Some(',') => {
|
||||
_ = idx_iter.next();
|
||||
|
||||
Ok((TokenType::Comma, 1))
|
||||
}
|
||||
Some(c) => {
|
||||
let Some(idx) = idx_iter.next() else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let source =
|
||||
SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
||||
|
||||
Err(AssembleError::new_source_error(
|
||||
source,
|
||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)),
|
||||
))
|
||||
}
|
||||
None => {
|
||||
let Some(idx) = idx_iter.next() else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let source =
|
||||
SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
||||
|
||||
Err(AssembleError::new_source_error(
|
||||
source,
|
||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput(
|
||||
input.len(),
|
||||
)),
|
||||
))
|
||||
}
|
||||
if let Some(c) = input.chars().next() {
|
||||
Err(AssembleError::new_source_error(
|
||||
source,
|
||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)),
|
||||
))
|
||||
} else {
|
||||
Err(AssembleError::new_source_error(
|
||||
source,
|
||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput(
|
||||
input.len(),
|
||||
)),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user