tokeniser: bugfixes to comma handling, regexes
TODO: Verify that the output is as expected. Perhaps I can dump the token stream to a file and compare it with a known-valid one. I will add some extra tests as well, of course!
This commit is contained in:
@@ -58,7 +58,7 @@ impl Tokeniser {
|
|||||||
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
|
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
|
||||||
.expect("Failed to compile label regex pattern"),
|
.expect("Failed to compile label regex pattern"),
|
||||||
register_regex: Regex::new(
|
register_regex: Regex::new(
|
||||||
r"^(rg([0-9]|[a-f])|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
|
r"^(rg[0-9a-f]+|acc|spr|bpr|ret|idr|mmr|zero|noreg|pcx)\b",
|
||||||
)
|
)
|
||||||
.expect("Failed to compile register regex pattern"),
|
.expect("Failed to compile register regex pattern"),
|
||||||
immediate_regex: Regex::new(
|
immediate_regex: Regex::new(
|
||||||
@@ -71,7 +71,7 @@ impl Tokeniser {
|
|||||||
r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b",
|
r"^(nop|movs?|ld[bhw]s?|st[bhw]|l[lu]i|j(mp|[egl][qte])|cmp|[id]nc|sh[lr]|add[i]?|sub[i]?|x?n?or|and|not|i[rd]t|hlt|lhwmm|lidt|push[a]?|pop[a]?|lwi|return|call)\b",
|
||||||
)
|
)
|
||||||
.expect("Failed to compile instruction regex pattern"),
|
.expect("Failed to compile instruction regex pattern"),
|
||||||
symbol_regex: Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*:{2}[a-zA-Z0-9_]*|[a-zA-Z_][a-zA-Z_0-9]*")
|
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)::{2}([a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)")
|
||||||
.expect("Failed to compile symbol regex pattern"),
|
.expect("Failed to compile symbol regex pattern"),
|
||||||
comment_regex: Regex::new("^//.*")
|
comment_regex: Regex::new("^//.*")
|
||||||
.expect("Failed to compile comment regex pattern"),
|
.expect("Failed to compile comment regex pattern"),
|
||||||
@@ -217,16 +217,11 @@ impl Tokeniser {
|
|||||||
|
|
||||||
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
|
fn try_match_register(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||||
let caps = self.register_regex.captures(input)?;
|
let caps = self.register_regex.captures(input)?;
|
||||||
let reg = caps.get(1)?.as_str();
|
|
||||||
|
let captured_group = caps.get(1)?.as_str();
|
||||||
let len = caps.get(0)?.len();
|
let len = caps.get(0)?.len();
|
||||||
|
|
||||||
let reg = match Register::try_from(reg) {
|
let reg = Register::try_from(captured_group).ok()?;
|
||||||
Ok(reg) => reg,
|
|
||||||
Err(_why) => {
|
|
||||||
// Probably ignore the error.
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
Some((TokenType::Register(RegisterToken { reg }), len))
|
Some((TokenType::Register(RegisterToken { reg }), len))
|
||||||
}
|
}
|
||||||
@@ -274,9 +269,18 @@ impl Tokeniser {
|
|||||||
|
|
||||||
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
|
fn try_match_symbol(&self, input: &str) -> Option<(TokenType, usize)> {
|
||||||
let caps = self.symbol_regex.captures(input)?;
|
let caps = self.symbol_regex.captures(input)?;
|
||||||
let name = caps.get(1)?.as_str().to_string();
|
|
||||||
let len = caps.get(0)?.len();
|
let len = caps.get(0)?.len();
|
||||||
|
|
||||||
|
// Check which capture group matched.
|
||||||
|
let name = if let Some(scoped_name) = caps.get(1) {
|
||||||
|
// Matched the scoped symbol pattern (name::scope).
|
||||||
|
format!("{}::{}", scoped_name.as_str(), caps.get(2)?.as_str())
|
||||||
|
} else if let Some(simple_name) = caps.get(3) {
|
||||||
|
simple_name.as_str().to_string()
|
||||||
|
} else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
|
||||||
Some((TokenType::Symbol(SymbolToken { name }), len))
|
Some((TokenType::Symbol(SymbolToken { name }), len))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -357,6 +361,12 @@ impl Tokeniser {
|
|||||||
line_number: usize,
|
line_number: usize,
|
||||||
column: usize,
|
column: usize,
|
||||||
) -> Result<(TokenType, usize), AssembleError> {
|
) -> Result<(TokenType, usize), AssembleError> {
|
||||||
|
dbg!(input);
|
||||||
|
|
||||||
|
if input.starts_with(',') {
|
||||||
|
return Ok((TokenType::Comma, 1));
|
||||||
|
}
|
||||||
|
|
||||||
// Check for string first (including multiline continuations).
|
// Check for string first (including multiline continuations).
|
||||||
if let Some(m) = self.try_match_string(input, line_number, column) {
|
if let Some(m) = self.try_match_string(input, line_number, column) {
|
||||||
return Ok(m);
|
return Ok(m);
|
||||||
@@ -390,36 +400,20 @@ impl Tokeniser {
|
|||||||
return Ok(m);
|
return Ok(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut idx_iter = 0..;
|
let mut idx_iter = (column + 1)..;
|
||||||
|
|
||||||
// Handle miscellaneous characters.
|
|
||||||
match input.chars().next() {
|
|
||||||
Some(',') => {
|
|
||||||
_ = idx_iter.next();
|
|
||||||
|
|
||||||
Ok((TokenType::Comma, 1))
|
|
||||||
}
|
|
||||||
Some(c) => {
|
|
||||||
let Some(idx) = idx_iter.next() else {
|
let Some(idx) = idx_iter.next() else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
|
|
||||||
let source =
|
let source = SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
||||||
SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
|
||||||
|
|
||||||
|
// Handle miscellaneous characters.
|
||||||
|
if let Some(c) = input.chars().next() {
|
||||||
Err(AssembleError::new_source_error(
|
Err(AssembleError::new_source_error(
|
||||||
source,
|
source,
|
||||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)),
|
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)),
|
||||||
))
|
))
|
||||||
}
|
} else {
|
||||||
None => {
|
|
||||||
let Some(idx) = idx_iter.next() else {
|
|
||||||
unreachable!()
|
|
||||||
};
|
|
||||||
|
|
||||||
let source =
|
|
||||||
SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
|
|
||||||
|
|
||||||
Err(AssembleError::new_source_error(
|
Err(AssembleError::new_source_error(
|
||||||
source,
|
source,
|
||||||
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput(
|
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput(
|
||||||
@@ -428,5 +422,4 @@ impl Tokeniser {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user