- optimised main emulator loop, allowing updates only once roughly every 32,768 instructions.

- optimised memory access patterns, removing unnecessary mutability and accesses.
- replaced the standard HashMap with an implementation that uses a faster hashing algorithm.

results:

before:
    - our benchmark program with ~4m instructions would take a noticeable while for its data to make it to the UI, and a bit over 200ms to actually run

after:
    - our benchmark program with ~4m instructions can run in around 75ms, and the UI receives the update almost instantly.

conclusion:
- the emulator should be around 2-3x faster than before.
This commit is contained in:
2025-06-28 03:21:46 +01:00
parent eaaefd1b07
commit 56d2abe17f
8 changed files with 105 additions and 121 deletions
Generated
+3 -2
View File
@@ -783,9 +783,9 @@ dependencies = [
[[package]] [[package]]
name = "discord-presence" name = "discord-presence"
version = "1.6.0" version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91d7c2fc01ffdc327e2b66d65dd59b8bd3f31a17e88811ce0540412fa0b84c1" checksum = "91948dab8ccaaefeb7be5f21714ed774411df66830def1f99dd74e7a85f5f111"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"bytes", "bytes",
@@ -1027,6 +1027,7 @@ dependencies = [
"eframe", "eframe",
"egui", "egui",
"egui_file", "egui_file",
"rustc-hash 2.1.1",
"serde", "serde",
"toml", "toml",
"winit", "winit",
+4
View File
@@ -15,3 +15,7 @@ panic = "abort" # Cranelift does not support stack unwinds.
lto = false lto = false
debug = true debug = true
incremental = false # sccache does not support caching incremental crates. incremental = false # sccache does not support caching incremental crates.
[profile.release]
debug = true
lto = "fat"
+1
View File
@@ -0,0 +1 @@
disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"]
+2 -1
View File
@@ -19,10 +19,11 @@ assembler = { path = "../assembler" }
dsa_editor = { path = "../dsa_editor" } dsa_editor = { path = "../dsa_editor" }
egui = "0.31.1" egui = "0.31.1"
dirs = "6.0.0" dirs = "6.0.0"
discord-presence = { version = "1.6.0", optional = true } discord-presence = { version = "2.0.0", optional = true }
toml = { version = "0.8.23", optional = true } toml = { version = "0.8.23", optional = true }
serde = { version = "1.0.219", features = ["derive"], optional = true } serde = { version = "1.0.219", features = ["derive"], optional = true }
egui_file = "0.22.1" egui_file = "0.22.1"
rustc-hash = "2.1.1"
[features] [features]
default = ["config"] default = ["config"]
+17 -31
View File
@@ -36,7 +36,9 @@ pub fn run_emulator(
let mut update = false; let mut update = false;
loop { loop {
let cmd = if running == Running::Running || step > 0 { let cmd = if step > 0 {
None
} else if running == Running::Running && step == 0 {
match cmd_rx.try_recv() { match cmd_rx.try_recv() {
Ok(cmd) => Some(cmd), Ok(cmd) => Some(cmd),
Err(mpsc::TryRecvError::Empty) => { Err(mpsc::TryRecvError::Empty) => {
@@ -52,10 +54,15 @@ pub fn run_emulator(
} }
}; };
if running == Running::Running && step == 0 {
step = 32768;
}
if let Some(cmd) = cmd { if let Some(cmd) = cmd {
match cmd { match cmd {
Command::Start => { Command::Start => {
running = Running::Running; running = Running::Running;
step = 32768;
// Update RPC with current state. TODO: Make this only occur on state // Update RPC with current state. TODO: Make this only occur on state
// changes. // changes.
@@ -71,9 +78,11 @@ pub fn run_emulator(
} }
Command::Stop => { Command::Stop => {
running = Running::Paused; running = Running::Paused;
step = 0;
} }
Command::Reset(x) => { Command::Reset(x) => {
running = Running::Paused; running = Running::Paused;
step = 0;
match x { match x {
0 => { 0 => {
@@ -95,20 +104,12 @@ pub fn run_emulator(
} }
Command::Step(x) => { Command::Step(x) => {
step = x; step = x;
running = Running::Paused;
} }
Command::Write(offset, data) => { Command::Write(offset, data) => {
update = true; update = true;
processor processor.memory.write_range(offset, data);
.memory
.write_range(offset, data)
.unwrap_or_else(|_| {
report_err(
state_tx,
"Failed to write memory range!",
&mut processor,
);
});
} }
Command::Interrupt(_interrupt) => { Command::Interrupt(_interrupt) => {
update = true; update = true;
@@ -118,14 +119,7 @@ pub fn run_emulator(
Command::MemRequest(new, size) if update => { Command::MemRequest(new, size) if update => {
addr = new; addr = new;
let _ = state_tx.send(StateUpdate::MemoryView( let _ = state_tx.send(StateUpdate::MemoryView(
processor.memory.read_range(addr, size).unwrap_or_else(|_| { processor.memory.read_range(addr, size),
report_err(
state_tx,
"Failed to read memory range!",
&mut processor,
);
Vec::new()
}),
)); ));
} }
Command::DisplayRequest if update => { Command::DisplayRequest if update => {
@@ -163,26 +157,16 @@ pub fn run_emulator(
let _ = state_tx.send(StateUpdate::Instructions(instruction_count)); let _ = state_tx.send(StateUpdate::Instructions(instruction_count));
} }
Command::WriteBlock(addr, block) => { Command::WriteBlock(addr, block) => {
processor processor.memory.write_range(addr, block.to_vec());
.memory
.write_range(addr, block.to_vec())
.unwrap_or_else(|_| {
report_err(
state_tx,
"Failed to write memory block!",
&mut processor,
);
});
} }
_ => {} _ => {}
} }
} }
if step > 0 { if running == Running::Paused && step > 0 {
step -= 1; step -= 1;
update = true; update = true;
running = Running::Paused;
// Execute one cycle. // Execute one cycle.
match processor.cycle() { match processor.cycle() {
@@ -207,6 +191,7 @@ pub fn run_emulator(
} }
if running == Running::Running { if running == Running::Running {
step -= 1;
update = true; update = true;
// Execute one cycle. // Execute one cycle.
@@ -230,6 +215,7 @@ pub fn run_emulator(
history.push(instruction); history.push(instruction);
if matches!(instruction.1, Instruction::Halt) { if matches!(instruction.1, Instruction::Halt) {
running = Running::Halted; running = Running::Halted;
step = 0;
} }
instruction_count += 1; instruction_count += 1;
+65 -70
View File
@@ -1,53 +1,51 @@
use std::collections::HashMap; use rustc_hash::FxHashMap;
use crate::emulator::system::model::ProcessorError; use crate::emulator::system::model::ProcessorError;
pub trait MemoryUnit: Send + Sync { pub trait MemoryUnit: Send + Sync {
fn reset(&mut self); fn reset(&mut self);
fn read_byte(&mut self, addr: u32) -> Result<u8, ProcessorError>; fn read_byte(&mut self, addr: u32) -> u8;
fn write_byte(&mut self, addr: u32, value: u8) -> Result<(), ProcessorError>; fn write_byte(&mut self, addr: u32, value: u8);
fn read_word(&mut self, addr: u32) -> Result<u32, ProcessorError>; fn read_word(&mut self, addr: u32) -> Result<u32, ProcessorError>;
fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError>; fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError>;
fn read_range(&mut self, addr: u32, size: u32) -> Result<Vec<u8>, ProcessorError> { fn read_range(&mut self, addr: u32, size: u32) -> Vec<u8> {
let mut data = Vec::with_capacity(size as usize); let mut data = Vec::with_capacity(size as usize);
for i in 0..size { for i in 0..size {
data.push(self.read_byte(addr + i)?); data.push(self.read_byte(addr + i));
} }
Ok(data) data
} }
fn write_range(&mut self, addr: u32, value: Vec<u8>) -> Result<(), ProcessorError> { fn write_range(&mut self, addr: u32, value: Vec<u8>) {
for (i, byte) in value.into_iter().enumerate() { for (i, byte) in value.into_iter().enumerate() {
self.write_byte(addr + i as u32, byte)?; self.write_byte(addr + i as u32, byte);
} }
Ok(())
} }
fn read_block(&mut self, addr: u32) -> Result<[u8; 256], ProcessorError> { fn read_block(&mut self, addr: u32) -> &[u8; 256];
let mut data = [0; 256];
for (i, byte) in data.iter_mut().enumerate() {
*byte = self.read_byte(addr + i as u32)?;
}
Ok(data)
}
fn write_block(&mut self, addr: u32, data: [u8; 256]) -> Result<(), ProcessorError> { fn write_block(&mut self, addr: u32, data: &[u8; 256]) {
for (i, byte) in data.iter().enumerate() { for (i, byte) in data.iter().enumerate() {
self.write_byte(addr + i as u32, *byte)?; self.write_byte(addr + i as u32, *byte);
} }
Ok(())
} }
} }
pub struct MainStore { pub struct MainStore {
pub data: HashMap<u32, Block>, pub data: FxHashMap<u32, Block>,
} }
pub struct Block { pub struct Block {
data: [u8; 256], data: [u8; 256],
} }
impl Default for Block {
fn default() -> Self {
Self { data: [0; 256] }
}
}
impl Default for MainStore { impl Default for MainStore {
fn default() -> Self { fn default() -> Self {
Self::new() Self::new()
@@ -58,79 +56,73 @@ impl MainStore {
#[must_use] #[must_use]
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
data: HashMap::new(), data: FxHashMap::default(),
} }
} }
#[inline]
const fn segment_addr(addr: u32) -> (u32, u8) { const fn segment_addr(addr: u32) -> (u32, u8) {
(addr / 256, (addr % 256) as u8) (addr / 256, (addr % 256) as u8)
} }
#[inline]
fn mut_block(&mut self, addr: u32) -> &mut Block { fn mut_block(&mut self, addr: u32) -> &mut Block {
self.data self.data.entry(addr).or_default()
.entry(addr)
.or_insert_with(|| Block { data: [0; 256] });
self.data.get_mut(&addr).map_or_else(
|| panic!("Could not fetch block with address {addr:x?}"),
|block| block,
)
} }
#[inline]
fn block(&mut self, addr: u32) -> &Block { fn block(&mut self, addr: u32) -> &Block {
self.data self.data.entry(addr).or_default()
.entry(addr)
.or_insert_with(|| Block { data: [0; 256] });
self.data.get(&addr).map_or_else(
|| panic!("Could not fetch block with address {addr:x?}"),
|block| block,
)
} }
} }
impl MemoryUnit for MainStore { impl MemoryUnit for MainStore {
#[inline]
fn reset(&mut self) { fn reset(&mut self) {
self.data.clear(); self.data.clear();
} }
fn read_byte(&mut self, addr: u32) -> Result<u8, ProcessorError> { #[inline]
fn read_byte(&mut self, addr: u32) -> u8 {
let (block_addr, offset) = Self::segment_addr(addr); let (block_addr, offset) = Self::segment_addr(addr);
let block = self.block(block_addr); let block = self.block(block_addr);
Ok(block.data[offset as usize]) block.data[offset as usize]
} }
#[inline]
fn read_word(&mut self, addr: u32) -> Result<u32, ProcessorError> { fn read_word(&mut self, addr: u32) -> Result<u32, ProcessorError> {
if addr % 4 != 0 { if addr % 4 != 0 {
return Err(ProcessorError::BadMemoryAccess(addr)); return Err(ProcessorError::BadMemoryAccess(addr));
} }
let (block_addr, offset) = Self::segment_addr(addr); let (block_addr, offset) = Self::segment_addr(addr);
let block = self.mut_block(block_addr); let offset = offset as usize;
let mut bytes = [0; 4]; let block = self.block(block_addr);
bytes[0] = block.data[offset as usize]; Ok(u32::from_be_bytes(
bytes[1] = block.data[(offset + 1) as usize]; block.data[offset..=offset + 3]
bytes[2] = block.data[(offset + 2) as usize]; .try_into()
bytes[3] = block.data[(offset + 3) as usize]; .expect("Failed to read word!"),
Ok(u32::from_be_bytes(bytes)) ))
} }
fn read_range(&mut self, addr: u32, size: u32) -> Result<Vec<u8>, ProcessorError> { #[inline]
fn read_range(&mut self, addr: u32, size: u32) -> Vec<u8> {
let mut data = Vec::with_capacity(size as usize); let mut data = Vec::with_capacity(size as usize);
for i in 0..size { for i in 0..size {
data.push(self.read_byte(addr + i)?); data.push(self.read_byte(addr + i));
} }
Ok(data) data
} }
fn write_byte(&mut self, addr: u32, value: u8) -> Result<(), ProcessorError> { #[inline]
fn write_byte(&mut self, addr: u32, value: u8) {
let (block_addr, offset) = Self::segment_addr(addr); let (block_addr, offset) = Self::segment_addr(addr);
let block = self.mut_block(block_addr); let block = self.mut_block(block_addr);
block.data[offset as usize] = value; block.data[offset as usize] = value;
Ok(())
} }
#[inline]
fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError> { fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError> {
if addr % 4 != 0 { if addr % 4 != 0 {
return Err(ProcessorError::BadMemoryAccess(addr)); return Err(ProcessorError::BadMemoryAccess(addr));
@@ -138,33 +130,36 @@ impl MemoryUnit for MainStore {
let (block_addr, offset) = Self::segment_addr(addr); let (block_addr, offset) = Self::segment_addr(addr);
let block = self.mut_block(block_addr); let block = self.mut_block(block_addr);
block.data[offset as usize] = (value >> 24) as u8; block.data[offset as usize..=(offset + 3) as usize]
block.data[(offset + 1) as usize] = (value >> 16) as u8; .copy_from_slice(&value.to_be_bytes());
block.data[(offset + 2) as usize] = (value >> 8) as u8;
block.data[(offset + 3) as usize] = value as u8;
Ok(()) Ok(())
} }
fn write_range(&mut self, addr: u32, value: Vec<u8>) -> Result<(), ProcessorError> { #[inline]
for (i, byte) in value.into_iter().enumerate() { fn write_range(&mut self, addr: u32, value: Vec<u8>) {
let (block_addr, offset) = Self::segment_addr(addr + i as u32); let mut current_block_addr = addr / 256;
let block = self.mut_block(block_addr); let mut current_block = self.mut_block(current_block_addr);
block.data[offset as usize] = byte; let mut offset = addr % 256;
for byte in value {
current_block.data[offset as usize] = byte;
offset += 1;
if offset >= 256 {
offset = 0;
current_block_addr += 1;
current_block = self.mut_block(current_block_addr);
}
}
} }
Ok(()) #[inline]
} fn read_block(&mut self, addr: u32) -> &[u8; 256] {
fn read_block(&mut self, addr: u32) -> Result<[u8; 256], ProcessorError> {
let (block_addr, _) = Self::segment_addr(addr); let (block_addr, _) = Self::segment_addr(addr);
let block = self.block(block_addr); &self.block(block_addr).data
Ok(block.data)
} }
fn write_block(&mut self, addr: u32, data: [u8; 256]) -> Result<(), ProcessorError> { #[inline]
fn write_block(&mut self, addr: u32, data: &[u8; 256]) {
let (block_addr, _) = Self::segment_addr(addr); let (block_addr, _) = Self::segment_addr(addr);
let block = self.mut_block(block_addr); let _ = self.data.insert(block_addr, Block { data: *data });
block.data = data;
Ok(())
} }
} }
@@ -51,7 +51,7 @@ impl Processor {
// Get value from PCX. // Get value from PCX.
let addr = self.fetch()?; let addr = self.fetch()?;
// Increment PCX. // Increment PCX.
self.advance(); self.advance()?;
// Set MAR to the previous value of PCX. // Set MAR to the previous value of PCX.
*self.reg(Register::Mar)? = addr; *self.reg(Register::Mar)? = addr;
@@ -84,7 +84,7 @@ impl Processor {
} }
pub fn display(&mut self) -> Result<Vec<u8>, ProcessorError> { pub fn display(&mut self) -> Result<Vec<u8>, ProcessorError> {
self.memory.read_range(0x20000, 2000) Ok(self.memory.read_range(0x20000, 2000))
} }
pub fn cmp(&mut self, a: u32, b: u32) { pub fn cmp(&mut self, a: u32, b: u32) {
@@ -163,10 +163,10 @@ impl Processor {
let addr = self.get(Register::Spr)?; let addr = self.get(Register::Spr)?;
let size = n * 4; let size = n * 4;
// returns the stack // returns the stack
self.memory.read_range( Ok(self.memory.read_range(
max(addr, 0), // ensures that we cannot read from a negative address max(addr, 0), // ensures that we cannot read from a negative address
min(size, addr), // ensures we don't read above the top of the stack min(size, addr), // ensures we don't read above the top of the stack
) ))
} }
} }
@@ -209,7 +209,7 @@ impl Executable for Instruction {
Self::LoadByte(a) => { Self::LoadByte(a) => {
*cpu.reg(a.r2)? = u32::from( *cpu.reg(a.r2)? = u32::from(
cpu.memory cpu.memory
.read_byte(cpu.get(a.r1)? + u32::from(a.immediate))?, .read_byte(cpu.get(a.r1)? + u32::from(a.immediate)),
); );
} }
@@ -218,7 +218,7 @@ impl Executable for Instruction {
Self::LoadByteSigned(a) => { Self::LoadByteSigned(a) => {
*cpu.reg(a.r2)? = sign_extend(u32::from( *cpu.reg(a.r2)? = sign_extend(u32::from(
cpu.memory cpu.memory
.read_byte(cpu.get(a.r1)? + u32::from(a.immediate))?, .read_byte(cpu.get(a.r1)? + u32::from(a.immediate)),
)); ));
} }
@@ -257,7 +257,7 @@ impl Executable for Instruction {
cpu.memory.write_byte( cpu.memory.write_byte(
cpu.get(a.r2)? + u32::from(a.immediate), cpu.get(a.r2)? + u32::from(a.immediate),
cpu.get(a.r1)? as u8, cpu.get(a.r1)? as u8,
)?; );
} }
// Stores a half-word from SrcReg in memory address (base + offset) The // Stores a half-word from SrcReg in memory address (base + offset) The
@@ -266,9 +266,9 @@ impl Executable for Instruction {
// split the value into bytes and then write two bytes // split the value into bytes and then write two bytes
let bytes = (cpu.get(a.r1)? as u16).to_le_bytes(); let bytes = (cpu.get(a.r1)? as u16).to_le_bytes();
cpu.memory cpu.memory
.write_byte(cpu.get(a.r2)? + u32::from(a.immediate), bytes[0])?; .write_byte(cpu.get(a.r2)? + u32::from(a.immediate), bytes[0]);
cpu.memory cpu.memory
.write_byte(cpu.get(a.r2)? + u32::from(a.immediate) + 1, bytes[1])?; .write_byte(cpu.get(a.r2)? + u32::from(a.immediate) + 1, bytes[1]);
} }
// Stores a word from SrcReg in memory address (base + offset) The effective // Stores a word from SrcReg in memory address (base + offset) The effective
@@ -81,9 +81,7 @@ fn test_mov_signed_instruction() {
fn test_load_byte_instruction() { fn test_load_byte_instruction() {
let mut cpu = create_test_processor(); let mut cpu = create_test_processor();
let addr = 0x100; let addr = 0x100;
cpu.memory cpu.memory.write_byte(addr, 0xAB);
.write_byte(addr, 0xAB)
.expect("Failed to write byte to memory");
*cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr - 4; *cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr - 4;
let load_byte_instr = Instruction::LoadByte(ITypeArgs::new( let load_byte_instr = Instruction::LoadByte(ITypeArgs::new(
@@ -105,9 +103,7 @@ fn test_load_byte_instruction() {
fn test_load_byte_signed_instruction() { fn test_load_byte_signed_instruction() {
let mut cpu = create_test_processor(); let mut cpu = create_test_processor();
let addr = 0x100; let addr = 0x100;
cpu.memory cpu.memory.write_byte(addr, 0xFF);
.write_byte(addr, 0xFF)
.expect("Failed to write byte to memory");
*cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr; *cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr;
let load_byte_signed_instr = Instruction::LoadByteSigned(ITypeArgs::new( let load_byte_signed_instr = Instruction::LoadByteSigned(ITypeArgs::new(
@@ -189,7 +185,7 @@ fn test_store_byte_instruction() {
store_byte_instr.execute(&mut cpu).expect( store_byte_instr.execute(&mut cpu).expect(
"Emulator was slain by losing the game while attempting to execute instruction", "Emulator was slain by losing the game while attempting to execute instruction",
); );
assert_eq!(cpu.memory.read_byte(addr).expect("Emulator was slain by losing the game while attempting to execute instruction"), 0xAB); assert_eq!(cpu.memory.read_byte(addr), 0xAB);
} }
#[test] #[test]