From 56d2abe17f4bb033ed6dcae141fc667d50848a51 Mon Sep 17 00:00:00 2001 From: zxq5 Date: Sat, 28 Jun 2025 03:21:46 +0100 Subject: [PATCH] - optimised main emulator loop, allowing updates only once every roughly 32,000 instructions. - optimised memory access patterns, removing unnecessary mutability and accesses. - replaced the standard HashMap with an implementation that uses a faster hashing algorithm. results: before: - our benchmark program with ~4m instructions would take a while for its data to make it to the UI, and a bit over 200ms to actually run after: - our benchmark program with ~4m instructions can run in around 75ms, and the UI receives the update almost instantly. conclusion: - emulator performance should be around 2-3x faster than before. --- Cargo.lock | 5 +- Cargo.toml | 8 +- clippy.toml | 1 + emulator/Cargo.toml | 3 +- emulator/src/emulator/system/emulator.rs | 48 +++---- emulator/src/emulator/system/memory.rs | 133 +++++++++--------- emulator/src/emulator/system/processor/mod.rs | 18 +-- .../src/emulator/system/processor/tests.rs | 10 +- 8 files changed, 105 insertions(+), 121 deletions(-) create mode 100644 clippy.toml diff --git a/Cargo.lock b/Cargo.lock index a84889a..1583939 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -783,9 +783,9 @@ dependencies = [ [[package]] name = "discord-presence" -version = "1.6.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91d7c2fc01ffdc327e2b66d65dd59b8bd3f31a17e88811ce0540412fa0b84c1" +checksum = "91948dab8ccaaefeb7be5f21714ed774411df66830def1f99dd74e7a85f5f111" dependencies = [ "byteorder", "bytes", @@ -1027,6 +1027,7 @@ dependencies = [ "eframe", "egui", "egui_file", + "rustc-hash 2.1.1", "serde", "toml", "winit", diff --git a/Cargo.toml b/Cargo.toml index d5039da..3a0bd57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,11 @@ authors = ["zxq5", "nullndvoid"] [profile.dev] codegen-backend = "cranelift" -panic = "abort" # Cranelift does not support stack
unwinds. +panic = "abort" # Cranelift does not support stack unwinds. lto = false debug = true -incremental = false # sccache does not support caching incremental crates. +incremental = false # sccache does not support caching incremental crates. + +[profile.release] +debug = true +lto = "fat" diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000..ad9bd11 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] diff --git a/emulator/Cargo.toml b/emulator/Cargo.toml index 23f7142..d2037bd 100644 --- a/emulator/Cargo.toml +++ b/emulator/Cargo.toml @@ -19,10 +19,11 @@ assembler = { path = "../assembler" } dsa_editor = { path = "../dsa_editor" } egui = "0.31.1" dirs = "6.0.0" -discord-presence = { version = "1.6.0", optional = true } +discord-presence = { version = "2.0.0", optional = true } toml = { version = "0.8.23", optional = true } serde = { version = "1.0.219", features = ["derive"], optional = true } egui_file = "0.22.1" +rustc-hash = "2.1.1" [features] default = ["config"] diff --git a/emulator/src/emulator/system/emulator.rs b/emulator/src/emulator/system/emulator.rs index f6e5c0c..ebe0f9c 100644 --- a/emulator/src/emulator/system/emulator.rs +++ b/emulator/src/emulator/system/emulator.rs @@ -36,7 +36,9 @@ pub fn run_emulator( let mut update = false; loop { - let cmd = if running == Running::Running || step > 0 { + let cmd = if step > 0 { + None + } else if running == Running::Running && step == 0 { match cmd_rx.try_recv() { Ok(cmd) => Some(cmd), Err(mpsc::TryRecvError::Empty) => { @@ -52,10 +54,15 @@ pub fn run_emulator( } }; + if running == Running::Running && step == 0 { + step = 32768; + } + if let Some(cmd) = cmd { match cmd { Command::Start => { running = Running::Running; + step = 32768; // Update RPC with current state. TODO: Make this only occur on state // changes. 
@@ -71,9 +78,11 @@ pub fn run_emulator( } Command::Stop => { running = Running::Paused; + step = 0; } Command::Reset(x) => { running = Running::Paused; + step = 0; match x { 0 => { @@ -95,20 +104,12 @@ pub fn run_emulator( } Command::Step(x) => { step = x; + running = Running::Paused; } Command::Write(offset, data) => { update = true; - processor - .memory - .write_range(offset, data) - .unwrap_or_else(|_| { - report_err( - state_tx, - "Failed to write memory range!", - &mut processor, - ); - }); + processor.memory.write_range(offset, data); } Command::Interrupt(_interrupt) => { update = true; @@ -118,14 +119,7 @@ pub fn run_emulator( Command::MemRequest(new, size) if update => { addr = new; let _ = state_tx.send(StateUpdate::MemoryView( - processor.memory.read_range(addr, size).unwrap_or_else(|_| { - report_err( - state_tx, - "Failed to read memory range!", - &mut processor, - ); - Vec::new() - }), + processor.memory.read_range(addr, size), )); } Command::DisplayRequest if update => { @@ -163,26 +157,16 @@ pub fn run_emulator( let _ = state_tx.send(StateUpdate::Instructions(instruction_count)); } Command::WriteBlock(addr, block) => { - processor - .memory - .write_range(addr, block.to_vec()) - .unwrap_or_else(|_| { - report_err( - state_tx, - "Failed to write memory block!", - &mut processor, - ); - }); + processor.memory.write_range(addr, block.to_vec()); } _ => {} } } - if step > 0 { + if running == Running::Paused && step > 0 { step -= 1; update = true; - running = Running::Paused; // Execute one cycle. match processor.cycle() { @@ -207,6 +191,7 @@ pub fn run_emulator( } if running == Running::Running { + step -= 1; update = true; // Execute one cycle. 
@@ -230,6 +215,7 @@ pub fn run_emulator( history.push(instruction); if matches!(instruction.1, Instruction::Halt) { running = Running::Halted; + step = 0; } instruction_count += 1; diff --git a/emulator/src/emulator/system/memory.rs b/emulator/src/emulator/system/memory.rs index b6a1425..6b83e15 100644 --- a/emulator/src/emulator/system/memory.rs +++ b/emulator/src/emulator/system/memory.rs @@ -1,53 +1,51 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMap; use crate::emulator::system::model::ProcessorError; pub trait MemoryUnit: Send + Sync { fn reset(&mut self); - fn read_byte(&mut self, addr: u32) -> Result; - fn write_byte(&mut self, addr: u32, value: u8) -> Result<(), ProcessorError>; + fn read_byte(&mut self, addr: u32) -> u8; + fn write_byte(&mut self, addr: u32, value: u8); fn read_word(&mut self, addr: u32) -> Result; fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError>; - fn read_range(&mut self, addr: u32, size: u32) -> Result, ProcessorError> { + fn read_range(&mut self, addr: u32, size: u32) -> Vec { let mut data = Vec::with_capacity(size as usize); for i in 0..size { - data.push(self.read_byte(addr + i)?); + data.push(self.read_byte(addr + i)); } - Ok(data) + data } - fn write_range(&mut self, addr: u32, value: Vec) -> Result<(), ProcessorError> { + fn write_range(&mut self, addr: u32, value: Vec) { for (i, byte) in value.into_iter().enumerate() { - self.write_byte(addr + i as u32, byte)?; + self.write_byte(addr + i as u32, byte); } - Ok(()) } - fn read_block(&mut self, addr: u32) -> Result<[u8; 256], ProcessorError> { - let mut data = [0; 256]; - for (i, byte) in data.iter_mut().enumerate() { - *byte = self.read_byte(addr + i as u32)?; - } - Ok(data) - } + fn read_block(&mut self, addr: u32) -> &[u8; 256]; - fn write_block(&mut self, addr: u32, data: [u8; 256]) -> Result<(), ProcessorError> { + fn write_block(&mut self, addr: u32, data: &[u8; 256]) { for (i, byte) in data.iter().enumerate() { - self.write_byte(addr 
+ i as u32, *byte)?; + self.write_byte(addr + i as u32, *byte); } - Ok(()) } } pub struct MainStore { - pub data: HashMap, + pub data: FxHashMap, } pub struct Block { data: [u8; 256], } +impl Default for Block { + fn default() -> Self { + Self { data: [0; 256] } + } +} + impl Default for MainStore { fn default() -> Self { Self::new() @@ -58,79 +56,73 @@ impl MainStore { #[must_use] pub fn new() -> Self { Self { - data: HashMap::new(), + data: FxHashMap::default(), } } + #[inline] const fn segment_addr(addr: u32) -> (u32, u8) { (addr / 256, (addr % 256) as u8) } + #[inline] fn mut_block(&mut self, addr: u32) -> &mut Block { - self.data - .entry(addr) - .or_insert_with(|| Block { data: [0; 256] }); - - self.data.get_mut(&addr).map_or_else( - || panic!("Could not fetch block with address {addr:x?}"), - |block| block, - ) + self.data.entry(addr).or_default() } + #[inline] fn block(&mut self, addr: u32) -> &Block { - self.data - .entry(addr) - .or_insert_with(|| Block { data: [0; 256] }); - - self.data.get(&addr).map_or_else( - || panic!("Could not fetch block with address {addr:x?}"), - |block| block, - ) + self.data.entry(addr).or_default() } } impl MemoryUnit for MainStore { + #[inline] fn reset(&mut self) { self.data.clear(); } - fn read_byte(&mut self, addr: u32) -> Result { + #[inline] + fn read_byte(&mut self, addr: u32) -> u8 { let (block_addr, offset) = Self::segment_addr(addr); let block = self.block(block_addr); - Ok(block.data[offset as usize]) + block.data[offset as usize] } + #[inline] fn read_word(&mut self, addr: u32) -> Result { if addr % 4 != 0 { return Err(ProcessorError::BadMemoryAccess(addr)); } let (block_addr, offset) = Self::segment_addr(addr); - let block = self.mut_block(block_addr); - let mut bytes = [0; 4]; - bytes[0] = block.data[offset as usize]; - bytes[1] = block.data[(offset + 1) as usize]; - bytes[2] = block.data[(offset + 2) as usize]; - bytes[3] = block.data[(offset + 3) as usize]; - Ok(u32::from_be_bytes(bytes)) + let offset = offset 
as usize; + let block = self.block(block_addr); + Ok(u32::from_be_bytes( + block.data[offset..=offset + 3] + .try_into() + .expect("Failed to read word!"), + )) } - fn read_range(&mut self, addr: u32, size: u32) -> Result, ProcessorError> { + #[inline] + fn read_range(&mut self, addr: u32, size: u32) -> Vec { let mut data = Vec::with_capacity(size as usize); for i in 0..size { - data.push(self.read_byte(addr + i)?); + data.push(self.read_byte(addr + i)); } - Ok(data) + data } - fn write_byte(&mut self, addr: u32, value: u8) -> Result<(), ProcessorError> { + #[inline] + fn write_byte(&mut self, addr: u32, value: u8) { let (block_addr, offset) = Self::segment_addr(addr); let block = self.mut_block(block_addr); block.data[offset as usize] = value; - Ok(()) } + #[inline] fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError> { if addr % 4 != 0 { return Err(ProcessorError::BadMemoryAccess(addr)); @@ -138,33 +130,36 @@ impl MemoryUnit for MainStore { let (block_addr, offset) = Self::segment_addr(addr); let block = self.mut_block(block_addr); - block.data[offset as usize] = (value >> 24) as u8; - block.data[(offset + 1) as usize] = (value >> 16) as u8; - block.data[(offset + 2) as usize] = (value >> 8) as u8; - block.data[(offset + 3) as usize] = value as u8; + block.data[offset as usize..=(offset + 3) as usize] + .copy_from_slice(&value.to_be_bytes()); Ok(()) } - fn write_range(&mut self, addr: u32, value: Vec) -> Result<(), ProcessorError> { - for (i, byte) in value.into_iter().enumerate() { - let (block_addr, offset) = Self::segment_addr(addr + i as u32); - let block = self.mut_block(block_addr); - block.data[offset as usize] = byte; + #[inline] + fn write_range(&mut self, addr: u32, value: Vec) { + let mut current_block_addr = addr / 256; + let mut current_block = self.mut_block(current_block_addr); + let mut offset = addr % 256; + for byte in value { + current_block.data[offset as usize] = byte; + offset += 1; + if offset >= 256 { + offset = 0; 
+ current_block_addr += 1; + current_block = self.mut_block(current_block_addr); + } } - - Ok(()) } - fn read_block(&mut self, addr: u32) -> Result<[u8; 256], ProcessorError> { + #[inline] + fn read_block(&mut self, addr: u32) -> &[u8; 256] { let (block_addr, _) = Self::segment_addr(addr); - let block = self.block(block_addr); - Ok(block.data) + &self.block(block_addr).data } - fn write_block(&mut self, addr: u32, data: [u8; 256]) -> Result<(), ProcessorError> { + #[inline] + fn write_block(&mut self, addr: u32, data: &[u8; 256]) { let (block_addr, _) = Self::segment_addr(addr); - let block = self.mut_block(block_addr); - block.data = data; - Ok(()) + let _ = self.data.insert(block_addr, Block { data: *data }); } } diff --git a/emulator/src/emulator/system/processor/mod.rs b/emulator/src/emulator/system/processor/mod.rs index d48b80d..29139da 100644 --- a/emulator/src/emulator/system/processor/mod.rs +++ b/emulator/src/emulator/system/processor/mod.rs @@ -51,7 +51,7 @@ impl Processor { // Get value from PCX. let addr = self.fetch()?; // Increment PCX. - self.advance(); + self.advance()?; // Set MAR to the previous value of PCX. *self.reg(Register::Mar)? = addr; @@ -84,7 +84,7 @@ impl Processor { } pub fn display(&mut self) -> Result, ProcessorError> { - self.memory.read_range(0x20000, 2000) + Ok(self.memory.read_range(0x20000, 2000)) } pub fn cmp(&mut self, a: u32, b: u32) { @@ -163,10 +163,10 @@ impl Processor { let addr = self.get(Register::Spr)?; let size = n * 4; // returns the stack - self.memory.read_range( + Ok(self.memory.read_range( max(addr, 0), // ensures that we cannot read from a negative address min(size, addr), // ensures we don't read above the top of the stack - ) + )) } } @@ -209,7 +209,7 @@ impl Executable for Instruction { Self::LoadByte(a) => { *cpu.reg(a.r2)? = u32::from( cpu.memory - .read_byte(cpu.get(a.r1)? + u32::from(a.immediate))?, + .read_byte(cpu.get(a.r1)? 
+ u32::from(a.immediate)), ); } @@ -218,7 +218,7 @@ impl Executable for Instruction { Self::LoadByteSigned(a) => { *cpu.reg(a.r2)? = sign_extend(u32::from( cpu.memory - .read_byte(cpu.get(a.r1)? + u32::from(a.immediate))?, + .read_byte(cpu.get(a.r1)? + u32::from(a.immediate)), )); } @@ -257,7 +257,7 @@ impl Executable for Instruction { cpu.memory.write_byte( cpu.get(a.r2)? + u32::from(a.immediate), cpu.get(a.r1)? as u8, - )?; + ); } // Stores a half-word from SrcReg in memory address (base + offset) The @@ -266,9 +266,9 @@ impl Executable for Instruction { // split the value into bytes and then write two bytes let bytes = (cpu.get(a.r1)? as u16).to_le_bytes(); cpu.memory - .write_byte(cpu.get(a.r2)? + u32::from(a.immediate), bytes[0])?; + .write_byte(cpu.get(a.r2)? + u32::from(a.immediate), bytes[0]); cpu.memory - .write_byte(cpu.get(a.r2)? + u32::from(a.immediate) + 1, bytes[1])?; + .write_byte(cpu.get(a.r2)? + u32::from(a.immediate) + 1, bytes[1]); } // Stores a word from SrcReg in memory address (base + offset) The effective diff --git a/emulator/src/emulator/system/processor/tests.rs b/emulator/src/emulator/system/processor/tests.rs index 6381e3a..c9ca424 100644 --- a/emulator/src/emulator/system/processor/tests.rs +++ b/emulator/src/emulator/system/processor/tests.rs @@ -81,9 +81,7 @@ fn test_mov_signed_instruction() { fn test_load_byte_instruction() { let mut cpu = create_test_processor(); let addr = 0x100; - cpu.memory - .write_byte(addr, 0xAB) - .expect("Failed to write byte to memory"); + cpu.memory.write_byte(addr, 0xAB); *cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr - 4; let load_byte_instr = Instruction::LoadByte(ITypeArgs::new( @@ -105,9 +103,7 @@ fn test_load_byte_instruction() { fn test_load_byte_signed_instruction() { let mut cpu = create_test_processor(); let addr = 0x100; - cpu.memory - .write_byte(addr, 0xFF) - .expect("Failed to write byte to memory"); + cpu.memory.write_byte(addr, 0xFF); 
*cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr; let load_byte_signed_instr = Instruction::LoadByteSigned(ITypeArgs::new( @@ -189,7 +185,7 @@ fn test_store_byte_instruction() { store_byte_instr.execute(&mut cpu).expect( "Emulator was slain by losing the game while attempting to execute instruction", ); - assert_eq!(cpu.memory.read_byte(addr).expect("Emulator was slain by losing the game while attempting to execute instruction"), 0xAB); + assert_eq!(cpu.memory.read_byte(addr), 0xAB); } #[test]