Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
407 changes: 407 additions & 0 deletions docs/planning/ARM64_TEST_CATALOG.md

Large diffs are not rendered by default.

32 changes: 17 additions & 15 deletions kernel/src/arch_impl/aarch64/context_switch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,21 +599,20 @@ fn switch_ttbr0_if_needed(_thread_id: u64) {
// NOTE: No logging - context switch path must be lock-free

unsafe {
// Write new TTBR0
// CRITICAL: On ARM64, changing TTBR0 does NOT automatically flush the TLB
// (unlike x86-64's CR3). We MUST flush the TLB after switching TTBR0,
// otherwise the parent process may use stale TLB entries from the child
// after a fork/exit cycle, causing CoW memory corruption.
core::arch::asm!(
"msr ttbr0_el1, {}",
"dsb ishst", // Ensure previous stores complete
"msr ttbr0_el1, {}", // Set new page table
"isb", // Synchronize context
"tlbi vmalle1is", // FLUSH ENTIRE TLB - critical for CoW correctness
"dsb ish", // Ensure TLB flush completes
"isb", // Synchronize instruction stream
in(reg) next_ttbr0,
options(nomem, nostack)
);

// Memory barriers required after page table switch
// DSB ISH: Ensure the write to TTBR0 is complete
// ISB: Flush instruction pipeline
core::arch::asm!(
"dsb ish",
"isb",
options(nomem, nostack, preserves_flags)
);
}

// Update saved process TTBR0
Expand Down Expand Up @@ -793,11 +792,14 @@ fn check_and_deliver_signals_for_current_thread_arm64(frame: &mut Aarch64Excepti
if let Some(ref page_table) = process.page_table {
let ttbr0_value = page_table.level_4_frame().start_address().as_u64();
unsafe {
// Write new TTBR0
// CRITICAL: Flush TLB after TTBR0 switch for CoW correctness
core::arch::asm!(
"msr ttbr0_el1, {}",
"dsb ish",
"isb",
"dsb ishst", // Ensure previous stores complete
"msr ttbr0_el1, {}", // Set new page table
"isb", // Synchronize context
"tlbi vmalle1is", // FLUSH ENTIRE TLB
"dsb ish", // Ensure TLB flush completes
"isb", // Synchronize instruction stream
in(reg) ttbr0_value,
options(nomem, nostack)
);
Expand Down
62 changes: 58 additions & 4 deletions kernel/src/arch_impl/aarch64/exception.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,59 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr:
return;
}

// Not a CoW fault or couldn't be handled - print debug info and hang
let frame = unsafe { &*frame };
// Not a CoW fault or couldn't be handled
let frame_ref = unsafe { &mut *frame };
crate::serial_println!("[exception] Data abort at address {:#x}", far);
crate::serial_println!(" ELR: {:#x}, ESR: {:#x}", frame.elr, esr);
crate::serial_println!(" ELR: {:#x}, ESR: {:#x}", frame_ref.elr, esr);
crate::serial_println!(" ISS: {:#x} (WnR={}, DFSC={:#x})",
iss, (iss >> 6) & 1, iss & 0x3F);
// Hang on unhandled data abort

// Check if from userspace (EL0) - SPSR[3:0] indicates source EL
let from_el0 = (frame_ref.spsr & 0xF) == 0;

if from_el0 {
// From userspace - terminate the process with SIGSEGV
crate::serial_println!("[exception] Terminating userspace process with SIGSEGV");

// Get current TTBR0 to find the process
let ttbr0: u64;
unsafe {
core::arch::asm!("mrs {}, ttbr0_el1", out(reg) ttbr0, options(nomem, nostack));
}
let page_table_phys = ttbr0 & !0xFFFF_0000_0000_0FFF;

// Find and terminate the process
let mut terminated = false;
crate::process::with_process_manager(|pm| {
if let Some((pid, process)) = pm.find_process_by_cr3_mut(page_table_phys) {
let name = process.name.clone();
crate::serial_println!("[exception] Killing process {} (PID {}) due to data abort",
name, pid.as_u64());
pm.exit_process(pid, -11); // SIGSEGV exit code
terminated = true;
} else {
crate::serial_println!("[exception] Could not find process with TTBR0={:#x}", page_table_phys);
}
});

if terminated {
// Mark scheduler needs reschedule
crate::task::scheduler::set_need_resched();

// Switch scheduler to idle thread
crate::task::scheduler::switch_to_idle();

// Modify exception frame to return to idle loop
// The idle loop runs in EL1 and will handle rescheduling
frame_ref.elr = crate::arch_impl::aarch64::idle_loop_arm64 as *const () as u64;
frame_ref.spsr = 0x3c5; // EL1h, interrupts enabled

// Return to idle loop via ERET
return;
}
}

// From kernel or couldn't terminate - hang
loop { unsafe { core::arch::asm!("wfi"); } }
}

Expand Down Expand Up @@ -422,6 +468,7 @@ fn exception_class_name(ec: u32) -> &'static str {
/// Returns false if this wasn't a CoW fault or couldn't be handled
fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
use crate::memory::arch_stub::{VirtAddr, Page, Size4KiB};
use crate::memory::cow_stats;
use crate::memory::frame_allocator::allocate_frame;
use crate::memory::frame_metadata::{frame_decref, frame_is_shared};
use crate::memory::process_memory::{is_cow_page, make_private_flags};
Expand All @@ -437,6 +484,9 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
return false;
}

// Track CoW fault count
cow_stats::TOTAL_FAULTS.fetch_add(1, core::sync::atomic::Ordering::Relaxed);

let faulting_addr = VirtAddr::new(far);

// Get current TTBR0 (user page table base)
Expand All @@ -458,6 +508,7 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
// Try to acquire process manager lock
match crate::process::try_manager() {
Some(mut guard) => {
cow_stats::MANAGER_PATH.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
let pm = match guard.as_mut() {
Some(pm) => pm,
None => return false,
Expand Down Expand Up @@ -519,6 +570,7 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
options(nostack)
);
}
cow_stats::SOLE_OWNER_OPT.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
crate::serial_println!("[COW] Made sole-owner page writable");
return true;
}
Expand Down Expand Up @@ -568,6 +620,7 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
);
}

cow_stats::PAGES_COPIED.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
crate::serial_println!(
"[COW] Copied page from {:#x} to {:#x}",
old_frame.start_address().as_u64(),
Expand All @@ -577,6 +630,7 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
true
}
None => {
cow_stats::DIRECT_PATH.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
crate::serial_println!("[COW] Manager lock held, cannot handle");
false
}
Expand Down
117 changes: 111 additions & 6 deletions kernel/src/arch_impl/aarch64/syscall_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,14 @@ fn check_and_deliver_signals_aarch64(frame: &mut Aarch64ExceptionFrame) {
if let Some(ref page_table) = process.page_table {
let ttbr0 = page_table.level_4_frame().start_address().as_u64();
unsafe {
// CRITICAL: Flush TLB after TTBR0 switch for CoW correctness
core::arch::asm!(
"dsb ishst",
"msr ttbr0_el1, {}",
"isb",
"dsb ishst", // Ensure previous stores complete
"msr ttbr0_el1, {}", // Set new page table
"isb", // Synchronize context
"tlbi vmalle1is", // FLUSH ENTIRE TLB
"dsb ish", // Ensure TLB flush completes
"isb", // Synchronize instruction stream
in(reg) ttbr0,
options(nostack)
);
Expand Down Expand Up @@ -741,9 +745,12 @@ fn dispatch_syscall(
}
}

// Testing syscalls (stubs)
syscall_nums::COW_STATS | syscall_nums::SIMULATE_OOM => {
(-38_i64) as u64 // -ENOSYS
// Testing/diagnostic syscalls
syscall_nums::COW_STATS => {
sys_cow_stats_aarch64(arg1)
}
syscall_nums::SIMULATE_OOM => {
sys_simulate_oom_aarch64(arg1)
}

syscall_nums::GETPID => sys_getpid(),
Expand Down Expand Up @@ -1360,6 +1367,104 @@ extern "C" {
) -> !;
}

// =============================================================================
// Testing/diagnostic syscall implementations for ARM64
// =============================================================================

/// CowStatsResult structure returned by sys_cow_stats
///
/// This is the record that `sys_cow_stats_aarch64` fills in at the
/// caller-supplied userspace pointer. `#[repr(C)]` pins the declaration order,
/// so the record is five consecutive `u64` values (40 bytes, no padding).
///
/// Matches the layout expected by userspace.
/// NOTE(review): the userspace definition is not visible from here - confirm
/// the field order stays in sync before adding, removing or reordering fields.
#[repr(C)]
struct CowStatsResultAarch64 {
// Copied from `total_faults` of the `cow_stats::get_stats()` snapshot.
total_faults: u64,
// Copied from `manager_path` of the snapshot.
manager_path: u64,
// Copied from `direct_path` of the snapshot.
direct_path: u64,
// Copied from `pages_copied` of the snapshot.
pages_copied: u64,
// Copied from `sole_owner_opt` of the snapshot.
sole_owner_opt: u64,
}

/// sys_cow_stats - Get Copy-on-Write statistics (for testing) - ARM64 implementation
///
/// This syscall is used to verify that the CoW optimization paths are working.
/// It returns the current CoW statistics to userspace.
///
/// Parameters:
/// - stats_ptr: pointer to a CowStatsResult structure in userspace
///
/// Returns: 0 on success, negative error code on failure
fn sys_cow_stats_aarch64(stats_ptr: u64) -> u64 {
use crate::memory::cow_stats;

if stats_ptr == 0 {
return (-14_i64) as u64; // -EFAULT - null pointer
}

// Validate the address is in userspace
if !crate::memory::layout::is_valid_user_address(stats_ptr) {
log::error!("sys_cow_stats_aarch64: Invalid userspace address {:#x}", stats_ptr);
return (-14_i64) as u64; // -EFAULT
}

// Get the current stats from the shared module
let stats = cow_stats::get_stats();

// Copy to userspace
unsafe {
let user_stats = stats_ptr as *mut CowStatsResultAarch64;
(*user_stats).total_faults = stats.total_faults;
(*user_stats).manager_path = stats.manager_path;
(*user_stats).direct_path = stats.direct_path;
(*user_stats).pages_copied = stats.pages_copied;
(*user_stats).sole_owner_opt = stats.sole_owner_opt;
}

log::debug!(
"sys_cow_stats_aarch64: total={}, manager={}, direct={}, copied={}, sole_owner={}",
stats.total_faults,
stats.manager_path,
stats.direct_path,
stats.pages_copied,
stats.sole_owner_opt
);

0
}

/// sys_simulate_oom - Enable or disable OOM simulation (for testing) - ARM64 implementation
///
/// Test hook for exercising the kernel's behaviour when frame allocation
/// fails, e.g. during Copy-on-Write page faults. With the `testing` feature
/// compiled in, it toggles the frame allocator's OOM simulation; without it,
/// the syscall only logs a warning and reports that it is unimplemented.
///
/// Parameters:
/// - enable: any non-zero value enables OOM simulation, 0 disables it
///
/// Returns: 0 on success, -ENOSYS if testing feature is not compiled in
///
/// # Warning
/// Only enable OOM simulation briefly for testing! It is described as
/// affecting ALL frame allocations, so leaving it on is expected to crash the
/// kernel.
fn sys_simulate_oom_aarch64(enable: u64) -> u64 {
    #[cfg(feature = "testing")]
    {
        match enable {
            0 => {
                crate::memory::frame_allocator::disable_oom_simulation();
                log::info!("sys_simulate_oom_aarch64: OOM simulation disabled");
            }
            _ => {
                crate::memory::frame_allocator::enable_oom_simulation();
                log::info!("sys_simulate_oom_aarch64: OOM simulation ENABLED");
            }
        }
        0
    }

    #[cfg(not(feature = "testing"))]
    {
        // errno for "function not implemented"
        const ENOSYS: i64 = 38;

        // The argument is only meaningful in testing builds; touch it so the
        // compiler does not warn about an unused parameter.
        let _ = enable;
        log::warn!("sys_simulate_oom_aarch64: testing feature not compiled in");
        (-ENOSYS) as u64
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
65 changes: 2 additions & 63 deletions kernel/src/interrupts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -636,69 +636,8 @@ extern "x86-interrupt" fn invalid_opcode_handler(stack_frame: InterruptStackFram
/// IMPORTANT: This function uses try_manager() to avoid deadlock when called
/// during signal delivery (which holds the process manager lock). If the lock
/// is held, we handle the CoW fault directly by manipulating page tables via CR3.
/// Copy-on-Write statistics for testing and debugging
///
/// A set of global atomic counters plus helpers to snapshot, reset and print
/// them. All accesses use `Ordering::Relaxed`.
pub mod cow_stats {
    use core::sync::atomic::{AtomicU64, Ordering};

    /// CoW statistics snapshot (one plain value per counter)
    #[allow(dead_code)]
    #[derive(Debug, Clone, Copy)]
    pub struct CowStats {
        pub total_faults: u64,
        pub manager_path: u64,
        pub direct_path: u64,
        pub pages_copied: u64,
        pub sole_owner_opt: u64,
    }

    /// Total CoW faults handled
    pub static TOTAL_FAULTS: AtomicU64 = AtomicU64::new(0);
    /// Faults handled via process manager (normal path)
    pub static MANAGER_PATH: AtomicU64 = AtomicU64::new(0);
    /// Faults handled via direct page table manipulation (lock-held path)
    pub static DIRECT_PATH: AtomicU64 = AtomicU64::new(0);
    /// Pages that were copied (frame was shared)
    pub static PAGES_COPIED: AtomicU64 = AtomicU64::new(0);
    /// Pages made writable without copy (sole owner optimization)
    pub static SOLE_OWNER_OPT: AtomicU64 = AtomicU64::new(0);

    /// Get current CoW statistics
    ///
    /// Each counter is loaded independently, in declaration order, so the
    /// snapshot is not atomic across counters.
    #[allow(dead_code)]
    pub fn get_stats() -> CowStats {
        let read = |counter: &AtomicU64| counter.load(Ordering::Relaxed);

        let total_faults = read(&TOTAL_FAULTS);
        let manager_path = read(&MANAGER_PATH);
        let direct_path = read(&DIRECT_PATH);
        let pages_copied = read(&PAGES_COPIED);
        let sole_owner_opt = read(&SOLE_OWNER_OPT);

        CowStats {
            total_faults,
            manager_path,
            direct_path,
            pages_copied,
            sole_owner_opt,
        }
    }

    /// Reset all statistics (for testing)
    #[allow(dead_code)]
    pub fn reset_stats() {
        let counters = [
            &TOTAL_FAULTS,
            &MANAGER_PATH,
            &DIRECT_PATH,
            &PAGES_COPIED,
            &SOLE_OWNER_OPT,
        ];
        for counter in counters.iter() {
            counter.store(0, Ordering::Relaxed);
        }
    }

    #[allow(dead_code)]
    impl CowStats {
        /// Print statistics to serial output
        pub fn print(&self) {
            let CowStats {
                total_faults,
                manager_path,
                direct_path,
                pages_copied,
                sole_owner_opt,
            } = *self;

            crate::serial_println!(
                "[COW STATS] total={} manager={} direct={} copied={} sole_owner={}",
                total_faults,
                manager_path,
                direct_path,
                pages_copied,
                sole_owner_opt
            );
        }
    }
}
/// Copy-on-Write statistics - re-export from architecture-independent module
pub use crate::memory::cow_stats;

fn handle_cow_fault(
faulting_addr: VirtAddr,
Expand Down
Loading
Loading