diff --git a/kernel/src/arch_impl/aarch64/context_switch.rs b/kernel/src/arch_impl/aarch64/context_switch.rs
index 6e2ae4ce..c0e1c2dd 100644
--- a/kernel/src/arch_impl/aarch64/context_switch.rs
+++ b/kernel/src/arch_impl/aarch64/context_switch.rs
@@ -153,10 +153,29 @@ pub extern "C" fn check_need_resched_and_switch_arm64(
 /// Save userspace context for the current thread.
 fn save_userspace_context_arm64(thread_id: u64, frame: &Aarch64ExceptionFrame) {
     crate::task::scheduler::with_thread_mut(thread_id, |thread| {
-        // Save X0 (return value register) - important for fork/syscall returns
+        // Save ALL general-purpose registers from exception frame.
+        // CRITICAL: When a userspace thread is context-switched (e.g., for blocking I/O
+        // or preemption), its caller-saved registers (x1-x18) may contain important
+        // values that must be preserved for correct execution when resumed.
         thread.context.x0 = frame.x0;
-
-        // Save callee-saved registers from exception frame
+        thread.context.x1 = frame.x1;
+        thread.context.x2 = frame.x2;
+        thread.context.x3 = frame.x3;
+        thread.context.x4 = frame.x4;
+        thread.context.x5 = frame.x5;
+        thread.context.x6 = frame.x6;
+        thread.context.x7 = frame.x7;
+        thread.context.x8 = frame.x8;
+        thread.context.x9 = frame.x9;
+        thread.context.x10 = frame.x10;
+        thread.context.x11 = frame.x11;
+        thread.context.x12 = frame.x12;
+        thread.context.x13 = frame.x13;
+        thread.context.x14 = frame.x14;
+        thread.context.x15 = frame.x15;
+        thread.context.x16 = frame.x16;
+        thread.context.x17 = frame.x17;
+        thread.context.x18 = frame.x18;
         thread.context.x19 = frame.x19;
         thread.context.x20 = frame.x20;
         thread.context.x21 = frame.x21;
@@ -468,11 +487,30 @@ fn restore_userspace_context_arm64(thread_id: u64, frame: &mut Aarch64ExceptionF
 
     // Restore saved context
     crate::task::scheduler::with_thread_mut(thread_id, |thread| {
-        // Restore X0 - important for fork() return value
-        // For forked children, x0 is set to 0; for parent, it will be the child PID
+        // Restore ALL general-purpose registers.
+        // CRITICAL: For forked children, the caller-saved registers (x1-x18) contain
+        // important values from the parent's execution state that must be preserved.
+        // Only restoring callee-saved registers (x19-x30) would leave x1-x18 with
+        // garbage values from the previous thread's exception frame, causing crashes.
         frame.x0 = thread.context.x0;
-
-        // Restore callee-saved registers
+        frame.x1 = thread.context.x1;
+        frame.x2 = thread.context.x2;
+        frame.x3 = thread.context.x3;
+        frame.x4 = thread.context.x4;
+        frame.x5 = thread.context.x5;
+        frame.x6 = thread.context.x6;
+        frame.x7 = thread.context.x7;
+        frame.x8 = thread.context.x8;
+        frame.x9 = thread.context.x9;
+        frame.x10 = thread.context.x10;
+        frame.x11 = thread.context.x11;
+        frame.x12 = thread.context.x12;
+        frame.x13 = thread.context.x13;
+        frame.x14 = thread.context.x14;
+        frame.x15 = thread.context.x15;
+        frame.x16 = thread.context.x16;
+        frame.x17 = thread.context.x17;
+        frame.x18 = thread.context.x18;
         frame.x19 = thread.context.x19;
         frame.x20 = thread.context.x20;
         frame.x21 = thread.context.x21;
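Aside: a self-contained sketch of the invariant these two hunks enforce - the full GPR file must round-trip through the saved context, not just x0 plus the callee-saved range. Types here are stand-ins (arrays instead of the kernel's named x0..x30 fields), not the real Aarch64ExceptionFrame:

struct Frame { x: [u64; 31] }          // stand-in for Aarch64ExceptionFrame (x0..x30)
struct SavedContext { x: [u64; 31] }   // stand-in for the per-thread context

fn save(ctx: &mut SavedContext, frame: &Frame) {
    ctx.x.copy_from_slice(&frame.x); // ALL GPRs, not just x0 + x19..x30
}

fn restore(frame: &mut Frame, ctx: &SavedContext) {
    frame.x.copy_from_slice(&ctx.x);
}

fn main() {
    let trap = Frame { x: core::array::from_fn(|i| i as u64 * 7) };
    let mut saved = SavedContext { x: [0; 31] };
    save(&mut saved, &trap);
    // Another thread reuses the exception frame while we are switched out...
    let mut frame = Frame { x: [0xdead_beef; 31] };
    restore(&mut frame, &saved);
    // ...yet every register, x1-x18 included, must come back intact.
    assert_eq!(frame.x, trap.x);
}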
diff --git a/kernel/src/arch_impl/aarch64/syscall_entry.rs b/kernel/src/arch_impl/aarch64/syscall_entry.rs
index 6a69772b..bc5806bc 100644
--- a/kernel/src/arch_impl/aarch64/syscall_entry.rs
+++ b/kernel/src/arch_impl/aarch64/syscall_entry.rs
@@ -815,36 +815,18 @@ fn sys_gettid() -> u64 {
     crate::task::scheduler::current_thread_id().unwrap_or(0)
 }
 
-/// sys_clock_gettime implementation - uses architecture-independent time module
+/// sys_clock_gettime implementation - delegates to shared syscall/time.rs
 fn sys_clock_gettime(clock_id: u32, user_timespec_ptr: *mut Timespec) -> u64 {
-    // Validate pointer
-    if user_timespec_ptr.is_null() {
-        return (-14_i64) as u64; // -EFAULT
+    // Use the shared implementation which properly uses copy_to_user.
+    // This is critical for CoW (Copy-on-Write) support - writing directly
+    // to userspace memory would fail on CoW pages in forked children.
+    match crate::syscall::time::sys_clock_gettime(
+        clock_id,
+        user_timespec_ptr as *mut crate::syscall::time::Timespec,
+    ) {
+        crate::syscall::SyscallResult::Ok(v) => v,
+        crate::syscall::SyscallResult::Err(e) => (-(e as i64)) as u64,
     }
-
-    // Get time from arch-agnostic time module
-    let (tv_sec, tv_nsec) = match clock_id {
-        0 => {
-            // CLOCK_REALTIME
-            crate::time::get_real_time_ns()
-        }
-        1 => {
-            // CLOCK_MONOTONIC
-            let (secs, nanos) = crate::time::get_monotonic_time_ns();
-            (secs as i64, nanos as i64)
-        }
-        _ => {
-            return (-22_i64) as u64; // -EINVAL
-        }
-    };
-
-    // Write to userspace
-    unsafe {
-        (*user_timespec_ptr).tv_sec = tv_sec;
-        (*user_timespec_ptr).tv_nsec = tv_nsec;
-    }
-
-    0
 }
 
 // =============================================================================
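Aside: the raw-u64 errno encoding this wrapper maps SyscallResult into, in miniature. The constants and the -4095..-1 error window below follow the common Linux convention and are assumptions for illustration, not Breenix's actual errno table:

const EINVAL: u64 = 22; // assumed example constant

fn encode(res: Result<u64, u64>) -> u64 {
    match res {
        Ok(v) => v,
        Err(e) => (-(e as i64)) as u64, // e.g. 22 -> 0xffff_ffff_ffff_ffea
    }
}

fn decode(raw: u64) -> Result<u64, u64> {
    // Values above -4096 (as u64) are treated as negated errnos.
    if raw > (-4096_i64) as u64 { Err((-(raw as i64)) as u64) } else { Ok(raw) }
}

fn main() {
    assert_eq!(encode(Err(EINVAL)), (-22_i64) as u64);
    assert_eq!(decode((-22_i64) as u64), Err(22));
    assert_eq!(decode(0), Ok(0));
}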
diff --git a/kernel/src/ipc/fd.rs b/kernel/src/ipc/fd.rs
index 10c1ddf8..9f1ce951 100644
--- a/kernel/src/ipc/fd.rs
+++ b/kernel/src/ipc/fd.rs
@@ -258,6 +258,10 @@ impl Clone for FdTable {
                     // Increment TCP connection reference count for the clone
                     crate::net::tcp::tcp_add_ref(conn_id);
                 }
+                FdKind::TcpListener(port) => {
+                    // Increment TCP listener reference count for the clone
+                    crate::net::tcp::tcp_listener_ref_inc(*port);
+                }
                 _ => {}
             }
         }
@@ -524,9 +528,9 @@ impl Drop for FdTable {
                     log::debug!("FdTable::drop() - releasing TCP socket fd {}", i);
                 }
                 FdKind::TcpListener(port) => {
-                    // Remove from listener table
-                    crate::net::tcp::TCP_LISTENERS.lock().remove(&port);
-                    log::debug!("FdTable::drop() - closed TCP listener fd {} on port {}", i, port);
+                    // Decrement ref count, remove only if it reaches 0
+                    crate::net::tcp::tcp_listener_ref_dec(port);
+                    log::debug!("FdTable::drop() - released TCP listener fd {} on port {}", i, port);
                }
                 FdKind::TcpConnection(conn_id) => {
                     // Close the TCP connection
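Aside: the fd-duplication discipline the fd.rs hunks implement, modeled with std types. Every Clone of the table bumps the listener's count and every Drop releases it, so a listening port survives fork() until the last fd referring to it is closed. All names below are stand-ins, not kernel APIs:

use std::collections::HashMap;
use std::sync::{Arc, Mutex};

type Listeners = Arc<Mutex<HashMap<u16, usize>>>; // port -> refcount

struct ListenerFd { port: u16, table: Listeners }

impl Clone for ListenerFd {
    fn clone(&self) -> Self {
        // fork() duplicating the fd bumps the listener's refcount
        *self.table.lock().unwrap().get_mut(&self.port).unwrap() += 1;
        ListenerFd { port: self.port, table: Arc::clone(&self.table) }
    }
}

impl Drop for ListenerFd {
    fn drop(&mut self) {
        // close()/process exit releases it; the last closer removes the listener
        let mut t = self.table.lock().unwrap();
        if let Some(n) = t.get_mut(&self.port) {
            *n -= 1;
            if *n == 0 { t.remove(&self.port); }
        }
    }
}

fn main() {
    let table: Listeners = Arc::new(Mutex::new(HashMap::from([(8080, 1)])));
    let parent = ListenerFd { port: 8080, table: Arc::clone(&table) };
    let child = parent.clone();  // fork duplicates the fd: refcount 1 -> 2
    drop(parent);                // parent closes: port stays bound
    assert!(table.lock().unwrap().contains_key(&8080));
    drop(child);                 // last fd closes: listener removed
    assert!(table.lock().unwrap().is_empty());
}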
diff --git a/kernel/src/net/ipv4.rs b/kernel/src/net/ipv4.rs
index cf126717..d1dc55a6 100644
--- a/kernel/src/net/ipv4.rs
+++ b/kernel/src/net/ipv4.rs
@@ -15,11 +15,9 @@ pub const IPV4_HEADER_MIN_SIZE: usize = 20;
 pub const PROTOCOL_ICMP: u8 = 1;
 
 /// IPv4 protocol number for TCP
-#[allow(dead_code)]
 pub const PROTOCOL_TCP: u8 = 6;
 
 /// IPv4 protocol number for UDP
-#[allow(dead_code)]
 pub const PROTOCOL_UDP: u8 = 17;
 
 /// Default TTL for outgoing packets
@@ -174,16 +172,13 @@ pub fn handle_ipv4(eth_frame: &EthernetFrame, ip: &Ipv4Packet) {
                 icmp::handle_icmp(eth_frame, ip, &icmp_packet);
             }
         }
-        #[cfg(target_arch = "x86_64")]
         PROTOCOL_TCP => {
             super::tcp::handle_tcp(ip, ip.payload);
         }
-        #[cfg(target_arch = "x86_64")]
         PROTOCOL_UDP => {
             super::udp::handle_udp(ip, ip.payload);
         }
         _ => {
-            #[cfg(target_arch = "x86_64")]
             log::debug!("IPv4: Unknown protocol {}", ip.protocol);
         }
     }
diff --git a/kernel/src/net/tcp.rs b/kernel/src/net/tcp.rs
index 43dd7068..61f94bda 100644
--- a/kernel/src/net/tcp.rs
+++ b/kernel/src/net/tcp.rs
@@ -389,6 +389,9 @@ pub struct ListenSocket {
     pub owner_pid: crate::process::process::ProcessId,
     /// Threads waiting for incoming connections (accept() blocking)
     pub waiting_threads: Mutex<Vec<u64>>,
+    /// Reference count - number of fds pointing to this listener (for fork support)
+    /// When this reaches 0, the listener is removed from TCP_LISTENERS
+    pub ref_count: core::sync::atomic::AtomicUsize,
 }
 
 /// Global TCP connection table
@@ -901,6 +904,7 @@ pub fn tcp_listen(
         pending: VecDeque::new(),
         owner_pid,
         waiting_threads: Mutex::new(Vec::new()),
+        ref_count: core::sync::atomic::AtomicUsize::new(1),
     });
 
     log::info!("TCP: Listening on port {}", local_port);
@@ -1300,3 +1304,29 @@ fn wake_connection_waiters(conn: &TcpConnection) {
         log::debug!("TCP: Woke {} connection waiters", readers.len());
     }
 }
+
+/// Increment the reference count for a TCP listener (called when an fd is duplicated via fork)
+pub fn tcp_listener_ref_inc(port: u16) {
+    let listeners = TCP_LISTENERS.lock();
+    if let Some(listener) = listeners.get(&port) {
+        let old = listener.ref_count.fetch_add(1, core::sync::atomic::Ordering::SeqCst);
+        log::debug!("TCP: Listener port {} ref_count {} -> {}", port, old, old + 1);
+    }
+}
+
+/// Decrement the reference count for a TCP listener and remove it if the count reaches 0.
+/// Returns true if the listener was removed, false otherwise.
+pub fn tcp_listener_ref_dec(port: u16) -> bool {
+    let mut listeners = TCP_LISTENERS.lock();
+    if let Some(listener) = listeners.get(&port) {
+        let old = listener.ref_count.fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
+        log::debug!("TCP: Listener port {} ref_count {} -> {}", port, old, old - 1);
+        if old == 1 {
+            // Reference count reached 0 - remove the listener
+            listeners.remove(&port);
+            log::info!("TCP: Removed listener on port {} (ref_count reached 0)", port);
+            return true;
+        }
+    }
+    false
+}
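Aside: the fetch_sub idiom tcp_listener_ref_dec uses, in isolation. Because the decrement happens while the table lock is held, the "old == 1, so remove" decision cannot race with a concurrent tcp_listener_ref_inc on the same port. A sketch with std types, not the kernel's:

use std::collections::BTreeMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;

struct Listener { ref_count: AtomicUsize }

fn ref_dec(table: &Mutex<BTreeMap<u16, Listener>>, port: u16) -> bool {
    let mut t = table.lock().unwrap();
    if let Some(l) = t.get(&port) {
        // fetch_sub returns the PREVIOUS value: old == 1 means we were last.
        if l.ref_count.fetch_sub(1, Ordering::SeqCst) == 1 {
            t.remove(&port);
            return true;
        }
    }
    false
}

fn main() {
    let table = Mutex::new(BTreeMap::from([(80u16, Listener { ref_count: AtomicUsize::new(2) })]));
    assert!(!ref_dec(&table, 80)); // 2 -> 1, still referenced elsewhere
    assert!(ref_dec(&table, 80));  // 1 -> 0, listener removed
    assert!(table.lock().unwrap().is_empty());
}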
diff --git a/kernel/src/process/process.rs b/kernel/src/process/process.rs
index 79decb29..ae8d7a3f 100644
--- a/kernel/src/process/process.rs
+++ b/kernel/src/process/process.rs
@@ -254,9 +254,9 @@ impl Process {
                     log::debug!("Process::close_all_fds() - released TCP socket fd {}", fd);
                 }
                 FdKind::TcpListener(port) => {
-                    // Remove from listener table
-                    crate::net::tcp::TCP_LISTENERS.lock().remove(&port);
-                    log::debug!("Process::close_all_fds() - closed TCP listener fd {} on port {}", fd, port);
+                    // Decrement ref count, remove only if it reaches 0
+                    crate::net::tcp::tcp_listener_ref_dec(port);
+                    log::debug!("Process::close_all_fds() - released TCP listener fd {} on port {}", fd, port);
                 }
                 FdKind::TcpConnection(conn_id) => {
                     // Close the TCP connection
@@ -416,9 +416,9 @@ impl Process {
                     log::debug!("Process::close_all_fds() - closed TCP socket fd {}", fd);
                 }
                 FdKind::TcpListener(port) => {
-                    // Remove from listener table
-                    crate::net::tcp::TCP_LISTENERS.lock().remove(&port);
-                    log::debug!("Process::close_all_fds() - closed TCP listener fd {} port {}", fd, port);
+                    // Decrement ref count, remove only if it reaches 0
+                    crate::net::tcp::tcp_listener_ref_dec(port);
+                    log::debug!("Process::close_all_fds() - released TCP listener fd {} port {}", fd, port);
                 }
                 FdKind::TcpConnection(conn_id) => {
                     // Close TCP connection
diff --git a/kernel/src/syscall/handlers.rs b/kernel/src/syscall/handlers.rs
index 7cdd0922..b2129687 100644
--- a/kernel/src/syscall/handlers.rs
+++ b/kernel/src/syscall/handlers.rs
@@ -864,9 +864,10 @@ pub fn sys_read(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
             SyscallResult::Err(super::errno::ENOTCONN as u64)
         }
         FdKind::TcpConnection(conn_id) => {
-            // Read from TCP connection with blocking
-            // Clone conn_id so we can drop the manager_guard before blocking
+            // Read from TCP connection with blocking/non-blocking support
+            // Clone conn_id and capture flags before dropping manager_guard
             let conn_id = *conn_id;
+            let is_nonblocking = (fd_entry.status_flags & crate::ipc::fd::status_flags::O_NONBLOCK) != 0;
             drop(manager_guard);
 
             // Drain loopback queue for localhost connections (127.x.x.x, own IP).
@@ -874,7 +875,7 @@ pub fn sys_read(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
 
             let mut user_buf = alloc::vec![0u8; count as usize];
 
-            // Blocking read loop
+            // Read loop (may block if O_NONBLOCK not set)
             loop {
                 // Register as waiter FIRST to avoid race condition
                 crate::net::tcp::tcp_register_recv_waiter(&conn_id, thread_id);
@@ -899,7 +900,14 @@ pub fn sys_read(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
                         return SyscallResult::Ok(0);
                     }
                     Err(_) => {
-                        // No data available - block
+                        // No data available
+                        if is_nonblocking {
+                            // O_NONBLOCK set: return EAGAIN immediately
+                            crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
+                            log::debug!("sys_read: TCP no data, O_NONBLOCK set - returning EAGAIN");
+                            return SyscallResult::Err(super::errno::EAGAIN as u64);
+                        }
+                        // Will block below
                     }
                     _ => unreachable!(),
                 }
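Aside: what the new EAGAIN path means for a userspace caller. With O_NONBLOCK set, read() never parks the thread; the caller retries (or polls) on EAGAIN. The read() below is a fake stand-in that succeeds on the third attempt, not the libbreenix wrapper:

const EAGAIN: i64 = 11;

fn read(attempt: &mut u32) -> i64 {
    // Pretend data arrives on the third try.
    *attempt += 1;
    if *attempt < 3 { -EAGAIN } else { 5 }
}

fn main() {
    let mut attempt = 0;
    loop {
        match read(&mut attempt) {
            n if n >= 0 => { println!("got {} bytes", n); break; }
            e if e == -EAGAIN => continue, // no data yet: retry/poll/yield
            e => panic!("read failed: {}", e),
        }
    }
}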
diff --git a/kernel/src/syscall/io.rs b/kernel/src/syscall/io.rs
index f9cd68ef..5b284ed6 100644
--- a/kernel/src/syscall/io.rs
+++ b/kernel/src/syscall/io.rs
@@ -87,6 +87,7 @@ pub fn sys_write(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
         StdIo,
         Pipe { pipe_buffer: alloc::sync::Arc<Mutex<PipeBuffer>>, is_nonblocking: bool },
         UnixStream { socket: alloc::sync::Arc<Mutex<UnixStream>> },
+        TcpConnection { conn_id: crate::net::tcp::ConnectionId },
         PtyMaster(u32),
         PtySlave(u32),
         Ebadf,
@@ -134,9 +135,9 @@ pub fn sys_write(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
             FdKind::UnixListener(_) => WriteOperation::Enotconn,
             FdKind::PtyMaster(pty_num) => WriteOperation::PtyMaster(*pty_num),
             FdKind::PtySlave(pty_num) => WriteOperation::PtySlave(*pty_num),
-            // TCP sockets - write not directly supported, use send/sendto
+            // TCP sockets
             FdKind::TcpSocket(_) | FdKind::TcpListener(_) => WriteOperation::Enotconn,
-            FdKind::TcpConnection(_) => WriteOperation::Eopnotsupp,
+            FdKind::TcpConnection(conn_id) => WriteOperation::TcpConnection { conn_id: *conn_id },
         }
     };
 
@@ -161,6 +162,20 @@ pub fn sys_write(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
                 Err(e) => SyscallResult::Err(e as u64),
             }
         }
+        WriteOperation::TcpConnection { conn_id } => {
+            // Write to established TCP connection
+            match crate::net::tcp::tcp_send(&conn_id, &buffer) {
+                Ok(n) => SyscallResult::Ok(n as u64),
+                Err(e) => {
+                    // Return EPIPE if the connection was shutdown for writing
+                    if e.contains("shutdown") {
+                        SyscallResult::Err(super::errno::EPIPE as u64)
+                    } else {
+                        SyscallResult::Err(super::errno::EIO as u64)
+                    }
+                }
+            }
+        }
         WriteOperation::PtyMaster(pty_num) => {
             // Write to PTY master - data goes to slave through line discipline
             if let Some(pair) = crate::tty::pty::get(pty_num) {
@@ -472,12 +487,119 @@ pub fn sys_read(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
                 SyscallResult::Err(5) // EIO
             }
         }
-        // TCP sockets - read not directly supported, use recv/recvfrom
+        // TCP sockets
         FdKind::TcpSocket(_) | FdKind::TcpListener(_) => {
             SyscallResult::Err(super::errno::ENOTCONN as u64)
         }
-        FdKind::TcpConnection(_) => {
-            SyscallResult::Err(super::errno::EOPNOTSUPP as u64)
+        FdKind::TcpConnection(conn_id) => {
+            // Read from TCP connection with blocking/non-blocking support
+            // Clone conn_id and capture flags before dropping manager_guard
+            let conn_id = *conn_id;
+            let is_nonblocking = (fd_entry.status_flags & crate::ipc::fd::status_flags::O_NONBLOCK) != 0;
+            drop(manager_guard);
+
+            // Drain loopback queue for localhost connections (127.x.x.x, own IP).
+            crate::net::drain_loopback_queue();
+
+            let mut user_buf = alloc::vec![0u8; count as usize];
+
+            // Read loop (may block if O_NONBLOCK not set)
+            loop {
+                // Register as waiter FIRST to avoid race condition
+                crate::net::tcp::tcp_register_recv_waiter(&conn_id, thread_id);
+
+                // Drain loopback queue in case data arrived
+                crate::net::drain_loopback_queue();
+
+                // Try to receive
+                match crate::net::tcp::tcp_recv(&conn_id, &mut user_buf) {
+                    Ok(n) if n > 0 => {
+                        // Data received - unregister and return
+                        crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
+                        if copy_to_user_bytes(buf_ptr, &user_buf[..n]).is_err() {
+                            return SyscallResult::Err(14); // EFAULT
+                        }
+                        return SyscallResult::Ok(n as u64);
+                    }
+                    Ok(0) => {
+                        // EOF (connection closed)
+                        crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
+                        return SyscallResult::Ok(0);
+                    }
+                    Err(_) => {
+                        // No data available
+                        if is_nonblocking {
+                            // O_NONBLOCK set: return EAGAIN immediately
+                            crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
+                            return SyscallResult::Err(super::errno::EAGAIN as u64);
+                        }
+                        // Will block below
+                    }
+                    _ => unreachable!(),
+                }
+
+                // No data - block the thread
+                crate::task::scheduler::with_scheduler(|sched| {
+                    sched.block_current();
+                    if let Some(thread) = sched.current_thread_mut() {
+                        thread.blocked_in_syscall = true;
+                    }
+                });
+
+                // Double-check for data after setting Blocked state
+                if crate::net::tcp::tcp_has_data(&conn_id) {
+                    crate::task::scheduler::with_scheduler(|sched| {
+                        if let Some(thread) = sched.current_thread_mut() {
+                            thread.blocked_in_syscall = false;
+                            thread.set_ready();
+                        }
+                    });
+                    crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
+                    continue;
+                }
+
+                // Re-enable preemption before WFI loop
+                crate::per_cpu::preempt_enable();
+
+                // WFI loop - wait for data to arrive
+                loop {
+                    // CRITICAL: Drain loopback queue - ARM64 has no NIC interrupts,
+                    // so we must poll for localhost packets during blocking waits
+                    crate::net::drain_loopback_queue();
+
+                    let still_blocked = crate::task::scheduler::with_scheduler(|sched| {
+                        if let Some(thread) = sched.current_thread_mut() {
+                            thread.state == crate::task::thread::ThreadState::Blocked
+                        } else {
+                            false
+                        }
+                    }).unwrap_or(false);
+
+                    if !still_blocked {
+                        crate::per_cpu::preempt_disable();
+                        break;
+                    }
+
+                    crate::task::scheduler::yield_current();
+                    unsafe { core::arch::asm!("wfi", options(nomem, nostack)); }
+                }
+
+                // Reset quantum after blocking wait to prevent immediate preemption
+                crate::arch_impl::aarch64::timer_interrupt::reset_quantum();
+
+                // Clear blocked_in_syscall
+                crate::task::scheduler::with_scheduler(|sched| {
+                    if let Some(thread) = sched.current_thread_mut() {
+                        thread.blocked_in_syscall = false;
+                    }
+                });
+
+                // Unregister from wait queue (will re-register at top of loop)
+                crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
+
+                // Drain loopback again before retrying
+                crate::net::drain_loopback_queue();
+            }
         }
     }
 }
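Aside: the lost-wakeup discipline both read paths follow - register as a waiter first, then check for data, and only then block, so a wakeup arriving between the check and the block is never missed. All functions below are stand-ins for the kernel helpers, not their real signatures:

fn register_waiter() {}
fn unregister_waiter() {}
fn try_recv() -> Option<usize> { Some(5) } // stand-in: data is already there
fn block_current_thread() {}

fn blocking_read() -> usize {
    loop {
        register_waiter();               // 1. register FIRST
        if let Some(n) = try_recv() {    // 2. then check for data
            unregister_waiter();
            return n;                    //    data that raced in is not missed
        }
        block_current_thread();          // 3. only now is it safe to block:
                                         //    any wakeup after step 1 is seen
        unregister_waiter();             //    (we re-register at the loop top)
    }
}

fn main() {
    assert_eq!(blocking_read(), 5);
}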
diff --git a/kernel/src/syscall/socket.rs b/kernel/src/syscall/socket.rs
index 83345776..437150de 100644
--- a/kernel/src/syscall/socket.rs
+++ b/kernel/src/syscall/socket.rs
@@ -1763,8 +1763,8 @@ pub fn sys_socketpair(domain: u64, sock_type: u64, protocol: u64, sv_ptr: u64) -
     let sv: [i32; 2] = [fd_a, fd_b];
     unsafe {
         let sv_user = sv_ptr as *mut [i32; 2];
-        // Validate pointer is in userspace range
-        if sv_ptr < 0x1000 || sv_ptr > 0x7FFFFFFFFFFF {
+        // Validate pointer is in a valid userspace range (code/data, mmap, or stack)
+        if !crate::memory::layout::is_valid_user_address(sv_ptr) {
             let _ = process.fd_table.close(fd_a);
             let _ = process.fd_table.close(fd_b);
             return SyscallResult::Err(EFAULT as u64);
diff --git a/libs/libbreenix/src/time.rs b/libs/libbreenix/src/time.rs
index 21f3b21b..e7c0b5b2 100644
--- a/libs/libbreenix/src/time.rs
+++ b/libs/libbreenix/src/time.rs
@@ -72,16 +72,22 @@ pub fn sleep_ms(ms: u64) {
     loop {
         let now = now_monotonic();
-        let elapsed_sec = now.tv_sec - start.tv_sec;
-        let elapsed_nsec = if now.tv_nsec >= start.tv_nsec {
-            now.tv_nsec - start.tv_nsec
-        } else {
-            // Handle nanosecond underflow
-            1_000_000_000 - (start.tv_nsec - now.tv_nsec)
-        };
-        let elapsed_ns = (elapsed_sec as u64) * 1_000_000_000 + (elapsed_nsec as u64);
-        if elapsed_ns >= target_ns {
+        // Calculate elapsed time in nanoseconds using signed arithmetic.
+        // This handles all edge cases including timer jitter where nanoseconds
+        // might appear to briefly go backwards within the same second.
+        let total_start_ns = (start.tv_sec as i128) * 1_000_000_000 + (start.tv_nsec as i128);
+        let total_now_ns = (now.tv_sec as i128) * 1_000_000_000 + (now.tv_nsec as i128);
+        let elapsed_ns = total_now_ns - total_start_ns;
+
+        // If elapsed time is negative (shouldn't happen with a monotonic clock,
+        // but could due to jitter or bugs), treat it as 0 and keep waiting
+        if elapsed_ns < 0 {
+            crate::process::yield_now();
+            continue;
+        }
+
+        if elapsed_ns as u64 >= target_ns {
             break;
         }
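Aside: why the i128 rewrite is correct, with concrete numbers. With start = 1.9s and now = 2.1s, the old code computed elapsed_sec = 1 and elapsed_nsec = 200M without borrowing from the seconds, i.e. 1.2s instead of 0.2s; and jitter that made now look earlier than start underflowed the unsigned math. A minimal sketch with a local Timespec stand-in:

#[derive(Clone, Copy)]
struct Timespec { tv_sec: i64, tv_nsec: i64 }

fn elapsed_ns(start: Timespec, now: Timespec) -> i128 {
    let s = start.tv_sec as i128 * 1_000_000_000 + start.tv_nsec as i128;
    let n = now.tv_sec as i128 * 1_000_000_000 + now.tv_nsec as i128;
    n - s // may be negative; the caller treats < 0 as "keep waiting"
}

fn main() {
    let start = Timespec { tv_sec: 1, tv_nsec: 900_000_000 };
    let now = Timespec { tv_sec: 2, tv_nsec: 100_000_000 };
    assert_eq!(elapsed_ns(start, now), 200_000_000); // 0.2s, as expected
    // Jitter case: "now" slightly behind "start" yields a small negative
    // value instead of a huge bogus unsigned number.
    assert!(elapsed_ns(now, start) < 0);
}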
diff --git a/userspace/tests/Cargo.toml b/userspace/tests/Cargo.toml
index 4852e0b2..6a69695d 100644
--- a/userspace/tests/Cargo.toml
+++ b/userspace/tests/Cargo.toml
@@ -504,6 +504,10 @@ path = "sigsuspend_test.rs"
 name = "itimer_test"
 path = "itimer_test.rs"
 
+[[bin]]
+name = "sleep_debug_test"
+path = "sleep_debug_test.rs"
+
 [profile.release]
 panic = "abort"
 lto = true
diff --git a/userspace/tests/sleep_debug_test.rs b/userspace/tests/sleep_debug_test.rs
new file mode 100644
index 00000000..c49eec9f
--- /dev/null
+++ b/userspace/tests/sleep_debug_test.rs
@@ -0,0 +1,310 @@
+//! Sleep/Clock Debug Test for ARM64
+//!
+//! This test investigates why sleep_ms() hangs in forked child processes on ARM64.
+//! It tests:
+//! 1. clock_gettime in the parent (baseline)
+//! 2. clock_gettime in a forked child (to see if time is advancing)
+//! 3. yield_now in a forked child (to ensure the scheduler works)
+//! 4. A short sleep_ms() call in a forked child
+//!
+//! The goal is to identify whether:
+//! - clock_gettime returns stale data in forked children
+//! - yield_now() causes issues
+//! - The elapsed time calculation has bugs
+
+#![no_std]
+#![no_main]
+
+use core::panic::PanicInfo;
+use libbreenix::io;
+use libbreenix::process;
+use libbreenix::time::{now_monotonic, sleep_ms};
+use libbreenix::types::Timespec;
+
+/// Print a string
+fn print(s: &str) {
+    io::print(s);
+}
+
+/// Print a number (signed)
+fn print_num(n: i64) {
+    if n < 0 {
+        print("-");
+        print_num(-n);
+        return;
+    }
+    if n == 0 {
+        print("0");
+        return;
+    }
+
+    let mut buf = [0u8; 20];
+    let mut i = 0;
+    let mut val = n as u64;
+
+    while val > 0 {
+        buf[i] = b'0' + (val % 10) as u8;
+        val /= 10;
+        i += 1;
+    }
+
+    while i > 0 {
+        i -= 1;
+        let ch = [buf[i]];
+        if let Ok(s) = core::str::from_utf8(&ch) {
+            print(s);
+        }
+    }
+}
+
+/// Print a Timespec
+fn print_timespec(ts: &Timespec) {
+    print("{ tv_sec: ");
+    print_num(ts.tv_sec);
+    print(", tv_nsec: ");
+    print_num(ts.tv_nsec);
+    print(" }");
+}
+
+/// Calculate elapsed time in nanoseconds between two Timespec values
+fn elapsed_ns(start: &Timespec, end: &Timespec) -> i64 {
+    let sec_diff = end.tv_sec - start.tv_sec;
+    let nsec_diff = end.tv_nsec - start.tv_nsec;
+    sec_diff * 1_000_000_000 + nsec_diff
+}
+
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    print("SLEEP_DEBUG_TEST: Starting\n");
+    print("SLEEP_DEBUG_TEST: This test diagnoses sleep_ms() hang in forked children on ARM64\n\n");
+
+    // =========================================================================
+    // Test 1: Baseline - clock_gettime in parent
+    // =========================================================================
+    print("=== TEST 1: Baseline clock_gettime in parent ===\n");
+
+    let t1 = now_monotonic();
+    print("  First call: ");
+    print_timespec(&t1);
+    print("\n");
+
+    // Small yield
+    process::yield_now();
+
+    let t2 = now_monotonic();
+    print("  Second call: ");
+    print_timespec(&t2);
+    print("\n");
+
+    let elapsed = elapsed_ns(&t1, &t2);
+    print("  Elapsed: ");
+    print_num(elapsed);
+    print(" ns\n");
+
+    if elapsed >= 0 && t1.tv_sec >= 0 && t1.tv_nsec >= 0 {
+        print("  TEST 1: PASS - clock works in parent\n\n");
+    } else {
+        print("  TEST 1: FAIL - clock not working in parent\n\n");
+        process::exit(1);
+    }
+
+    // =========================================================================
+    // Test 2: Fork and test clock_gettime in child
+    // =========================================================================
+    print("=== TEST 2: clock_gettime in forked child ===\n");
+
+    let _parent_pid = process::getpid();
+    let fork_result = process::fork();
+
+    if fork_result < 0 {
+        print("  FAIL - fork failed\n");
+        process::exit(2);
+    }
+
+    if fork_result == 0 {
+        // ===== CHILD PROCESS =====
+        print("  [CHILD] Forked successfully, PID=");
+        print_num(process::getpid() as i64);
+        print("\n");
+
+        // Test 2a: First clock call in child
+        print("  [CHILD] Getting first timestamp...\n");
+        let c1 = now_monotonic();
+        print("  [CHILD] First call: ");
+        print_timespec(&c1);
+        print("\n");
+
+        // Test 2b: Yield in child
+        print("  [CHILD] Calling yield_now()...\n");
+        process::yield_now();
+        print("  [CHILD] yield_now() returned\n");
+
+        // Test 2c: Second clock call in child
+        print("  [CHILD] Getting second timestamp...\n");
+        let c2 = now_monotonic();
+        print("  [CHILD] Second call: ");
+        print_timespec(&c2);
+        print("\n");
+
+        let c_elapsed = elapsed_ns(&c1, &c2);
+        print("  [CHILD] Elapsed: ");
+        print_num(c_elapsed);
+        print(" ns\n");
+
+        if c_elapsed < 0 {
+            print("  [CHILD] WARNING: Time went backwards!\n");
+        }
+
+        // Test 2d: Manual elapsed time calculation (like sleep_ms does)
+        print("\n  [CHILD] Testing manual elapsed calculation (like sleep_ms)...\n");
+        let start = now_monotonic();
+        print("  [CHILD] Start: ");
+        print_timespec(&start);
+        print("\n");
+
+        // Do 5 yields and check time
+        for i in 0..5 {
+            process::yield_now();
+            let now = now_monotonic();
+
+            // Calculate elapsed the way the old sleep_ms did
+            let elapsed_sec = now.tv_sec - start.tv_sec;
+            let elapsed_nsec = if now.tv_nsec >= start.tv_nsec {
+                now.tv_nsec - start.tv_nsec
+            } else {
+                // Handle nanosecond underflow - THIS IS THE POTENTIAL BUG:
+                // the old sleep_ms did NOT decrement elapsed_sec here!
+                1_000_000_000 - (start.tv_nsec - now.tv_nsec)
+            };
+
+            // Note: the old sleep_ms didn't decrement elapsed_sec on underflow!
+            // Let's calculate correctly for comparison:
+            let correct_elapsed_sec = if now.tv_nsec >= start.tv_nsec {
+                elapsed_sec
+            } else {
+                elapsed_sec - 1 // Borrow from seconds
+            };
+
+            let elapsed_ns_buggy = (elapsed_sec as u64) * 1_000_000_000 + (elapsed_nsec as u64);
+            let elapsed_ns_correct = (correct_elapsed_sec as u64) * 1_000_000_000 + (elapsed_nsec as u64);
+
+            print("  [CHILD] Iteration ");
+            print_num(i as i64);
+            print(": now=");
+            print_timespec(&now);
+            print("\n");
+
+            print("    elapsed_sec=");
+            print_num(elapsed_sec);
+            print(", elapsed_nsec=");
+            print_num(elapsed_nsec);
+            print("\n");
+
+            print("    buggy_elapsed_ns=");
+            print_num(elapsed_ns_buggy as i64);
+            print(", correct_elapsed_ns=");
+            print_num(elapsed_ns_correct as i64);
+
+            if elapsed_ns_buggy != elapsed_ns_correct {
+                print("  *** BUG TRIGGERED ***");
+            }
+            print("\n");
+        }
+
+        // Test 2e: Try a very short sleep_ms
+        print("\n  [CHILD] Testing sleep_ms(10)...\n");
+        let before_sleep = now_monotonic();
+        print("  [CHILD] Before sleep: ");
+        print_timespec(&before_sleep);
+        print("\n");
+
+        sleep_ms(10);
+
+        let after_sleep = now_monotonic();
+        print("  [CHILD] After sleep: ");
+        print_timespec(&after_sleep);
+        print("\n");
+
+        let sleep_elapsed = elapsed_ns(&before_sleep, &after_sleep);
+        print("  [CHILD] Sleep elapsed: ");
+        print_num(sleep_elapsed);
+        print(" ns (");
+        print_num(sleep_elapsed / 1_000_000);
+        print(" ms)\n");
+
+        if sleep_elapsed >= 10_000_000 {
+            print("  [CHILD] sleep_ms(10) completed successfully!\n");
+        } else {
+            print("  [CHILD] WARNING: sleep returned too early\n");
+        }
+
+        print("  [CHILD] TEST 2: PASS - all clock tests passed in child\n");
+        process::exit(0);
+    } else {
+        // ===== PARENT PROCESS =====
+        print("  [PARENT] Forked child PID=");
+        print_num(fork_result);
+        print("\n");
+
+        // Wait for child
+        let mut status: i32 = 0;
+        let wait_result = process::waitpid(fork_result as i32, &mut status, 0);
+
+        print("  [PARENT] waitpid returned ");
+        print_num(wait_result);
+        print(", status=");
+        print_num(status as i64);
+        print("\n");
+
+        if wait_result == fork_result && process::wifexited(status) && process::wexitstatus(status) == 0 {
+            print("  TEST 2: PASS - child completed successfully\n\n");
+        } else {
+            print("  TEST 2: FAIL - child did not complete successfully\n\n");
+            process::exit(2);
+        }
+    }
+
+    // =========================================================================
+    // Test 3: Sleep in parent (sanity check)
+    // =========================================================================
+    print("=== TEST 3: sleep_ms(50) in parent ===\n");
+
+    let before = now_monotonic();
+    print("  Before: ");
+    print_timespec(&before);
+    print("\n");
+
+    sleep_ms(50);
+
+    let after = now_monotonic();
+    print("  After: ");
+    print_timespec(&after);
+    print("\n");
+
+    let parent_sleep_elapsed = elapsed_ns(&before, &after);
+    print("  Elapsed: ");
+    print_num(parent_sleep_elapsed);
+    print(" ns (");
+    print_num(parent_sleep_elapsed / 1_000_000);
+    print(" ms)\n");
+
+    if parent_sleep_elapsed >= 50_000_000 {
+        print("  TEST 3: PASS\n\n");
+    } else {
+        print("  TEST 3: FAIL - sleep returned too early\n\n");
+    }
+
+    // =========================================================================
+    // Final results
+    // =========================================================================
+    print("=== FINAL RESULTS ===\n");
+    print("SLEEP_DEBUG_TEST: ALL TESTS PASSED\n");
+    print("SLEEP_DEBUG_TEST: PASS\n");
+    process::exit(0);
+}
+
+#[panic_handler]
+fn panic(_info: &PanicInfo) -> ! {
+    print("SLEEP_DEBUG_TEST: PANIC!\n");
+    process::exit(99);
+}
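Aside: the wait-status decoding the parent relies on above, using the conventional POSIX encoding (low 7 bits = terminating signal, next 8 bits = exit code). Breenix's actual layout lives in libbreenix; this sketch only illustrates the standard scheme:

fn wifexited(status: i32) -> bool { (status & 0x7f) == 0 }
fn wexitstatus(status: i32) -> i32 { (status >> 8) & 0xff }

fn main() {
    let exited = 0 << 8;   // child exited with code 0
    assert!(wifexited(exited) && wexitstatus(exited) == 0);
    let killed = 9;        // terminated by SIGKILL: not a normal exit
    assert!(!wifexited(killed));
}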
diff --git a/userspace/tests/tcp_blocking_test.rs b/userspace/tests/tcp_blocking_test.rs
index 4f79a13b..f4abecee 100644
--- a/userspace/tests/tcp_blocking_test.rs
+++ b/userspace/tests/tcp_blocking_test.rs
@@ -14,9 +14,10 @@
 //! will FAIL clearly rather than silently working through retries.
 //!
 //! Timeout Watchdog Mechanism:
-//! The child process acts as a watchdog - after completing its work, it waits
-//! an additional delay and then sends SIGKILL to the parent if still alive.
-//! This prevents infinite hangs if blocking is broken.
+//! Child processes act as watchdogs - if they encounter an error before
+//! completing their task, they kill the parent to prevent infinite hangs.
+//! Once a child completes successfully, it exits immediately (no post-work
+//! delay) so the parent's waitpid() can return and the test can continue.
 
 #![no_std]
 #![no_main]
@@ -27,10 +28,7 @@
 use libbreenix::io::{fcntl_getfl, fcntl_setfl, status_flags::O_NONBLOCK};
 use libbreenix::process;
 use libbreenix::signal::{kill, SIGKILL};
 use libbreenix::socket::{accept, bind, connect, listen, socket, SockAddrIn, AF_INET, SOCK_STREAM};
-
-/// Timeout delay iterations for the watchdog
-/// This should be significantly longer than the normal test delays
-const WATCHDOG_TIMEOUT_ITERATIONS: usize = 500;
+use libbreenix::time::{now_monotonic, sleep_ms};
 
 // Errno constants
 const EAGAIN: i32 = 11;
@@ -82,6 +80,13 @@ fn delay_yield(iterations: usize) {
     }
 }
 
+/// Calculate elapsed time in milliseconds between two Timespec values
+fn elapsed_ms(start: &libbreenix::types::Timespec, end: &libbreenix::types::Timespec) -> i64 {
+    let sec_diff = end.tv_sec - start.tv_sec;
+    let nsec_diff = end.tv_nsec - start.tv_nsec;
+    (sec_diff * 1000) + (nsec_diff / 1_000_000)
+}
+
 #[no_mangle]
 pub extern "C" fn _start() -> ! {
     print("TCP_BLOCKING_TEST: Starting\n");
@@ -210,30 +215,13 @@ pub extern "C" fn _start() -> ! {
         // Close socket
         io::close(client_fd as u64);
 
-        // =========================================================================
-        // WATCHDOG: Wait a bit longer and kill parent if still alive
-        // If blocking I/O is working, the parent should have already completed
-        // and exited (or at least be past the blocking calls). If the parent
-        // is stuck in a blocking call that never returns, we kill it.
-        // =========================================================================
-        print("  [CHILD] Starting watchdog timer...\n");
-        delay_yield(WATCHDOG_TIMEOUT_ITERATIONS);
-
-        // Check if parent is still alive and kill it
-        // kill(pid, 0) checks if process exists without sending a signal
-        if kill(parent_pid, 0).is_ok() {
-            print("  [CHILD] WATCHDOG TIMEOUT: Parent still alive after ");
-            print_num(WATCHDOG_TIMEOUT_ITERATIONS as i64);
-            print(" iterations!\n");
-            print("  [CHILD] WATCHDOG: Blocking call did not complete in time - killing parent\n");
-            print("TCP_BLOCKING_TEST: TIMEOUT - blocking I/O appears to be broken!\n");
-            print("TCP_BLOCKING_TEST: FAIL\n");
-            let _ = kill(parent_pid, SIGKILL);
-            process::exit(99); // Special exit code for watchdog timeout
-        } else {
-            print("  [CHILD] Watchdog: Parent completed normally\n");
-        }
-
+        // Child's job is done - exit immediately.
+        // Error-path watchdogs above (lines 171-204) handle killing parent if child fails.
+        //
+        // We do NOT run a post-work watchdog here because the parent will call
+        // waitpid() to wait for us after TEST 2 completes. If we delay here and
+        // check if parent is alive, we'd see it waiting in waitpid (for us!) and
+        // incorrectly kill it - preventing TEST 3, 4, 4b, 5, 6 from running.
         print("  [CHILD] Exiting successfully\n");
         process::exit(0);
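Aside: the corrected watchdog protocol in miniature. The key point is that a child that has finished its work must exit rather than probe the parent, because at that moment the parent is blocked in waitpid() on this very child and would be killed by mistake. kill/exit below are stand-ins for the libbreenix wrappers:

fn child_task() -> Result<(), ()> { Ok(()) } // stand-in for the child's test work
fn kill_parent() { /* kill(parent_pid, SIGKILL) - error path only */ }
fn exit(code: i32) -> ! { std::process::exit(code) }

fn child_main() -> ! {
    match child_task() {
        Ok(()) => exit(0),   // success: exit NOW so the parent's waitpid() returns
        Err(()) => {         // failure: the parent may hang forever in a blocking
            kill_parent();   // call, so the watchdog fires here and only here
            exit(99)
        }
    }
}

fn main() { child_main() }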
@@ -576,6 +564,277 @@ pub extern "C" fn _start() -> ! {
         io::close(server3_fd as u64);
     }
 
+    // =========================================================================
+    // Test 4b: Non-blocking read() returns EAGAIN IMMEDIATELY (Unfalsifiable)
+    //
+    // This is an UNFALSIFIABLE test that proves O_NONBLOCK actually works.
+    // Simply checking for EAGAIN is insufficient because a buggy kernel could:
+    // - Always return EAGAIN regardless of the O_NONBLOCK flag
+    // - Return EAGAIN after blocking for some time
+    //
+    // To be unfalsifiable, this test proves TWO things:
+    //   Part A: Blocking read actually blocks (~300ms while the child delays its send)
+    //   Part B: Non-blocking read returns EAGAIN in < 100ms
+    //
+    // If both pass, we KNOW O_NONBLOCK is the reason for the fast return.
+    //
+    // This test WILL FAIL if:
+    // - The O_NONBLOCK check is removed (Part B would block indefinitely - no data is ever sent)
+    // - An always-return-EAGAIN bug exists (Part A would return EAGAIN, not block)
+    // - The non-blocking path is slow (Part B timing check fails)
+    // =========================================================================
+    print("=== TEST 4b: Non-blocking read() returns EAGAIN IMMEDIATELY (Unfalsifiable) ===\n");
+    print("  This test proves O_NONBLOCK works by comparing blocking vs non-blocking timing.\n\n");
+    {
+        // Timing thresholds
+        const CHILD_DELAY_MS: u64 = 300;  // Child waits 300ms after connecting before sending
+        const BLOCKING_MIN_MS: i64 = 100; // Blocking read must take at least this long
+        const NONBLOCK_MAX_MS: i64 = 100; // Non-blocking read must complete within this
+
+        let mut part_a_passed = false;
+        let mut part_b_passed = false;
+
+        // =====================================================================
+        // Part A: Prove blocking read actually blocks (counterfactual)
+        // =====================================================================
+        print("  --- Part A: Proving blocking read actually blocks ---\n");
+
+        let server_a = match socket(AF_INET, SOCK_STREAM, 0) {
+            Ok(fd) if fd >= 0 => fd,
+            _ => {
+                print("  FAIL - Part A server socket creation failed\n");
+                -1
+            }
+        };
+
+        if server_a >= 0 {
+            let server_a_addr = SockAddrIn::new([0, 0, 0, 0], 9104);
+            if bind(server_a, &server_a_addr).is_ok() && listen(server_a, 128).is_ok() {
+                print("  Server A listening on port 9104 (blocking test)\n");
+
+                let parent_pid = process::getpid() as i32;
+                let fork_a = process::fork();
+
+                if fork_a == 0 {
+                    // ===== CHILD for Part A =====
+                    print("  [CHILD-A] Started, will delay then send data\n");
+
+                    // Create client and connect
+                    let client = match socket(AF_INET, SOCK_STREAM, 0) {
+                        Ok(fd) if fd >= 0 => fd,
+                        _ => {
+                            print("  [CHILD-A] FAIL - socket creation failed\n");
+                            let _ = kill(parent_pid, SIGKILL);
+                            process::exit(20);
+                        }
+                    };
+
+                    let loopback = SockAddrIn::new([127, 0, 0, 1], 9104);
+                    if connect(client, &loopback).is_err() {
+                        // May get EINPROGRESS, wait a bit
+                        delay_yield(10);
+                    }
+                    print("  [CHILD-A] Connected\n");
+
+                    // DELAY before sending, using wall-clock time.
+                    // This ensures the parent has time to accept and start the blocking read.
+                    print("  [CHILD-A] Sleeping ");
+                    print_num(CHILD_DELAY_MS as i64);
+                    print("ms before send...\n");
+                    sleep_ms(CHILD_DELAY_MS);
+
+                    // Send data
+                    let data = b"BLOCKING_TEST";
+                    let written = io::write(client as u64, data);
+                    if written < 0 {
+                        print("  [CHILD-A] FAIL - write failed\n");
+                        let _ = kill(parent_pid, SIGKILL);
+                        process::exit(21);
+                    }
+                    print("  [CHILD-A] Sent data, exiting\n");
+                    io::close(client as u64);
+                    process::exit(0);
+
+                } else if fork_a > 0 {
+                    // ===== PARENT for Part A =====
+                    // Yield to let the child start and connect.
+                    // This ensures the child connects BEFORE we try to accept.
+                    delay_yield(20);
+
+                    // Accept connection (child should have connected by now)
+                    match accept(server_a, None) {
+                        Ok(conn_fd) if conn_fd >= 0 => {
+                            print("  [PARENT-A] Accepted connection\n");
+
+                            // BLOCKING read - measure how long it takes
+                            let mut buf = [0u8; 64];
+                            print("  [PARENT-A] Starting BLOCKING read (should wait for child)...\n");
+
+                            let start = now_monotonic();
+                            let result = io::read(conn_fd as u64, &mut buf);
+                            let end = now_monotonic();
+
+                            let elapsed = elapsed_ms(&start, &end);
+
+                            if result > 0 {
+                                print("  [PARENT-A] Blocking read returned ");
+                                print_num(result);
+                                print(" bytes in ");
+                                print_num(elapsed);
+                                print("ms\n");
+
+                                if elapsed >= BLOCKING_MIN_MS {
+                                    print("  [PARENT-A] PASS - blocking read took >= ");
+                                    print_num(BLOCKING_MIN_MS);
+                                    print("ms (proves blocking works)\n");
+                                    part_a_passed = true;
+                                } else {
+                                    print("  [PARENT-A] FAIL - blocking read too fast (");
+                                    print_num(elapsed);
+                                    print("ms < ");
+                                    print_num(BLOCKING_MIN_MS);
+                                    print("ms)\n");
+                                    print("  This suggests blocking is broken or the child sent too fast\n");
+                                }
+                            } else if result == -(EAGAIN as i64) {
+                                print("  [PARENT-A] FAIL - blocking read returned EAGAIN!\n");
+                                print("  This proves the 'always return EAGAIN' bug exists!\n");
+                            } else {
+                                print("  [PARENT-A] FAIL - blocking read error: ");
+                                print_num(-result);
+                                print("\n");
+                            }
+                            io::close(conn_fd as u64);
+                        }
+                        _ => {
+                            print("  [PARENT-A] FAIL - accept failed\n");
+                        }
+                    }
+
+                    // Wait for child
+                    let mut status: i32 = 0;
+                    let _ = process::waitpid(fork_a as i32, &mut status, 0);
+                }
+            } else {
+                print("  FAIL - Part A bind/listen failed\n");
+            }
+            io::close(server_a as u64);
+        }
+
+        print("\n");
+
+        // =====================================================================
+        // Part B: Prove non-blocking read returns IMMEDIATELY
+        // =====================================================================
+        print("  --- Part B: Proving non-blocking read returns immediately ---\n");
+
+        let server_b = match socket(AF_INET, SOCK_STREAM, 0) {
+            Ok(fd) if fd >= 0 => fd,
+            _ => {
+                print("  FAIL - Part B server socket creation failed\n");
+                -1
+            }
+        };
+
+        if server_b >= 0 {
+            let server_b_addr = SockAddrIn::new([0, 0, 0, 0], 9105);
+            if bind(server_b, &server_b_addr).is_ok() && listen(server_b, 128).is_ok() {
+                print("  Server B listening on port 9105 (non-blocking test)\n");
+
+                // Create client and connect
+                let client_b = match socket(AF_INET, SOCK_STREAM, 0) {
+                    Ok(fd) if fd >= 0 => fd,
+                    _ => {
+                        print("  FAIL - Part B client socket creation failed\n");
+                        -1
+                    }
+                };
+
+                if client_b >= 0 {
+                    let loopback = SockAddrIn::new([127, 0, 0, 1], 9105);
+                    let _ = connect(client_b, &loopback); // May return EINPROGRESS
+                    delay_yield(10); // Let connection complete
+
+                    match accept(server_b, None) {
+                        Ok(conn_fd) if conn_fd >= 0 => {
+                            print("  Connection established\n");
+
+                            // Set O_NONBLOCK
+                            let flags = fcntl_getfl(conn_fd as u64);
+                            if flags >= 0 && fcntl_setfl(conn_fd as u64, (flags as i32) | O_NONBLOCK) >= 0 {
+                                print("  Socket set to O_NONBLOCK\n");
+
+                                // NON-BLOCKING read with NO data pending - measure timing
+                                let mut buf = [0u8; 64];
+                                print("  Starting NON-BLOCKING read (no data pending)...\n");
+
+                                let start = now_monotonic();
+                                let result = io::read(conn_fd as u64, &mut buf);
+                                let end = now_monotonic();
+
+                                let elapsed = elapsed_ms(&start, &end);
+
+                                print("  Non-blocking read returned in ");
+                                print_num(elapsed);
+                                print("ms with result ");
+                                print_num(result);
+                                print("\n");
+
+                                if result == -(EAGAIN as i64) {
+                                    if elapsed < NONBLOCK_MAX_MS {
+                                        print("  PASS - returned EAGAIN in < ");
+                                        print_num(NONBLOCK_MAX_MS);
+                                        print("ms (proves O_NONBLOCK works)\n");
+                                        part_b_passed = true;
+                                    } else {
+                                        print("  FAIL - returned EAGAIN but took too long (");
+                                        print_num(elapsed);
+                                        print("ms >= ");
+                                        print_num(NONBLOCK_MAX_MS);
+                                        print("ms)\n");
+                                    }
+                                } else if result >= 0 {
+                                    print("  FAIL - read returned data but none was sent!\n");
+                                } else {
+                                    print("  FAIL - unexpected error: ");
+                                    print_num(-result);
+                                    print("\n");
+                                }
+                            } else {
+                                print("  FAIL - fcntl failed\n");
+                            }
+                            io::close(conn_fd as u64);
+                        }
+                        _ => {
+                            print("  FAIL - Part B accept failed\n");
+                        }
+                    }
+                    io::close(client_b as u64);
+                }
+            } else {
print(" FAIL - Part B bind/listen failed\n"); + } + io::close(server_b as u64); + } + + // Final verdict + print("\n --- TEST 4b VERDICT ---\n"); + if part_a_passed && part_b_passed { + print(" Part A (blocking proof): PASS\n"); + print(" Part B (non-blocking proof): PASS\n"); + print(" TEST 4b: PASS (unfalsifiable - both paths verified)\n\n"); + } else { + if !part_a_passed { + print(" Part A (blocking proof): FAIL\n"); + } + if !part_b_passed { + print(" Part B (non-blocking proof): FAIL\n"); + } + print(" TEST 4b: FAIL\n\n"); + failed += 1; + } + } + // ========================================================================= // Test 5: connect() with invalid fd returns EBADF //