Skip to content
52 changes: 45 additions & 7 deletions kernel/src/arch_impl/aarch64/context_switch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,29 @@ pub extern "C" fn check_need_resched_and_switch_arm64(
/// Save userspace context for the current thread.
fn save_userspace_context_arm64(thread_id: u64, frame: &Aarch64ExceptionFrame) {
crate::task::scheduler::with_thread_mut(thread_id, |thread| {
// Save X0 (return value register) - important for fork/syscall returns
// Save ALL general-purpose registers from exception frame.
// CRITICAL: When a userspace thread is context-switched (e.g., for blocking I/O
// or preemption), its caller-saved registers (x1-x18) may contain important
// values that must be preserved for correct execution when resumed.
thread.context.x0 = frame.x0;

// Save callee-saved registers from exception frame
thread.context.x1 = frame.x1;
thread.context.x2 = frame.x2;
thread.context.x3 = frame.x3;
thread.context.x4 = frame.x4;
thread.context.x5 = frame.x5;
thread.context.x6 = frame.x6;
thread.context.x7 = frame.x7;
thread.context.x8 = frame.x8;
thread.context.x9 = frame.x9;
thread.context.x10 = frame.x10;
thread.context.x11 = frame.x11;
thread.context.x12 = frame.x12;
thread.context.x13 = frame.x13;
thread.context.x14 = frame.x14;
thread.context.x15 = frame.x15;
thread.context.x16 = frame.x16;
thread.context.x17 = frame.x17;
thread.context.x18 = frame.x18;
thread.context.x19 = frame.x19;
thread.context.x20 = frame.x20;
thread.context.x21 = frame.x21;
Expand Down Expand Up @@ -468,11 +487,30 @@ fn restore_userspace_context_arm64(thread_id: u64, frame: &mut Aarch64ExceptionF

// Restore saved context
crate::task::scheduler::with_thread_mut(thread_id, |thread| {
// Restore X0 - important for fork() return value
// For forked children, x0 is set to 0; for parent, it will be the child PID
// Restore ALL general-purpose registers
// CRITICAL: For forked children, the caller-saved registers (x1-x18) contain
// important values from the parent's execution state that must be preserved.
// Only restoring callee-saved registers (x19-x30) would leave x1-x18 with
// garbage values from the previous thread's exception frame, causing crashes.
frame.x0 = thread.context.x0;

// Restore callee-saved registers
frame.x1 = thread.context.x1;
frame.x2 = thread.context.x2;
frame.x3 = thread.context.x3;
frame.x4 = thread.context.x4;
frame.x5 = thread.context.x5;
frame.x6 = thread.context.x6;
frame.x7 = thread.context.x7;
frame.x8 = thread.context.x8;
frame.x9 = thread.context.x9;
frame.x10 = thread.context.x10;
frame.x11 = thread.context.x11;
frame.x12 = thread.context.x12;
frame.x13 = thread.context.x13;
frame.x14 = thread.context.x14;
frame.x15 = thread.context.x15;
frame.x16 = thread.context.x16;
frame.x17 = thread.context.x17;
frame.x18 = thread.context.x18;
frame.x19 = thread.context.x19;
frame.x20 = thread.context.x20;
frame.x21 = thread.context.x21;
Expand Down
38 changes: 10 additions & 28 deletions kernel/src/arch_impl/aarch64/syscall_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -815,36 +815,18 @@ fn sys_gettid() -> u64 {
crate::task::scheduler::current_thread_id().unwrap_or(0)
}

/// sys_clock_gettime implementation - uses architecture-independent time module
/// sys_clock_gettime implementation - delegates to shared syscall/time.rs
fn sys_clock_gettime(clock_id: u32, user_timespec_ptr: *mut Timespec) -> u64 {
// Validate pointer
if user_timespec_ptr.is_null() {
return (-14_i64) as u64; // -EFAULT
// Use the shared implementation which properly uses copy_to_user
// This is critical for CoW (Copy-on-Write) support - writing directly
// to userspace memory would fail on CoW pages in forked children.
match crate::syscall::time::sys_clock_gettime(
clock_id,
user_timespec_ptr as *mut crate::syscall::time::Timespec,
) {
crate::syscall::SyscallResult::Ok(v) => v,
crate::syscall::SyscallResult::Err(e) => (-(e as i64)) as u64,
}

// Get time from arch-agnostic time module
let (tv_sec, tv_nsec) = match clock_id {
0 => {
// CLOCK_REALTIME
crate::time::get_real_time_ns()
}
1 => {
// CLOCK_MONOTONIC
let (secs, nanos) = crate::time::get_monotonic_time_ns();
(secs as i64, nanos as i64)
}
_ => {
return (-22_i64) as u64; // -EINVAL
}
};

// Write to userspace
unsafe {
(*user_timespec_ptr).tv_sec = tv_sec;
(*user_timespec_ptr).tv_nsec = tv_nsec;
}

0
}

// =============================================================================
Expand Down
10 changes: 7 additions & 3 deletions kernel/src/ipc/fd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,10 @@ impl Clone for FdTable {
// Increment TCP connection reference count for the clone
crate::net::tcp::tcp_add_ref(conn_id);
}
FdKind::TcpListener(port) => {
// Increment TCP listener reference count for the clone
crate::net::tcp::tcp_listener_ref_inc(*port);
}
_ => {}
}
}
Expand Down Expand Up @@ -524,9 +528,9 @@ impl Drop for FdTable {
log::debug!("FdTable::drop() - releasing TCP socket fd {}", i);
}
FdKind::TcpListener(port) => {
// Remove from listener table
crate::net::tcp::TCP_LISTENERS.lock().remove(&port);
log::debug!("FdTable::drop() - closed TCP listener fd {} on port {}", i, port);
// Decrement ref count, remove only if it reaches 0
crate::net::tcp::tcp_listener_ref_dec(port);
log::debug!("FdTable::drop() - released TCP listener fd {} on port {}", i, port);
}
FdKind::TcpConnection(conn_id) => {
// Close the TCP connection
Expand Down
5 changes: 0 additions & 5 deletions kernel/src/net/ipv4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ pub const IPV4_HEADER_MIN_SIZE: usize = 20;
pub const PROTOCOL_ICMP: u8 = 1;

/// IPv4 protocol number for TCP
#[allow(dead_code)]
pub const PROTOCOL_TCP: u8 = 6;

/// IPv4 protocol number for UDP
#[allow(dead_code)]
pub const PROTOCOL_UDP: u8 = 17;

/// Default TTL for outgoing packets
Expand Down Expand Up @@ -174,16 +172,13 @@ pub fn handle_ipv4(eth_frame: &EthernetFrame, ip: &Ipv4Packet) {
icmp::handle_icmp(eth_frame, ip, &icmp_packet);
}
}
#[cfg(target_arch = "x86_64")]
PROTOCOL_TCP => {
super::tcp::handle_tcp(ip, ip.payload);
}
#[cfg(target_arch = "x86_64")]
PROTOCOL_UDP => {
super::udp::handle_udp(ip, ip.payload);
}
_ => {
#[cfg(target_arch = "x86_64")]
log::debug!("IPv4: Unknown protocol {}", ip.protocol);
}
}
Expand Down
30 changes: 30 additions & 0 deletions kernel/src/net/tcp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,9 @@ pub struct ListenSocket {
pub owner_pid: crate::process::process::ProcessId,
/// Threads waiting for incoming connections (accept() blocking)
pub waiting_threads: Mutex<Vec<u64>>,
/// Reference count - number of fds pointing to this listener (for fork support)
/// When this reaches 0, the listener is removed from TCP_LISTENERS
pub ref_count: core::sync::atomic::AtomicUsize,
}

/// Global TCP connection table
Expand Down Expand Up @@ -901,6 +904,7 @@ pub fn tcp_listen(
pending: VecDeque::new(),
owner_pid,
waiting_threads: Mutex::new(Vec::new()),
ref_count: core::sync::atomic::AtomicUsize::new(1),
});

log::info!("TCP: Listening on port {}", local_port);
Expand Down Expand Up @@ -1300,3 +1304,29 @@ fn wake_connection_waiters(conn: &TcpConnection) {
log::debug!("TCP: Woke {} connection waiters", readers.len());
}
}

/// Add one reference to the TCP listener registered on `port`.
///
/// Call this whenever an additional file descriptor starts pointing at the
/// same listener (for example when an fd table is cloned during fork), so
/// that releasing one descriptor does not tear the listener down for the
/// others.
///
/// If no listener is registered on `port`, this is a no-op.
pub fn tcp_listener_ref_inc(port: u16) {
    use core::sync::atomic::Ordering;

    // The table lock is held for the whole update so the count cannot change
    // (and the entry cannot disappear) between the lookup and the increment.
    let table = TCP_LISTENERS.lock();
    let entry = match table.get(&port) {
        Some(entry) => entry,
        None => return,
    };

    // `fetch_add` yields the value *before* the increment.
    let previous = entry.ref_count.fetch_add(1, Ordering::SeqCst);
    log::debug!("TCP: Listener port {} ref_count {} -> {}", port, previous, previous + 1);
}

/// Drop one reference to the TCP listener registered on `port`.
///
/// When the last reference is released the listener is removed from
/// `TCP_LISTENERS`.
///
/// Returns `true` if this call removed the listener, and `false` if other
/// references remain or no listener is registered on `port`.
pub fn tcp_listener_ref_dec(port: u16) -> bool {
    use core::sync::atomic::Ordering;

    // Hold the table lock across the decrement and the possible removal so
    // both happen as one step with respect to other users of the table.
    let mut table = TCP_LISTENERS.lock();

    // `fetch_sub` yields the value *before* the decrement.
    let previous = match table.get(&port) {
        Some(entry) => entry.ref_count.fetch_sub(1, Ordering::SeqCst),
        None => return false,
    };
    log::debug!("TCP: Listener port {} ref_count {} -> {}", port, previous, previous - 1);

    if previous != 1 {
        // Other references are still outstanding; keep the listener.
        return false;
    }

    // That was the last reference: retire the listener.
    table.remove(&port);
    log::info!("TCP: Removed listener on port {} (ref_count reached 0)", port);
    true
}
12 changes: 6 additions & 6 deletions kernel/src/process/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ impl Process {
log::debug!("Process::close_all_fds() - released TCP socket fd {}", fd);
}
FdKind::TcpListener(port) => {
// Remove from listener table
crate::net::tcp::TCP_LISTENERS.lock().remove(&port);
log::debug!("Process::close_all_fds() - closed TCP listener fd {} on port {}", fd, port);
// Decrement ref count, remove only if it reaches 0
crate::net::tcp::tcp_listener_ref_dec(port);
log::debug!("Process::close_all_fds() - released TCP listener fd {} on port {}", fd, port);
}
FdKind::TcpConnection(conn_id) => {
// Close the TCP connection
Expand Down Expand Up @@ -416,9 +416,9 @@ impl Process {
log::debug!("Process::close_all_fds() - closed TCP socket fd {}", fd);
}
FdKind::TcpListener(port) => {
// Remove from listener table
crate::net::tcp::TCP_LISTENERS.lock().remove(&port);
log::debug!("Process::close_all_fds() - closed TCP listener fd {} port {}", fd, port);
// Decrement ref count, remove only if it reaches 0
crate::net::tcp::tcp_listener_ref_dec(port);
log::debug!("Process::close_all_fds() - released TCP listener fd {} port {}", fd, port);
}
FdKind::TcpConnection(conn_id) => {
// Close TCP connection
Expand Down
16 changes: 12 additions & 4 deletions kernel/src/syscall/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -864,17 +864,18 @@ pub fn sys_read(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
SyscallResult::Err(super::errno::ENOTCONN as u64)
}
FdKind::TcpConnection(conn_id) => {
// Read from TCP connection with blocking
// Clone conn_id so we can drop the manager_guard before blocking
// Read from TCP connection with blocking/non-blocking support
// Clone conn_id and capture flags before dropping manager_guard
let conn_id = *conn_id;
let is_nonblocking = (fd_entry.status_flags & crate::ipc::fd::status_flags::O_NONBLOCK) != 0;
drop(manager_guard);

// Drain loopback queue for localhost connections (127.x.x.x, own IP).
crate::net::drain_loopback_queue();

let mut user_buf = alloc::vec![0u8; count as usize];

// Blocking read loop
// Read loop (may block if O_NONBLOCK not set)
loop {
// Register as waiter FIRST to avoid race condition
crate::net::tcp::tcp_register_recv_waiter(&conn_id, thread_id);
Expand All @@ -899,7 +900,14 @@ pub fn sys_read(fd: u64, buf_ptr: u64, count: u64) -> SyscallResult {
return SyscallResult::Ok(0);
}
Err(_) => {
// No data available - block
// No data available
if is_nonblocking {
// O_NONBLOCK set: return EAGAIN immediately
crate::net::tcp::tcp_unregister_recv_waiter(&conn_id, thread_id);
log::debug!("sys_read: TCP no data, O_NONBLOCK set - returning EAGAIN");
return SyscallResult::Err(super::errno::EAGAIN as u64);
}
// Will block below
}
_ => unreachable!(),
}
Expand Down
Loading
Loading