Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#88] Draft of foundations for clone3() syscall & friends #115

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ arrayvec = { version = "0.7.2", default-features = false }
hashbrown = { version = "0.11.2", features = ["nightly"] }
crossbeam = { version = "0.8.1", default-features = false, features = ["alloc"] }
atomic_refcell = "0.1.6"
simple_endian = { git = "https://github.com/michalfita/simple-endian-rs", branch ="bugfix/4/fix-no_std-support-right-way", default-features = false, features = ["big_endian", "little_endian", "byte_impls", "integer_impls", "comparisons", "format"] }

# Arch-specific dependencies.
x86 = "0.43.0"
Expand Down
112 changes: 110 additions & 2 deletions kernel/process/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use alloc::collections::BTreeMap;
use alloc::sync::{Arc, Weak};
use alloc::vec::Vec;
use atomic_refcell::{AtomicRef, AtomicRefCell};
use core::cmp::max;
use core::{cmp::max, ops::RangeBounds};
use core::mem::size_of;
use core::sync::atomic::{AtomicI32, Ordering};
use crossbeam::atomic::AtomicCell;
Expand All @@ -37,8 +37,8 @@ use kerla_runtime::{
spinlock::{SpinLock, SpinLockGuard},
};
use kerla_utils::{alignment::align_up, bitmap::BitMap};

use super::signal::SigSet;
use crate::syscalls::clone3::CloneFlags;

type ProcessTable = BTreeMap<PId, Arc<Process>>;

Expand Down Expand Up @@ -490,6 +490,114 @@ impl Process {
SCHEDULER.lock().enqueue(pid);
Ok(child)
}

/// Creates a new process or thread from `parent` according to `flags`.
///
/// Unless `CLONE_PARENT` is set, `parent` becomes the parent of the created
/// process or thread; with `CLONE_PARENT` the child reuses `parent`'s own parent.
/// Returns the created child process.
///
/// The child gets a freshly allocated PID, joins `parent`'s process group, is
/// inserted into the process table and is enqueued in the scheduler.
///
/// Draft status: `exit_signal` is not used by this body yet, and several flags
/// are only recognised, not implemented (see the TODOs below).
///
/// # Errors
///
/// Propagates failures from `alloc_pid()`, `arch.fork()` and the VM `fork()`.
pub fn clone(parent: &Arc<Process>,
parent_frame: &PtRegs,
flags: CloneFlags,
exit_signal: usize
) -> Result<Arc<Process>> {
// The process table lock is taken first and held until the function returns.
let mut process_table = PROCESSES.lock();
let pid = alloc_pid(&mut process_table)?;
let process_group = parent.process_group();
let arch = parent.arch.fork(parent_frame)?;
let sigset = parent.sigset.lock(); // TODO: requires understanding

// CLONE_VM: set if the VM is shared between processes.
let vm = if flags.contains(CloneFlags::CLONE_VM) {
parent.vm.clone()
} else {
// NOTE(review): `unwrap()` panics if the parent has no VM — confirm that cannot happen here.
let forked_vm = parent.vm().as_ref().unwrap().lock().fork()?;
AtomicRefCell::new(Some(Arc::new(SpinLock::new(forked_vm))))
};
// CLONE_FS: set if fs info is shared between processes.
// Both branches are currently identical (placeholder).
let root_fs = if flags.contains(CloneFlags::CLONE_FS) {
parent.root_fs.clone()
} else {
parent.root_fs.clone() // TODO #105: if the flag isn't set we need a copy of the FS info, not another reference to it!
};
// CLONE_FILES: set if open files are shared between processes.
// Both branches are currently identical (placeholder).
let opened_files = if flags.contains(CloneFlags::CLONE_FILES){
SpinLock::new(parent.opened_files().lock().fork()) // TODO #106: we need to share the opened files table
} else {
SpinLock::new(parent.opened_files().lock().fork()) // This one's good - it copies
};
// CLONE_SIGHAND: set if signal handlers and blocked signals are shared.
// Both branches are currently identical (placeholder).
let signals = if flags.contains(CloneFlags::CLONE_SIGHAND) {
SpinLock::new(SignalDelivery::new()) // TODO #107: we need to share the table of signal handlers if requested
} else {
SpinLock::new(SignalDelivery::new())
};
// TODO: semantics of CloneFlags::CLONE_PIDFD still to be understood (since Linux 5.2); set if a pidfd should be placed in parent

// TODO: the flag below cannot be supported, as Kerla doesn't have tracing yet
//CloneFlags::CLONE_PTRACE // set if we want to let tracing continue on the child too

// TODO: more research required on how to handle vfork()
//CloneFlags::CLONE_VFORK // set if the parent wants the child to wake it up on mm_release

// CLONE_PARENT: set if we want to have the same parent as the cloner.
let parent_weak = if flags.contains(CloneFlags::CLONE_PARENT) {
parent.parent.clone() // TODO: add protection against doing that for the init process to prevent multiple process trees, consider throwing error
} else {
Arc::downgrade(parent)
};

// TODO: the flag below cannot work, as support of thread groups requires proper handling of PIDs and TIDs, which Kerla doesn't do yet
// CloneFlags::CLONE_THREAD // Same thread group?

// TODO: the flag below cannot be supported, as Kerla doesn't have namespaces yet
//CloneFlags::CLONE_NEWNS // New mount namespace group

// TODO: the flag below cannot be supported, as Kerla doesn't have SYS V semaphores yet
//CloneFlags::CLONE_SYSVSEM // share system V SEM_UNDO semantics

// TODO: the flag below cannot be supported, as Kerla doesn't handle thread local storage yet
//CloneFlags::CLONE_SETTLS // create a new TLS for the child

// The three TID-related flags below are recognised but their bodies are intentionally empty for now.
if flags.contains(CloneFlags::CLONE_PARENT_SETTID) { // set the TID in the parent
// TODO: Deal with propagation of the parent's TID upwards! (Tricky, we don't have a notion of TIDs within a thread group yet)
};
if flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) { // clear the TID in the child
// TODO: Deal with clearing of the TID upwards! Deal with futex wake up at the given address.
};
// CloneFlags::CLONE_DETACHED // Unused, ignored

// TODO when tracing supported: CloneFlags::CLONE_UNTRACED // set if the tracing process can't force CLONE_PTRACE on this clone

if flags.contains(CloneFlags::CLONE_CHILD_SETTID) { // set the TID in the child
// TODO: Deal with propagation of the child's TID upwards! (Tricky, we don't have a notion of TIDs within a thread group yet)
};

// TODO: flags below cannot be supported, as Kerla doesn't have namespaces yet
// CloneFlags::CLONE_NEWCGROUP // New cgroup namespace
// CloneFlags::CLONE_NEWUTS // New utsname namespace
// CloneFlags::CLONE_NEWIPC // New ipc namespace
// CloneFlags::CLONE_NEWUSER // New user namespace
// CloneFlags::CLONE_NEWPID // New pid namespace
// CloneFlags::CLONE_NEWNET // New network namespace

// TODO: the flag below cannot be supported, as Kerla doesn't have an IO scheduler yet
// CloneFlags::CLONE_IO // Clone io context

// Assemble the child from the pieces selected above; it starts out runnable with no children.
let child = Arc::new(Process {
process_group: AtomicRefCell::new(Arc::downgrade(&process_group)),
pid,
state: AtomicCell::new(ProcessState::Runnable),
parent: parent_weak,
cmdline: AtomicRefCell::new(parent.cmdline().clone()),
children: SpinLock::new(Vec::new()),
vm,
opened_files,
root_fs,
arch,
signals,
signaled_frame: AtomicCell::new(None),
sigset: SpinLock::new(sigset.clone()),
});

// Register the child with its process group, the cloner, the process table and the scheduler.
// NOTE(review): the child is pushed onto `parent`'s children even when CLONE_PARENT
// made `parent.parent` its parent — confirm this is intended.
process_group.lock().add(Arc::downgrade(&child));
parent.children().push(child.clone());
process_table.insert(pid, child.clone());
SCHEDULER.lock().enqueue(pid);
Ok(child)
}
}

impl Drop for Process {
Expand Down
139 changes: 139 additions & 0 deletions kernel/syscalls/clone3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
use crate::prelude::*;
use crate::syscalls::SyscallHandler;
use crate::{
ctypes::*
};
use kerla_runtime::address::UserVAddr;
use bitflags::bitflags;


bitflags! {
/// Flags used by [`clone()`] system call
/// Source: `/usr/include/linux/sched.h`
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please avoid copying from the Linux source code, even for a definition like this. Please refer to musl instead.

///
/// [`clone()`]: https://linux.die.net/man/2/clone
pub struct CloneFlags : c_uint {
const CLONE_VM = 0x00000100; // set if VM shared between processes
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please avoid adding a comment at the end of the line and add a full stop at the end of the comment.

Suggested change
const CLONE_VM = 0x00000100; // set if VM shared between processes
/// Set if VM shared between processes.
const CLONE_VM = 0x00000100;

const CLONE_FS = 0x00000200; // set if fs info shared between processes
const CLONE_FILES = 0x00000400; // set if open files shared between processes
const CLONE_SIGHAND = 0x00000800; // set if signal handlers and blocked signals shared
const CLONE_PIDFD = 0x00001000; // set if a pidfd should be placed in parent
const CLONE_PTRACE = 0x00002000; // set if we want to let tracing continue on the child too
const CLONE_VFORK = 0x00004000; // set if the parent wants the child to wake it up on mm_release
const CLONE_PARENT = 0x00008000; // set if we want to have the same parent as the cloner
const CLONE_THREAD = 0x00010000; // Same thread group?
const CLONE_NEWNS = 0x00020000; // New mount namespace group
const CLONE_SYSVSEM = 0x00040000; // share system V SEM_UNDO semantics
const CLONE_SETTLS = 0x00080000; // create a new TLS for the child
const CLONE_PARENT_SETTID = 0x00100000; // set the TID in the parent
const CLONE_CHILD_CLEARTID = 0x00200000; // clear the TID in the child
const CLONE_DETACHED = 0x00400000; // Unused, ignored
const CLONE_UNTRACED = 0x00800000; // set if the tracing process can't force CLONE_PTRACE on this clone
const CLONE_CHILD_SETTID = 0x01000000; // set the TID in the child
const CLONE_NEWCGROUP = 0x02000000; // New cgroup namespace
const CLONE_NEWUTS = 0x04000000; // New utsname namespace
const CLONE_NEWIPC = 0x08000000; // New ipc namespace
const CLONE_NEWUSER = 0x10000000; // New user namespace
const CLONE_NEWPID = 0x20000000; // New pid namespace
const CLONE_NEWNET = 0x40000000; // New network namespace
const CLONE_IO = 0x80000000; // Clone io context
}
}

#[repr(C, align(8))]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Why do you need align(8) here?

pub struct CloneArgs {
flags: u64,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Use rustfmt for formatting.

pidfd: u64,
child_tid: u64,
parent_tid: u64,
exit_signal: u64,
stack: u64,
stack_size: u64,
tls: u64,
set_tid: u64,
set_tid_size: u64,
cgroup: u64,
}

/// Internal structure for the `clone3()` system call.
///
/// Modeled on `kernel_clone_args` from `include/linux/sched/task.h`.
pub(crate) struct KernelCloneArgs {
    flags: CloneFlags,
    /// Address to store pidfd into.
    pidfd: Option<UserVAddr>,
    /// Address to store child TID into or read futex address.
    child_tid: Option<UserVAddr>,
    /// Address to store parent TID into.
    parent_tid: Option<UserVAddr>,
    /// Exit signal.
    // TODO: I'd suggest an enum for signal IDs for type safety.
    exit_signal: c_int,
    stack: UserVAddr,
    stack_size: usize,
    tls: UserVAddr,
    /// Handles TID array, requires namespace support.
    set_tid: Option<UserVAddr>,
    /// Number of elements in `set_tid`.
    set_tid_size: usize,
    cgroup: c_int,
    io_thread: c_int,
    // TODO: Kerla doesn't have support for cgroups yet, so these are left out:
    // struct cgroup *cgrp;
    // struct css_set *cset; // cgroup sets
}

impl From<CloneArgs> for KernelCloneArgs {
    /// Converts the raw `CloneArgs` record into the kernel-internal form.
    ///
    /// Addresses rejected by `UserVAddr::new_nonnull()` (`pidfd`, `child_tid`,
    /// `parent_tid`, `set_tid`) become `None`. The conversion itself never fails;
    /// out-of-range values are narrowed with `as` casts.
    fn from(item: CloneArgs) -> Self {
        KernelCloneArgs {
            // Only the low 32 bits are inspected and unknown bits are dropped silently.
            flags: CloneFlags::from_bits_truncate(item.flags as u32),
            pidfd: UserVAddr::new_nonnull(item.pidfd as usize).ok(),
            child_tid: UserVAddr::new_nonnull(item.child_tid as usize).ok(),
            parent_tid: UserVAddr::new_nonnull(item.parent_tid as usize).ok(),
            // TODO: we need a nicer signal type internally.
            exit_signal: item.exit_signal as c_int,
            // TODO: this might be a problem - it is unclear which type is output and
            // which is input in the C ABI.
            // NOTE(review): the address comes straight from the caller-supplied record
            // and is not validated here — confirm `new_unchecked` is acceptable.
            stack: unsafe { UserVAddr::new_unchecked(item.stack as usize) },
            stack_size: item.stack_size as usize,
            // NOTE(review): same unchecked conversion as for `stack`.
            tls: unsafe { UserVAddr::new_unchecked(item.tls as usize) },
            set_tid: UserVAddr::new_nonnull(item.set_tid as usize).ok(),
            set_tid_size: item.set_tid_size as usize,
            cgroup: item.cgroup as c_int,
            // TODO: find out what that is!
            io_thread: 0,
        }
    }
}

impl<'a> SyscallHandler<'a> {
    /// Placeholder for a `fork()` built on the clone machinery.
    ///
    /// Not implemented yet: always fails with `ENOSYS`.
    // TODO: place `fork()` here too!
    pub fn _sys_fork(&mut self) -> Result<isize> {
        // Intended shape of the implementation:
        //
        // struct kernel_clone_args args = {
        //     .exit_signal = SIGCHLD,
        // };
        //
        // return kernel_clone(&args);
        Err(Error::new(Errno::ENOSYS))
    }

    /// Handler for `vfork()`.
    ///
    /// Not implemented yet: always fails with `ENOSYS`.
    pub fn sys_vfork(&mut self) -> Result<isize> {
        // Intended shape of the implementation:
        //
        // struct kernel_clone_args args = {
        //     .flags = CLONE_VFORK | CLONE_VM,
        //     .exit_signal = SIGCHLD,
        // };
        //
        // return kernel_clone(&args);
        Err(Error::new(Errno::ENOSYS))
    }

    /// Handler for `clone()`.
    ///
    /// Not implemented yet. For now it only stores a zero TID at `parent_tid`
    /// and `child_tid` when they are provided and then fails with `ENOSYS`.
    /// `flags`, `child_stack` and `tls` are not used yet.
    ///
    /// # Errors
    ///
    /// Returns the error of a failed write to `parent_tid` or `child_tid`;
    /// otherwise always returns `ENOSYS`.
    pub fn sys_clone(
        &mut self,
        flags: CloneFlags,
        child_stack: Option<UserVAddr>,
        parent_tid: Option<UserVAddr>,
        child_tid: Option<UserVAddr>,
        tls: Option<UserVAddr>,
    ) -> Result<isize> {
        // TODO: implement

        if let Some(ptid_vaddr) = parent_tid {
            let ptid = 0i32;
            // Propagate a failed user-memory write instead of discarding it.
            ptid_vaddr.write(&ptid)?;
        }

        if let Some(ctid_vaddr) = child_tid {
            let ctid = 0i32;
            // Propagate a failed user-memory write instead of discarding it.
            ctid_vaddr.write(&ctid)?;
        }

        Err(Error::new(Errno::ENOSYS))
    }

    /// Handler for `clone3()`.
    ///
    /// Not implemented yet: `uargs` and `size` are ignored and the call always
    /// fails with `ENOSYS`.
    pub fn sys_clone3(&mut self, uargs: &CloneArgs, size: usize) -> Result<isize> {
        Err(Error::new(Errno::ENOSYS))
    }
}
17 changes: 17 additions & 0 deletions kernel/syscalls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub(self) mod chdir;
pub(self) mod chmod;
pub(self) mod clock_gettime;
pub(self) mod close;
pub(crate) mod clone3; // TODO: we have structures and bitfields that may require moving
pub(self) mod connect;
pub(self) mod dup2;
pub(self) mod execve;
Expand Down Expand Up @@ -102,6 +103,7 @@ pub(self) struct IoVec {
len: usize,
}

// TODO: these are valid for x86_64
const SYS_READ: usize = 0;
const SYS_WRITE: usize = 1;
const SYS_OPEN: usize = 2;
Expand Down Expand Up @@ -131,7 +133,9 @@ const SYS_LISTEN: usize = 50;
const SYS_GETSOCKNAME: usize = 51;
const SYS_GETPEERNAME: usize = 52;
const SYS_GETSOCKOPT: usize = 55;
const SYS_CLONE: usize = 56;
const SYS_FORK: usize = 57;
const SYS_VFORK: usize = 58;
const SYS_EXECVE: usize = 59;
const SYS_EXIT: usize = 60;
const SYS_WAIT4: usize = 61;
Expand All @@ -152,6 +156,7 @@ const SYS_SETGID: usize = 106;
const SYS_GETEUID: usize = 107;
const SYS_SETPGID: usize = 109;
const SYS_GETPPID: usize = 110;

const SYS_GETPGID: usize = 121;
const SYS_SETGROUPS: usize = 116;
const SYS_ARCH_PRCTL: usize = 158;
Expand All @@ -163,6 +168,8 @@ const SYS_CLOCK_GETTIME: usize = 228;
const SYS_UTIMES: usize = 235;
const SYS_LINKAT: usize = 265;
const SYS_GETRANDOM: usize = 318;
const SYS_CLONE3: usize = 435;


fn resolve_path(uaddr: usize) -> Result<PathBuf> {
const PATH_MAX: usize = 512;
Expand Down Expand Up @@ -308,7 +315,15 @@ impl<'a> SyscallHandler<'a> {
UserVAddr::new_nonnull(a2)?,
UserVAddr::new_nonnull(a3)?,
),
SYS_CLONE => self.sys_clone(
clone3::CloneFlags::from_bits(a1 as u32).ok_or_else(|| Errno::EINVAL)?, // TODO: check if valid / it may be correct only for some platforms
UserVAddr::new_nonnull(a2).map_or(None, |v| Some(v)),
UserVAddr::new_nonnull(a3).map_or(None, |v| Some(v)),
UserVAddr::new_nonnull(a4).map_or(None, |v| Some(v)),
UserVAddr::new_nonnull(a5).map_or(None, |v| Some(v)),
),
SYS_FORK => self.sys_fork(),
SYS_VFORK => self.sys_vfork(),
SYS_WAIT4 => self.sys_wait4(
PId::new(a1 as i32),
UserVAddr::new(a2),
Expand Down Expand Up @@ -386,6 +401,7 @@ impl<'a> SyscallHandler<'a> {
}

fn syscall_name_by_number(n: usize) -> &'static str {
// TODO: Consider one of crates automatically dealing with enum serialization
match n {
0 => "read",
1 => "write",
Expand Down Expand Up @@ -722,6 +738,7 @@ fn syscall_name_by_number(n: usize) -> &'static str {
332 => "statx",
333 => "io_pgetevents",
334 => "rseq",
435 => "clone3",
_ => "(unknown)",
}
}