From f5b2038871d3441e1c7f32439ff422957e7ab828 Mon Sep 17 00:00:00 2001 From: codeironman <1553389239@qq.com> Date: Thu, 31 Oct 2024 00:50:34 +0800 Subject: [PATCH 1/5] ospp project (feature) add namespace overlayfs cgroup (#949) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## 开发进展: ## namespace - pid_namespace 基本实现,基于pid_struct等数据结构实现隔离 - mnt_namespace 基本实现,挂载点的隔离通过不同的挂载树来实现 - usernamespace 作为支持性的namespace,目前受限实现全局静态 ## overlayfs - 实现若干个文件系统的叠加,在mount中传入多个路径作为多个fs的mount路径以及最后merge层的fs路径 - copy-up机制的,除最上层外其他层为只读层,满足写时拷贝,需要修改的时候copy到上层修改 - whiteout特殊文件,用于标记在下层需要被删除的文件用来掩盖需要删除的文件 ## cgroups - 目前cgroups还处于框架阶段,之后具体实现具体的内存、CPU等子系统 --- docs/index.rst | 2 + docs/kernel/container/index.rst | 13 + docs/kernel/container/namespaces/index.rst | 14 + .../container/namespaces/mnt_namespace.md | 19 + .../container/namespaces/pid_namespace.md | 21 + docs/kernel/filesystem/index.rst | 1 + docs/kernel/filesystem/unionfs/index.rst | 10 + docs/kernel/filesystem/unionfs/overlayfs.md | 26 ++ kernel/crates/ida/src/lib.rs | 5 + kernel/src/arch/riscv64/process/mod.rs | 2 +- kernel/src/arch/x86_64/process/mod.rs | 2 +- kernel/src/cgroup/mem_cgroup.rs | 6 + kernel/src/cgroup/mod.rs | 48 ++ kernel/src/filesystem/mod.rs | 1 + kernel/src/filesystem/overlayfs/copy_up.rs | 41 ++ kernel/src/filesystem/overlayfs/entry.rs | 32 ++ kernel/src/filesystem/overlayfs/mod.rs | 432 ++++++++++++++++++ kernel/src/filesystem/ramfs/mod.rs | 69 +-- kernel/src/filesystem/vfs/mod.rs | 27 +- kernel/src/filesystem/vfs/syscall.rs | 7 +- kernel/src/init/initial_kthread.rs | 3 + kernel/src/lib.rs | 5 +- kernel/src/libs/ida/src/lib.rs | 55 +++ kernel/src/namespaces/mnt_namespace.rs | 206 +++++++++ kernel/src/namespaces/mod.rs | 92 ++++ kernel/src/namespaces/namespace.rs | 119 +++++ kernel/src/namespaces/pid_namespace.rs | 273 +++++++++++ kernel/src/namespaces/syscall.rs | 50 ++ kernel/src/namespaces/ucount.rs | 187 ++++++++ kernel/src/namespaces/user_namespace.rs | 135 ++++++ 
kernel/src/process/fork.rs | 52 ++- kernel/src/process/mod.rs | 39 +- kernel/src/process/syscall.rs | 8 +- kernel/src/syscall/mod.rs | 6 +- user/apps/test_namespace/Cargo.toml | 7 + user/apps/test_namespace/Makefile | 56 +++ user/apps/test_namespace/makefile.toml | 63 +++ user/apps/test_namespace/src/main.rs | 38 ++ user/apps/test_overlayfs/.gitignore | 1 + user/apps/test_overlayfs/Makefile | 20 + user/apps/test_overlayfs/main.c | 92 ++++ user/dadk/config/test_namespace.dadk | 25 + user/dadk/config/test_overlayfs.dadk | 25 + 43 files changed, 2279 insertions(+), 56 deletions(-) create mode 100644 docs/kernel/container/index.rst create mode 100644 docs/kernel/container/namespaces/index.rst create mode 100644 docs/kernel/container/namespaces/mnt_namespace.md create mode 100644 docs/kernel/container/namespaces/pid_namespace.md create mode 100644 docs/kernel/filesystem/unionfs/index.rst create mode 100644 docs/kernel/filesystem/unionfs/overlayfs.md create mode 100644 kernel/src/cgroup/mem_cgroup.rs create mode 100644 kernel/src/cgroup/mod.rs create mode 100644 kernel/src/filesystem/overlayfs/copy_up.rs create mode 100644 kernel/src/filesystem/overlayfs/entry.rs create mode 100644 kernel/src/filesystem/overlayfs/mod.rs create mode 100644 kernel/src/libs/ida/src/lib.rs create mode 100644 kernel/src/namespaces/mnt_namespace.rs create mode 100644 kernel/src/namespaces/mod.rs create mode 100644 kernel/src/namespaces/namespace.rs create mode 100644 kernel/src/namespaces/pid_namespace.rs create mode 100644 kernel/src/namespaces/syscall.rs create mode 100644 kernel/src/namespaces/ucount.rs create mode 100644 kernel/src/namespaces/user_namespace.rs create mode 100644 user/apps/test_namespace/Cargo.toml create mode 100644 user/apps/test_namespace/Makefile create mode 100644 user/apps/test_namespace/makefile.toml create mode 100644 user/apps/test_namespace/src/main.rs create mode 100644 user/apps/test_overlayfs/.gitignore create mode 100644 user/apps/test_overlayfs/Makefile 
create mode 100644 user/apps/test_overlayfs/main.c create mode 100644 user/dadk/config/test_namespace.dadk create mode 100644 user/dadk/config/test_overlayfs.dadk diff --git a/docs/index.rst b/docs/index.rst index 5d2aa82b3..a0d9c869f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,10 +30,12 @@ kernel/debug/index kernel/ktest/index kernel/cpu_arch/index + kernel/container/index kernel/libs/index kernel/trace/index + .. toctree:: :maxdepth: 1 :caption: 应用层 diff --git a/docs/kernel/container/index.rst b/docs/kernel/container/index.rst new file mode 100644 index 000000000..0a5709d3a --- /dev/null +++ b/docs/kernel/container/index.rst @@ -0,0 +1,13 @@ +==================================== +容器化 +==================================== + + 这里是DragonOS中,与容器化相关的说明文档。 + + 主要包括namespace,overlayfs和cgroup + +.. toctree:: + :maxdepth: 2 + + namespaces/index + ../filesystem/unionfs/index diff --git a/docs/kernel/container/namespaces/index.rst b/docs/kernel/container/namespaces/index.rst new file mode 100644 index 000000000..2cfe02100 --- /dev/null +++ b/docs/kernel/container/namespaces/index.rst @@ -0,0 +1,14 @@ +==================================== +名称空间 +==================================== + +DragonOS的namespaces目前支持pid_namespace和mnt_namespace,预计之后会继续完善。 +namespace是容器化实现过程中的重要组成部分。 + +由于目前os是单用户,user_namespace为全局静态 + +.. 
toctree:: + :maxdepth: 1 + + pid_namespace + mnt_namespace diff --git a/docs/kernel/container/namespaces/mnt_namespace.md b/docs/kernel/container/namespaces/mnt_namespace.md new file mode 100644 index 000000000..dd828adfb --- /dev/null +++ b/docs/kernel/container/namespaces/mnt_namespace.md @@ -0,0 +1,19 @@ +# 挂载名称空间 + +## 底层架构 + +pcb -> nsproxy -> mnt_namespace + +每一个挂载文件系统都有各自独立的挂载点,表现在数据结构上是一个挂载的红黑树,每一个名称空间中挂载是独立的,所以文件系统的挂载和卸载不会影响别的名称空间 + +## 系统调用接口 + + +- clone + - CLONE_NEWNS用于创建一个新的 MNT 命名空间。提供独立的文件系统挂载点 +- unshare + - 使用 CLONE_NEWNS 标志调用 unshare() 后,后续创建的所有子进程都将在新的命名空间中运行。 +- setns + - 将进程加入到指定的名称空间 +- chroot + - 将当前进程的根目录更改为指定的路径,提供文件系统隔离。 \ No newline at end of file diff --git a/docs/kernel/container/namespaces/pid_namespace.md b/docs/kernel/container/namespaces/pid_namespace.md new file mode 100644 index 000000000..4e9210cc0 --- /dev/null +++ b/docs/kernel/container/namespaces/pid_namespace.md @@ -0,0 +1,21 @@ +# 进程名称空间 +:::{note} 本文作者:操丰毅 1553389239@qq.com + +2024年10月30日 ::: +pid_namespace 是内核中的一种名称空间,用于实现进程隔离,允许在不同的名称空间中运行的进程有独立的pid视图 + +## 底层架构 + +pcb -> nsproxy -> pid_namespace +- pid_namespace 内有独立的一套进程分配器,以及孤儿进程回收器,独立管理内部的pid +- 不同进程的详细信息都存放在proc文件系统中,里面的找到对应的pid号里面的信息都在pid中,记录的是pid_namespace中的信息 +- pid_namespace等限制由ucount来控制管理 + +## 系统调用接口 + +- clone + - CLONE_NEWPID用于创建一个新的 PID 命名空间。使用这个标志时,子进程将在新的 PID 命名空间内运行,进程 ID 从 1 开始。 +- unshare + - 使用 CLONE_NEWPID 标志调用 unshare() 后,后续创建的所有子进程都将在新的命名空间中运行。 +- getpid + - 在命名空间中调用 getpid() 会返回进程在当前 PID 命名空间中的进程 ID \ No newline at end of file diff --git a/docs/kernel/filesystem/index.rst b/docs/kernel/filesystem/index.rst index ff0c01b01..ef81b8ddb 100644 --- a/docs/kernel/filesystem/index.rst +++ b/docs/kernel/filesystem/index.rst @@ -13,4 +13,5 @@ todo: 由于文件系统模块重构,文档暂时不可用,预计在2023年4 vfs/index sysfs kernfs + unionfs/index diff --git a/docs/kernel/filesystem/unionfs/index.rst b/docs/kernel/filesystem/unionfs/index.rst new file mode 100644 index 000000000..141fb7a29 --- /dev/null +++ 
b/docs/kernel/filesystem/unionfs/index.rst @@ -0,0 +1,10 @@ +==================================== +联合文件系统 +==================================== +Union Filesystem: +OverlayFS 将多个文件系统(称为“层”)合并为一个逻辑文件系统,使用户看到一个统一的目录结构。 + +.. toctree:: + :maxdepth: 1 + + overlayfs diff --git a/docs/kernel/filesystem/unionfs/overlayfs.md b/docs/kernel/filesystem/unionfs/overlayfs.md new file mode 100644 index 000000000..33ae97bd4 --- /dev/null +++ b/docs/kernel/filesystem/unionfs/overlayfs.md @@ -0,0 +1,26 @@ +# overlayfs + +OverlayFS是目前使用最多的联合文件系统,原理简单方便使用,主要用于容器中。 +在 Docker 中,OverlayFS 是默认的存储驱动之一。Docker 为每个容器创建一个独立的上层目录,而所有容器共享同一个下层镜像文件。这样的设计使得容器之间的资源共享更加高效,同时减少了存储需求。 +## 架构设计 +overlayfs主要有两个层,以及一个虚拟的合并层 +- Lower Layer(下层):通常是 只读 文件系统。可以包含多层。 +- Upper Layer(上层):为 可写层,所有的写操作都会在这一层上进行。 +- Merged Layer(合并层):上层和下层的逻辑视图合并后,向用户呈现的最终文件系统。 + + +## 工作原理 +- 读取操作: + - OverlayFS 会优先从 Upper Layer 读取文件。如果文件不存在于上层,则读取 Lower Layer 中的内容。 +- 写入操作: + - 如果一个文件位于 Lower Layer 中,并尝试写入该文件,系统会将其 copy-up 到 Upper Layer 并在上层写入。如果文件已经存在于 Upper Layer,则直接在该层写入。 +- 删除操作: + - 当删除文件时,OverlayFS 会在上层创建一个标记为 whiteout 的条目,这会隐藏下层的文件。 + +## Copy-up +- 写时拷贝 +当 下层 中的一个文件被修改时,它会被复制到 上层(称为 copy-up)。之后的所有修改都会发生在上层的文件副本上。 + + +## 实现逻辑 +通过构建OvlInode并为其实现IndexNode这个trait来代表上层或者下层的inode,具体的有关文件、文件夹的操作都在OvlInode的IndexNode实现中(见 kernel/src/filesystem/overlayfs/mod.rs)。 \ No newline at end of file diff --git a/kernel/crates/ida/src/lib.rs b/kernel/crates/ida/src/lib.rs index 5d0d85a79..d9676bc92 100644 --- a/kernel/crates/ida/src/lib.rs +++ b/kernel/crates/ida/src/lib.rs @@ -140,6 +140,11 @@ impl IdAllocator { pub fn used(&self) -> usize { self.used } + + /// 返回最大id数 + pub fn get_max_id(&self) -> usize { + self.max_id + } } impl core::fmt::Debug for IdAllocator { diff --git a/kernel/src/arch/riscv64/process/mod.rs b/kernel/src/arch/riscv64/process/mod.rs index 89ec982d3..f7d4e2258 100644 --- a/kernel/src/arch/riscv64/process/mod.rs +++ b/kernel/src/arch/riscv64/process/mod.rs @@ -104,7 +104,7 @@ impl ProcessManager { pub fn copy_thread( current_pcb: &Arc, new_pcb: &Arc, - clone_args: KernelCloneArgs, + 
clone_args: &KernelCloneArgs, current_trapframe: &TrapFrame, ) -> Result<(), SystemError> { let clone_flags = clone_args.flags; diff --git a/kernel/src/arch/x86_64/process/mod.rs b/kernel/src/arch/x86_64/process/mod.rs index c4382cdc7..be2f53387 100644 --- a/kernel/src/arch/x86_64/process/mod.rs +++ b/kernel/src/arch/x86_64/process/mod.rs @@ -299,7 +299,7 @@ impl ProcessManager { pub fn copy_thread( current_pcb: &Arc, new_pcb: &Arc, - clone_args: KernelCloneArgs, + clone_args: &KernelCloneArgs, current_trapframe: &TrapFrame, ) -> Result<(), SystemError> { let clone_flags = clone_args.flags; diff --git a/kernel/src/cgroup/mem_cgroup.rs b/kernel/src/cgroup/mem_cgroup.rs new file mode 100644 index 000000000..1d1381929 --- /dev/null +++ b/kernel/src/cgroup/mem_cgroup.rs @@ -0,0 +1,6 @@ +use super::CgroupSubsysState; + +struct MemCgroup { + css: CgroupSubsysState, + id: u32, +} diff --git a/kernel/src/cgroup/mod.rs b/kernel/src/cgroup/mod.rs new file mode 100644 index 000000000..baed2f199 --- /dev/null +++ b/kernel/src/cgroup/mod.rs @@ -0,0 +1,48 @@ +#![allow(dead_code, unused_variables, unused_imports)] +pub mod mem_cgroup; + +use alloc::{collections::LinkedList, rc::Weak, sync::Arc, vec::Vec}; + +use alloc::boxed::Box; + +use crate::filesystem::vfs::IndexNode; + +pub struct Cgroup { + css: Weak, + /// 当前所在的深度 + level: u32, + /// 支持的最大深度 + max_depth: u32, + /// 可见后代数量 + nr_descendants: u32, + /// 正在死亡后代数量 + nr_dying_descendants: u32, + /// 允许的最大后代数量 + max_descendants: u32, + /// css_set的数量 + nr_populated_csets: u32, + /// 子group中有任务的记数 + nr_populated_domain_children: u32, + /// 线程子group中有任务的记数 + nr_populated_threaded_children: u32, + /// 活跃线程子cgroup数量 + nr_threaded_children: u32, + /// 关联cgroup的inode + kernfs_node: Box, +} + +/// 控制资源的统计信息 +pub struct CgroupSubsysState { + cgroup: Arc, + /// 兄弟节点 + sibling: LinkedList>, + /// 孩子节点 + children: LinkedList>, +} + +pub struct CgroupSubsys {} + +/// cgroup_sub_state 的集合 +pub struct CssSet { + subsys: Vec>, +} diff --git 
a/kernel/src/filesystem/mod.rs b/kernel/src/filesystem/mod.rs index 90dcc51bf..772d21f41 100644 --- a/kernel/src/filesystem/mod.rs +++ b/kernel/src/filesystem/mod.rs @@ -4,6 +4,7 @@ pub mod eventfd; pub mod fat; pub mod kernfs; pub mod mbr; +pub mod overlayfs; pub mod procfs; pub mod ramfs; pub mod sysfs; diff --git a/kernel/src/filesystem/overlayfs/copy_up.rs b/kernel/src/filesystem/overlayfs/copy_up.rs new file mode 100644 index 000000000..80b94aefa --- /dev/null +++ b/kernel/src/filesystem/overlayfs/copy_up.rs @@ -0,0 +1,41 @@ +use super::OvlInode; +use crate::{ + filesystem::vfs::{IndexNode, Metadata}, + libs::spinlock::SpinLock, +}; +use alloc::sync::Arc; +use system_error::SystemError; + +impl OvlInode { + pub fn copy_up(&self) -> Result<(), SystemError> { + let mut upper_inode = self.upper_inode.lock(); + if upper_inode.is_some() { + return Ok(()); + } + + let lower_inode = self.lower_inode.as_ref().ok_or(SystemError::ENOENT)?; + + let metadata = lower_inode.metadata()?; + let new_upper_inode = self.create_upper_inode(metadata.clone())?; + + let mut buffer = vec![0u8; metadata.size as usize]; + let lock = SpinLock::new(crate::filesystem::vfs::FilePrivateData::Unused); + lower_inode.read_at(0, metadata.size as usize, &mut buffer, lock.lock())?; + + new_upper_inode.write_at(0, metadata.size as usize, &buffer, lock.lock())?; + + *upper_inode = Some(new_upper_inode); + + Ok(()) + } + + fn create_upper_inode(&self, metadata: Metadata) -> Result, SystemError> { + let upper_inode = self.upper_inode.lock(); + let upper_root_inode = upper_inode + .as_ref() + .ok_or(SystemError::ENOSYS)? 
+ .fs() + .root_inode(); + upper_root_inode.create_with_data(&self.dname()?.0, metadata.file_type, metadata.mode, 0) + } +} diff --git a/kernel/src/filesystem/overlayfs/entry.rs b/kernel/src/filesystem/overlayfs/entry.rs new file mode 100644 index 000000000..d6d69c87d --- /dev/null +++ b/kernel/src/filesystem/overlayfs/entry.rs @@ -0,0 +1,32 @@ +use alloc::sync::Arc; + +use alloc::vec::Vec; + +use crate::filesystem::vfs::IndexNode; + +use super::{OvlInode, OvlSuperBlock}; +#[derive(Debug)] +pub struct OvlEntry { + numlower: usize, // 下层数量 + lowerstack: Vec, +} + +impl OvlEntry { + pub fn new() -> Self { + Self { + numlower: 2, + lowerstack: Vec::new(), + } + } +} +#[derive(Debug)] +pub struct OvlPath { + layer: Arc, + inode: Arc, +} +#[derive(Debug)] +pub struct OvlLayer { + pub mnt: Arc, // 挂载点 + pub index: u32, // 0 是上层读写层,>0 是下层只读层 + pub fsid: u32, // 文件系统标识符 +} diff --git a/kernel/src/filesystem/overlayfs/mod.rs b/kernel/src/filesystem/overlayfs/mod.rs new file mode 100644 index 000000000..244217e32 --- /dev/null +++ b/kernel/src/filesystem/overlayfs/mod.rs @@ -0,0 +1,432 @@ +#![allow(dead_code, unused_variables, unused_imports)] +pub mod copy_up; +pub mod entry; + +use super::ramfs::{LockedRamFSInode, RamFSInode}; +use super::vfs::{self, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock}; +use super::vfs::{FSMAKER, ROOT_INODE}; +use crate::driver::base::device::device_number::DeviceNumber; +use crate::driver::base::device::device_number::Major; +use crate::filesystem::vfs::{FileSystemMaker, FileSystemMakerData}; +use crate::libs::spinlock::SpinLock; +use alloc::string::String; +use alloc::sync::Arc; +use alloc::sync::Weak; +use alloc::vec::Vec; +use entry::{OvlEntry, OvlLayer}; +use system_error::SystemError; + +const WHITEOUT_MODE: u64 = 0o020000 | 0o600; // whiteout字符设备文件模式与权限 +const WHITEOUT_DEV: DeviceNumber = DeviceNumber::new(Major::UNNAMED_MAJOR, 0); // Whiteout 文件设备号 +const WHITEOUT_FLAG: u64 = 0x1; + +#[distributed_slice(FSMAKER)] +static 
OVERLAYFSMAKER: FileSystemMaker = FileSystemMaker::new( + "overlay", + &(OverlayFS::make_overlayfs + as fn( + Option<&dyn FileSystemMakerData>, + ) -> Result, SystemError>), +); +#[derive(Debug)] +pub struct OverlayMountData { + upper_dir: String, + lower_dirs: Vec, + work_dir: String, +} + +impl OverlayMountData { + pub fn from_row(raw_data: *const u8) -> Result { + if raw_data.is_null() { + return Err(SystemError::EINVAL); + } + let len = (0..) + .find(|&i| unsafe { raw_data.add(i).read() } == 0) + .ok_or(SystemError::EINVAL)?; + let slice = unsafe { core::slice::from_raw_parts(raw_data, len) }; + let raw_str = core::str::from_utf8(slice).map_err(|_| SystemError::EINVAL)?; + let mut data = OverlayMountData { + upper_dir: String::new(), + lower_dirs: Vec::new(), + work_dir: String::new(), + }; + + for pair in raw_str.split(',') { + let mut parts = pair.split('='); + let key = parts.next().ok_or(SystemError::EINVAL)?; + let value = parts.next().ok_or(SystemError::EINVAL)?; + + match key { + "upperdir" => data.upper_dir = value.into(), + "lowerdir" => data.lower_dirs = value.split(':').map(|s| s.into()).collect(), + "workdir" => data.work_dir = value.into(), + _ => return Err(SystemError::EINVAL), + } + } + Ok(data) + } +} +impl FileSystemMakerData for OverlayMountData { + fn as_any(&self) -> &dyn core::any::Any { + self + } +} +#[derive(Debug)] +pub struct OvlSuperBlock { + super_block: SuperBlock, + pseudo_dev: DeviceNumber, // 虚拟设备号 + is_lower: bool, +} + +#[derive(Debug)] +struct OverlayFS { + numlayer: usize, + numfs: u32, + numdatalayer: usize, + layers: Vec, // 第0层为读写层,后面是只读层 + workdir: Arc, + root_inode: Arc, +} + +#[derive(Debug)] +struct OvlInode { + redirect: String, // 重定向路径 + file_type: FileType, + flags: SpinLock, + upper_inode: SpinLock>>, // 读写层 + lower_inode: Option>, // 只读层 + oe: Arc, + fs: Weak, +} +impl OvlInode { + pub fn new( + redirect: String, + upper: Option>, + lower_inode: Option>, + ) -> Self { + Self { + redirect, + file_type: 
FileType::Dir, + flags: SpinLock::new(0), + upper_inode: SpinLock::new(upper), + lower_inode, + oe: Arc::new(OvlEntry::new()), + fs: Weak::default(), + } + } +} + +impl FileSystem for OverlayFS { + fn root_inode(&self) -> Arc { + self.root_inode.clone() + } + + fn info(&self) -> vfs::FsInfo { + FsInfo { + blk_dev_id: 0, + max_name_len: 255, + } + } + + fn as_any_ref(&self) -> &dyn core::any::Any { + self + } + + fn name(&self) -> &str { + "overlayfs" + } + + fn super_block(&self) -> SuperBlock { + todo!() + } +} + +impl OverlayFS { + pub fn ovl_upper_mnt(&self) -> Arc { + self.layers[0].mnt.clone() + } + pub fn make_overlayfs( + data: Option<&dyn FileSystemMakerData>, + ) -> Result, SystemError> { + let mount_data = data + .and_then(|d| d.as_any().downcast_ref::()) + .ok_or(SystemError::EINVAL)?; + + let upper_inode = ROOT_INODE() + .lookup(&mount_data.upper_dir) + .map_err(|_| SystemError::EINVAL)?; + let upper_layer = OvlLayer { + mnt: Arc::new(OvlInode::new( + mount_data.upper_dir.clone(), + Some(upper_inode), + None, + )), + index: 0, + fsid: 0, + }; + + let lower_layers: Result, SystemError> = mount_data + .lower_dirs + .iter() + .enumerate() + .map(|(i, dir)| { + let lower_inode = ROOT_INODE().lookup(dir).map_err(|_| SystemError::EINVAL)?; // 处理错误 + Ok(OvlLayer { + mnt: Arc::new(OvlInode::new(dir.clone(), None, Some(lower_inode))), + index: (i + 1) as u32, + fsid: (i + 1) as u32, + }) + }) + .collect(); + + let lower_layers = lower_layers?; + + let workdir = Arc::new(OvlInode::new(mount_data.work_dir.clone(), None, None)); + + if lower_layers.is_empty() { + return Err(SystemError::EINVAL); + } + + let mut layers = Vec::new(); + layers.push(upper_layer); + layers.extend(lower_layers); + + let root_inode = layers[0].mnt.clone(); + + let fs = OverlayFS { + numlayer: layers.len(), + numfs: 1, + numdatalayer: layers.len() - 1, + layers, + workdir, + root_inode, + }; + Ok(Arc::new(fs)) + } +} + +impl OvlInode { + pub fn ovl_lower_redirect(&self) -> Option<&str> { + 
if self.file_type == FileType::File || self.file_type == FileType::Dir { + Some(&self.redirect) + } else { + None + } + } + + pub fn create_whiteout(&self, name: &str) -> Result<(), SystemError> { + let whiteout_mode = vfs::syscall::ModeType::S_IFCHR; + let mut upper_inode = self.upper_inode.lock(); + if let Some(ref upper_inode) = *upper_inode { + upper_inode.mknod(name, whiteout_mode, WHITEOUT_DEV)?; + } else { + let new_inode = self + .fs + .upgrade() + .ok_or(SystemError::EROFS)? + .root_inode() + .create(name, FileType::CharDevice, whiteout_mode)?; + *upper_inode = Some(new_inode); + } + let mut flags = self.flags.lock(); + *flags |= WHITEOUT_FLAG; // 标记为 whiteout + Ok(()) + } + + fn is_whiteout(&self) -> bool { + let flags = self.flags.lock(); + self.file_type == FileType::CharDevice && (*flags & WHITEOUT_FLAG) != 0 + } + + fn has_whiteout(&self, name: &str) -> bool { + let upper_inode = self.upper_inode.lock(); + if let Some(ref upper_inode) = *upper_inode { + if let Ok(inode) = upper_inode.find(name) { + if let Some(ovl_inode) = inode.as_any_ref().downcast_ref::() { + return ovl_inode.is_whiteout(); + } + } + } + false + } +} + +impl IndexNode for OvlInode { + fn read_at( + &self, + offset: usize, + len: usize, + buf: &mut [u8], + data: crate::libs::spinlock::SpinLockGuard, + ) -> Result { + if let Some(ref upper_inode) = *self.upper_inode.lock() { + return upper_inode.read_at(offset, len, buf, data); + } + + if let Some(lower_inode) = &self.lower_inode { + return lower_inode.read_at(offset, len, buf, data); + } + + Err(SystemError::ENOENT) + } + + fn write_at( + &self, + offset: usize, + len: usize, + buf: &[u8], + data: crate::libs::spinlock::SpinLockGuard, + ) -> Result { + if (*self.upper_inode.lock()).is_none() { + self.copy_up()?; + } + if let Some(ref upper_inode) = *self.upper_inode.lock() { + return upper_inode.write_at(offset, len, buf, data); + } + + Err(SystemError::EROFS) + } + + fn fs(&self) -> Arc { + self.fs.upgrade().unwrap() + } + + fn 
metadata(&self) -> Result { + if let Some(ref upper_inode) = *self.upper_inode.lock() { + return upper_inode.metadata(); + } + + if let Some(ref lower_inode) = self.lower_inode { + return lower_inode.metadata(); + } + Ok(Metadata::default()) + } + + fn as_any_ref(&self) -> &dyn core::any::Any { + self + } + + fn list(&self) -> Result, system_error::SystemError> { + let mut entries: Vec = Vec::new(); + let upper_inode = self.upper_inode.lock().clone(); /* clone the Arc out so the guard is dropped here: has_whiteout() below locks upper_inode again */ + if let Some(ref upper_inode) = upper_inode { + let upper_entries = upper_inode.list()?; + entries.extend(upper_entries); + } + if let Some(lower_inode) = &self.lower_inode { + let lower_entries = lower_inode.list()?; + for entry in lower_entries { + if !entries.contains(&entry) && !self.has_whiteout(&entry) { + entries.push(entry); + } + } + } + + Ok(entries) + } + + fn mkdir( + &self, + name: &str, + mode: vfs::syscall::ModeType, + ) -> Result, system_error::SystemError> { + if let Some(ref upper_inode) = *self.upper_inode.lock() { + upper_inode.mkdir(name, mode) + } else { + Err(SystemError::EROFS) + } + } + + fn rmdir(&self, name: &str) -> Result<(), SystemError> { + let upper_inode = self.upper_inode.lock().clone(); /* release the lock before create_whiteout(), which takes it again */ + if let Some(ref upper_inode) = upper_inode { + upper_inode.rmdir(name)?; + } else if let Some(lower_inode) = &self.lower_inode { + if lower_inode.find(name).is_ok() { + self.create_whiteout(name)?; + } else { + return Err(SystemError::ENOENT); + } + } else { + return Err(SystemError::ENOENT); + } + + Ok(()) + } + + fn unlink(&self, name: &str) -> Result<(), SystemError> { + let upper_inode = self.upper_inode.lock().clone(); /* release the lock before create_whiteout(), which takes it again */ + if let Some(ref upper_inode) = upper_inode { + upper_inode.unlink(name)?; + } else if let Some(lower_inode) = &self.lower_inode { + if lower_inode.find(name).is_ok() { + self.create_whiteout(name)?; + } else { + return Err(SystemError::ENOENT); + } + } else { + return Err(SystemError::ENOENT); + } + + Ok(()) + } + + fn link( + &self, + name: &str, + other: &Arc, + ) -> Result<(), 
system_error::SystemError> { + if let Some(ref upper_inode) = *self.upper_inode.lock() { + upper_inode.link(name, other) + } else { + Err(SystemError::EROFS) + } + } + + fn create( + &self, + name: &str, + file_type: vfs::FileType, + mode: vfs::syscall::ModeType, + ) -> Result, system_error::SystemError> { + if let Some(ref upper_inode) = *self.upper_inode.lock() { + upper_inode.create(name, file_type, mode) + } else { + Err(SystemError::EROFS) + } + } + + fn find(&self, name: &str) -> Result, system_error::SystemError> { + let upper_inode = self.upper_inode.lock().clone(); /* clone the Arc out so the guard is dropped here: has_whiteout() below locks upper_inode again */ + if let Some(ref upper) = upper_inode { + if let Ok(inode) = upper.find(name) { + return Ok(inode); + } + } + if self.has_whiteout(name) { + return Err(SystemError::ENOENT); + } + + if let Some(lower) = &self.lower_inode { + if let Ok(inode) = lower.find(name) { + return Ok(inode); + } + } + + Err(SystemError::ENOENT) + } + + fn mknod( + &self, + filename: &str, + mode: vfs::syscall::ModeType, + dev_t: crate::driver::base::device::device_number::DeviceNumber, + ) -> Result, system_error::SystemError> { + let upper_inode = self.upper_inode.lock(); + if let Some(ref inode) = *upper_inode { + inode.mknod(filename, mode, dev_t) + } else { + Err(SystemError::EROFS) + } + } +} diff --git a/kernel/src/filesystem/ramfs/mod.rs b/kernel/src/filesystem/ramfs/mod.rs index 5f1ec4864..61dddf34f 100644 --- a/kernel/src/filesystem/ramfs/mod.rs +++ b/kernel/src/filesystem/ramfs/mod.rs @@ -1,7 +1,7 @@ use core::any::Any; use core::intrinsics::unlikely; -use crate::filesystem::vfs::FSMAKER; +use crate::filesystem::vfs::{FileSystemMakerData, FSMAKER}; use crate::libs::rwlock::RwLock; use crate::{ driver::base::device::device_number::DeviceNumber, @@ -35,7 +35,7 @@ const RAMFS_MAX_NAMELEN: usize = 64; const RAMFS_BLOCK_SIZE: u64 = 512; /// @brief 内存文件系统的Inode结构体 #[derive(Debug)] -struct LockedRamFSInode(SpinLock); +pub struct LockedRamFSInode(pub SpinLock); /// @brief 内存文件系统结构体 #[derive(Debug)] @@ -70,6 +70,35 @@ pub 
struct RamFSInode { name: DName, } +impl RamFSInode { + pub fn new() -> Self { + Self { + parent: Weak::default(), + self_ref: Weak::default(), + children: BTreeMap::new(), + data: Vec::new(), + metadata: Metadata { + dev_id: 0, + inode_id: generate_inode_id(), + size: 0, + blk_size: 0, + blocks: 0, + atime: PosixTimeSpec::default(), + mtime: PosixTimeSpec::default(), + ctime: PosixTimeSpec::default(), + file_type: FileType::Dir, + mode: ModeType::from_bits_truncate(0o777), + nlinks: 1, + uid: 0, + gid: 0, + raw_dev: DeviceNumber::default(), + }, + fs: Weak::default(), + special_node: None, + name: Default::default(), + } + } +} impl FileSystem for RamFS { fn root_inode(&self) -> Arc { return self.root_inode.clone(); @@ -105,31 +134,8 @@ impl RamFS { RAMFS_MAX_NAMELEN as u64, ); // 初始化root inode - let root: Arc = Arc::new(LockedRamFSInode(SpinLock::new(RamFSInode { - parent: Weak::default(), - self_ref: Weak::default(), - children: BTreeMap::new(), - data: Vec::new(), - metadata: Metadata { - dev_id: 0, - inode_id: generate_inode_id(), - size: 0, - blk_size: 0, - blocks: 0, - atime: PosixTimeSpec::default(), - mtime: PosixTimeSpec::default(), - ctime: PosixTimeSpec::default(), - file_type: FileType::Dir, - mode: ModeType::from_bits_truncate(0o777), - nlinks: 1, - uid: 0, - gid: 0, - raw_dev: DeviceNumber::default(), - }, - fs: Weak::default(), - special_node: None, - name: Default::default(), - }))); + let root: Arc = + Arc::new(LockedRamFSInode(SpinLock::new(RamFSInode::new()))); let result: Arc = Arc::new(RamFS { root_inode: root, @@ -147,7 +153,9 @@ impl RamFS { return result; } - pub fn make_ramfs() -> Result, SystemError> { + pub fn make_ramfs( + _data: Option<&dyn FileSystemMakerData>, + ) -> Result, SystemError> { let fs = RamFS::new(); return Ok(fs); } @@ -155,7 +163,10 @@ impl RamFS { #[distributed_slice(FSMAKER)] static RAMFSMAKER: FileSystemMaker = FileSystemMaker::new( "ramfs", - &(RamFS::make_ramfs as fn() -> Result, SystemError>), + 
&(RamFS::make_ramfs + as fn( + Option<&dyn FileSystemMakerData>, + ) -> Result, SystemError>), ); impl IndexNode for LockedRamFSInode { diff --git a/kernel/src/filesystem/vfs/mod.rs b/kernel/src/filesystem/vfs/mod.rs index 7801258e2..67b7643a3 100644 --- a/kernel/src/filesystem/vfs/mod.rs +++ b/kernel/src/filesystem/vfs/mod.rs @@ -935,12 +935,20 @@ impl FileSystemMaker { FileSystemMaker { function, name } } - pub fn call(&self) -> Result, SystemError> { - (self.function)() + pub fn call( + &self, + data: Option<&dyn FileSystemMakerData>, + ) -> Result, SystemError> { + (self.function)(data) } } -pub type FileSystemNewFunction = fn() -> Result, SystemError>; +pub trait FileSystemMakerData: Send + Sync { + fn as_any(&self) -> &dyn Any; +} + +pub type FileSystemNewFunction = + fn(data: Option<&dyn FileSystemMakerData>) -> Result, SystemError>; #[macro_export] macro_rules! define_filesystem_maker_slice { @@ -956,9 +964,18 @@ macro_rules! define_filesystem_maker_slice { /// 调用指定数组中的所有初始化器 #[macro_export] macro_rules! 
producefs { - ($initializer_slice:ident,$filesystem:ident) => { + ($initializer_slice:ident,$filesystem:ident,$raw_data : ident) => { match $initializer_slice.iter().find(|&m| m.name == $filesystem) { - Some(maker) => maker.call(), + Some(maker) => { + let mount_data = match $filesystem { + "overlay" => OverlayMountData::from_row($raw_data).ok(), + _ => None, + }; + let data: Option<&dyn FileSystemMakerData> = + mount_data.as_ref().map(|d| d as &dyn FileSystemMakerData); + + maker.call(data) + } None => { log::error!("mismatch filesystem type : {}", $filesystem); Err(SystemError::EINVAL) diff --git a/kernel/src/filesystem/vfs/syscall.rs b/kernel/src/filesystem/vfs/syscall.rs index ef8220ef7..795469bd4 100644 --- a/kernel/src/filesystem/vfs/syscall.rs +++ b/kernel/src/filesystem/vfs/syscall.rs @@ -1,4 +1,5 @@ -use core::ffi::c_void; +use crate::filesystem::overlayfs::OverlayMountData; +use crate::filesystem::vfs::FileSystemMakerData; use core::mem::size_of; use alloc::{string::String, sync::Arc, vec::Vec}; @@ -1706,7 +1707,7 @@ impl Syscall { target: *const u8, filesystemtype: *const u8, _mountflags: usize, - _data: *const c_void, + data: *const u8, ) -> Result { let target = user_access::check_and_clone_cstr(target, Some(MAX_PATHLEN))? 
.into_string() @@ -1715,7 +1716,7 @@ impl Syscall { let fstype_str = user_access::check_and_clone_cstr(filesystemtype, Some(MAX_PATHLEN))?; let fstype_str = fstype_str.to_str().map_err(|_| SystemError::EINVAL)?; - let fstype = producefs!(FSMAKER, fstype_str)?; + let fstype = producefs!(FSMAKER, fstype_str, data)?; Vcore::do_mount(fstype, &target)?; diff --git a/kernel/src/init/initial_kthread.rs b/kernel/src/init/initial_kthread.rs index 0d71c8d43..25fb21911 100644 --- a/kernel/src/init/initial_kthread.rs +++ b/kernel/src/init/initial_kthread.rs @@ -10,6 +10,7 @@ use crate::{ arch::{interrupt::TrapFrame, process::arch_switch_to_user}, driver::{net::e1000e::e1000e::e1000e_init, virtio::virtio::virtio_probe}, filesystem::vfs::core::mount_root_fs, + namespaces::NsProxy, net::net_core::net_init, process::{ exec::ProcInitInfo, kthread::KernelThreadMechanism, stdio::stdio_init, ProcessFlags, @@ -140,6 +141,7 @@ fn run_init_process( trap_frame: &mut TrapFrame, ) -> Result<(), SystemError> { compiler_fence(Ordering::SeqCst); + ProcessManager::current_pcb().set_nsproxy(NsProxy::new()); // 初始化init进程的namespace let path = proc_init_info.proc_name.to_str().unwrap(); Syscall::do_execve( @@ -148,5 +150,6 @@ fn run_init_process( proc_init_info.envs.clone(), trap_frame, )?; + Ok(()) } diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 5a7e09535..881f6d319 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -48,6 +48,7 @@ mod libs; #[macro_use] mod include; mod bpf; +mod cgroup; mod debug; mod driver; // 如果driver依赖了libs,应该在libs后面导出 mod exception; @@ -56,6 +57,7 @@ mod init; mod ipc; mod misc; mod mm; +mod namespaces; mod net; mod perf; mod process; @@ -93,8 +95,6 @@ extern crate wait_queue_macros; use crate::mm::allocator::kernel_allocator::KernelAllocator; -use crate::process::ProcessManager; - #[cfg(all(feature = "backtrace", target_arch = "x86_64"))] extern crate mini_backtrace; @@ -112,6 +112,7 @@ pub static KERNEL_ALLOCATOR: KernelAllocator = KernelAllocator; 
#[no_mangle] pub fn panic(info: &PanicInfo) -> ! { use log::error; + use process::ProcessManager; error!("Kernel Panic Occurred."); diff --git a/kernel/src/libs/ida/src/lib.rs b/kernel/src/libs/ida/src/lib.rs new file mode 100644 index 000000000..ac25d557c --- /dev/null +++ b/kernel/src/libs/ida/src/lib.rs @@ -0,0 +1,55 @@ +#![no_std] +#![feature(core_intrinsics)] +#![allow(clippy::needless_return)] + +use core::intrinsics::unlikely; +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +/// id分配器 +/// +/// TODO: 当前只是为了简单实现功能,将来这里应使用类似linux的ida的方式去实现 +#[derive(Debug)] +pub struct IdAllocator { + current_id: AtomicUsize, + max_id: usize, + dead: AtomicBool, +} + +impl IdAllocator { + /// 创建一个新的id分配器 + pub const fn new(initial_id: usize, max_id: usize) -> Self { + Self { + current_id: AtomicUsize::new(initial_id), + max_id, + dead: AtomicBool::new(false), + } + } + + /// 分配一个新的id + /// + /// ## 返回 + /// + /// 如果分配成功,返回Some(id),否则返回None + pub fn alloc(&self) -> Option { + if unlikely(self.dead.load(Ordering::SeqCst)) { + return None; + } + + let ret = self.current_id.fetch_add(1, Ordering::SeqCst); + // 如果id溢出,panic + if ret == self.max_id { + self.dead.store(true, Ordering::SeqCst); + return None; + } + + return Some(ret); + } + + pub fn free(&self, _id: usize) { + // todo: free + } + + pub fn get_max_id(&self) -> usize { + self.max_id + } +} diff --git a/kernel/src/namespaces/mnt_namespace.rs b/kernel/src/namespaces/mnt_namespace.rs new file mode 100644 index 000000000..e5312d82c --- /dev/null +++ b/kernel/src/namespaces/mnt_namespace.rs @@ -0,0 +1,206 @@ +#![allow(dead_code, unused_variables, unused_imports)] +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; + +use alloc::boxed::Box; +use alloc::string::ToString; + +use alloc::string::String; + +use alloc::sync::Arc; +use system_error::SystemError; + +use super::namespace::Namespace; +use super::namespace::NsOperations; +use super::ucount::Ucount::MntNamespaces; +use 
super::{namespace::NsCommon, ucount::UCounts, user_namespace::UserNamespace}; +use crate::container_of; +use crate::filesystem::vfs::mount::MountFSInode; +use crate::filesystem::vfs::IndexNode; +use crate::filesystem::vfs::InodeId; +use crate::filesystem::vfs::MountFS; +use crate::filesystem::vfs::ROOT_INODE; +use crate::libs::rbtree::RBTree; +use crate::libs::wait_queue::WaitQueue; +use crate::process::fork::CloneFlags; +use crate::process::ProcessManager; +use crate::syscall::Syscall; +#[allow(dead_code)] +#[derive(Debug)] +pub struct MntNamespace { + /// namespace 共有的部分 + ns_common: Arc, + /// 关联的用户名字空间 + user_ns: Arc, + /// 资源计数器 + ucounts: Arc, + /// 根文件系统 + root: Option>, + /// 红黑树用于挂载所有挂载点 + mounts: RBTree, + /// 等待队列 + poll: WaitQueue, + /// 挂载序列号 + seq: AtomicU64, + /// 挂载点的数量 + nr_mounts: u32, + /// 待处理的挂载点 + pending_mounts: u32, +} + +impl Default for MntNamespace { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug)] +struct MntNsOperations { + name: String, + clone_flags: CloneFlags, +} + +/// 使用该结构体的时候加spinlock +#[derive(Clone, Debug)] +pub struct FsStruct { + umask: u32, //文件权限掩码 + pub root: Arc, + pub pwd: Arc, +} +impl Default for FsStruct { + fn default() -> Self { + Self::new() + } +} + +impl FsStruct { + pub fn new() -> Self { + Self { + umask: 0o22, + root: ROOT_INODE(), + pwd: ROOT_INODE(), + } + } + pub fn set_root(&mut self, inode: Arc) { + self.root = inode; + } + pub fn set_pwd(&mut self, inode: Arc) { + self.pwd = inode; + } +} + +impl Namespace for MntNamespace { + fn ns_common_to_ns(ns_common: Arc) -> Arc { + let ns_common_ptr = Arc::as_ptr(&ns_common); + container_of!(ns_common_ptr, MntNamespace, ns_common) + } +} + +impl MntNsOperations { + pub fn new(name: String) -> Self { + Self { + name, + clone_flags: CloneFlags::CLONE_NEWNS, + } + } +} + +impl NsOperations for MntNsOperations { + fn get(&self, pid: crate::process::Pid) -> Option> { + let pcb = ProcessManager::find(pid); + pcb.map(|pcb| 
pcb.get_nsproxy().read().mnt_namespace.ns_common.clone()) + } + // 不存在这个方法 + fn get_parent(&self, _ns_common: Arc) -> Result, SystemError> { + unreachable!() + } + fn install( + &self, + nsset: &mut super::NsSet, + ns_common: Arc, + ) -> Result<(), SystemError> { + let nsproxy = &mut nsset.nsproxy; + let mnt_ns = MntNamespace::ns_common_to_ns(ns_common); + if mnt_ns.is_anon_ns() { + return Err(SystemError::EINVAL); + } + nsproxy.mnt_namespace = mnt_ns; + + nsset.fs.lock().set_pwd(ROOT_INODE()); + nsset.fs.lock().set_root(ROOT_INODE()); + Ok(()) + } + fn owner(&self, ns_common: Arc) -> Arc { + let mnt_ns = MntNamespace::ns_common_to_ns(ns_common); + mnt_ns.user_ns.clone() + } + fn put(&self, ns_common: Arc) { + let pid_ns = MntNamespace::ns_common_to_ns(ns_common); + } +} +impl MntNamespace { + pub fn new() -> Self { + let ns_common = Arc::new(NsCommon::new(Box::new(MntNsOperations::new( + "mnt".to_string(), + )))); + + Self { + ns_common, + user_ns: Arc::new(UserNamespace::new()), + ucounts: Arc::new(UCounts::new()), + root: None, + mounts: RBTree::new(), + poll: WaitQueue::default(), + seq: AtomicU64::new(0), + nr_mounts: 0, + pending_mounts: 0, + } + } + /// anon 用来判断是否是匿名的.匿名函数的问题还需要考虑 + pub fn create_mnt_namespace( + &self, + user_ns: Arc, + anon: bool, + ) -> Result { + let ucounts = self.inc_mnt_namespace(user_ns.clone())?; + if ucounts.is_none() { + return Err(SystemError::ENOSPC); + } + let ucounts = ucounts.unwrap(); + let ns_common = Arc::new(NsCommon::new(Box::new(MntNsOperations::new( + "mnt".to_string(), + )))); + let seq = AtomicU64::new(0); + if !anon { + seq.fetch_add(1, core::sync::atomic::Ordering::SeqCst); + } + Ok(Self { + ns_common, + user_ns, + ucounts, + root: None, + mounts: RBTree::new(), + poll: WaitQueue::default(), + seq, + nr_mounts: 0, + pending_mounts: 0, + }) + } + + pub fn inc_mnt_namespace( + &self, + user_ns: Arc, + ) -> Result>, SystemError> { + Ok(self + .ucounts + .inc_ucounts(user_ns, Syscall::geteuid()?, MntNamespaces)) + } + 
+ pub fn dec_mnt_namespace(&self, uc: Arc) { + UCounts::dec_ucount(uc, super::ucount::Ucount::MntNamespaces) + } + //判断是不是匿名空间 + pub fn is_anon_ns(&self) -> bool { + self.seq.load(Ordering::SeqCst) == 0 + } +} diff --git a/kernel/src/namespaces/mod.rs b/kernel/src/namespaces/mod.rs new file mode 100644 index 000000000..a9043a125 --- /dev/null +++ b/kernel/src/namespaces/mod.rs @@ -0,0 +1,92 @@ +use alloc::sync::Arc; +use mnt_namespace::{FsStruct, MntNamespace}; +use pid_namespace::PidNamespace; +use system_error::SystemError; +use user_namespace::UserNamespace; + +use crate::{ + libs::spinlock::SpinLock, + process::{fork::CloneFlags, ProcessControlBlock}, +}; + +pub mod mnt_namespace; +pub mod namespace; +pub mod pid_namespace; +pub mod syscall; +pub mod ucount; +pub mod user_namespace; + +/// 管理 namespace,包含了所有namespace的信息 +#[derive(Clone)] +pub struct NsSet { + flags: u64, + nsproxy: NsProxy, + pub fs: Arc>, +} +#[derive(Debug, Clone)] +pub struct NsProxy { + pub pid_namespace: Arc, + pub mnt_namespace: Arc, +} +impl Default for NsProxy { + fn default() -> Self { + Self::new() + } +} + +impl NsProxy { + pub fn new() -> Self { + Self { + pid_namespace: Arc::new(PidNamespace::new()), + mnt_namespace: Arc::new(MntNamespace::new()), + } + } + pub fn set_pid_namespace(&mut self, new_pid_ns: Arc) { + self.pid_namespace = new_pid_ns; + } + + pub fn set_mnt_namespace(&mut self, new_mnt_ns: Arc) { + self.mnt_namespace = new_mnt_ns; + } +} + +pub fn create_new_namespaces( + clone_flags: u64, + pcb: &Arc, + user_ns: Arc, +) -> Result { + let mut nsproxy = NsProxy::new(); + // pid_namespace + let new_pid_ns = if (clone_flags & CloneFlags::CLONE_NEWPID.bits()) != 0 { + Arc::new(PidNamespace::new().create_pid_namespace( + pcb.get_nsproxy().read().pid_namespace.clone(), + user_ns.clone(), + )?) 
/// Recovers an `Arc<$struct>` from a raw pointer to its `$field` member.
///
/// The macro computes the byte offset of `$field` inside `$struct`, subtracts
/// it from `$ptr`, and hands the result to `Arc::from_raw`. `Arc` must be in
/// scope at the call site.
///
/// # Safety
///
/// The expansion is an `unsafe` block; the caller must guarantee that:
///
/// - `$ptr` points at the `$field` stored *inline* inside a live `$struct`
///   (a pointer to some other allocation that the field merely refers to
///   does not qualify);
/// - that `$struct` was allocated by `Arc` and its pointer is valid for
///   `Arc::from_raw`;
/// - the caller owns one strong reference for the returned `Arc` to take
///   over, because `Arc::from_raw` does not increment the count.
///
/// NOTE(review): the `ns_common_to_ns` implementations pass
/// `Arc::as_ptr(&ns_common)` where `ns_common` is an `Arc<NsCommon>`; that
/// looks like a pointer to a separate allocation rather than to an inline
/// field — confirm the first requirement holds for them.
#[macro_export]
macro_rules! container_of {
    ($ptr:expr, $struct:path, $field:ident) => {
        unsafe {
            let dummy = core::mem::MaybeUninit::<$struct>::uninit();
            let dummy_ptr = dummy.as_ptr();
            // `addr_of!` computes the field address without creating a
            // reference to the uninitialised placeholder value.
            let field_ptr = core::ptr::addr_of!((*dummy_ptr).$field) as usize;
            let offset = field_ptr - dummy_ptr as usize;
            Arc::from_raw(($ptr as *const u8).wrapping_sub(offset) as *mut $struct)
        }
    };
}
{ + pub static ref USER_NS: Arc = Arc::new(UserNamespace::new()); +} +use super::{create_new_namespaces, NsProxy, NsSet}; +pub trait NsOperations: Send + Sync + Debug { + fn get(&self, pid: Pid) -> Option>; + fn put(&self, ns_common: Arc); + fn install(&self, nsset: &mut NsSet, ns_common: Arc) -> Result<(), SystemError>; + fn owner(&self, ns_common: Arc) -> Arc; + fn get_parent(&self, ns_common: Arc) -> Result, SystemError>; +} +#[derive(Debug)] +pub struct NsCommon { + ops: Box, + stashed: Arc, +} + +impl NsCommon { + pub fn new(ops: Box) -> Self { + let inode = ROOT_INODE().find("proc").unwrap_or_else(|_| ROOT_INODE()); + Self { + ops, + stashed: inode, + } + } +} + +pub enum NsType { + Pid, + User, + Uts, + Ipc, + Net, + Mnt, + Cgroup, + Time, +} + +pub trait Namespace { + fn ns_common_to_ns(ns_common: Arc) -> Arc; +} + +pub fn check_unshare_flags(unshare_flags: u64) -> Result { + let valid_flags = CloneFlags::CLONE_THREAD + | CloneFlags::CLONE_FS + | CloneFlags::CLONE_NEWNS + | CloneFlags::CLONE_SIGHAND + | CloneFlags::CLONE_VM + | CloneFlags::CLONE_FILES + | CloneFlags::CLONE_SYSVSEM + | CloneFlags::CLONE_NEWUTS + | CloneFlags::CLONE_NEWIPC + | CloneFlags::CLONE_NEWNET + | CloneFlags::CLONE_NEWUSER + | CloneFlags::CLONE_NEWPID + | CloneFlags::CLONE_NEWCGROUP; + + if unshare_flags & !valid_flags.bits() != 0 { + return Err(SystemError::EINVAL); + } + Ok(0) +} + +pub fn unshare_nsproxy_namespaces(unshare_flags: u64) -> Result, SystemError> { + if (unshare_flags + & (CloneFlags::CLONE_NEWNS.bits() + | CloneFlags::CLONE_NEWUTS.bits() + | CloneFlags::CLONE_NEWIPC.bits() + | CloneFlags::CLONE_NEWNET.bits() + | CloneFlags::CLONE_NEWPID.bits() + | CloneFlags::CLONE_NEWCGROUP.bits())) + == 0 + { + return Ok(None); + } + let current = ProcessManager::current_pid(); + let pcb = ProcessManager::find(current).unwrap(); + let new_nsproxy = create_new_namespaces(unshare_flags, &pcb, USER_NS.clone())?; + Ok(Some(new_nsproxy)) +} + +pub fn switch_task_namespace(pcb: Arc, 
new_nsproxy: NsProxy) { + let ns = pcb.get_nsproxy(); + pcb.set_nsproxy(new_nsproxy); +} + +pub fn prepare_nsset(flags: u64) -> Result { + let current = ProcessManager::current_pcb(); + Ok(NsSet { + flags, + fs: current.fs_struct(), + nsproxy: create_new_namespaces(flags, ¤t, USER_NS.clone())?, + }) +} + +pub fn commit_nsset(nsset: NsSet) { + let flags = CloneFlags::from_bits_truncate(nsset.flags); + let current = ProcessManager::current_pcb(); + if flags.contains(CloneFlags::CLONE_NEWNS) { + let fs = current.fs_struct(); + let nsset_fs = nsset.fs.lock(); + fs.lock().set_pwd(nsset_fs.pwd.clone()); + fs.lock().set_root(nsset_fs.root.clone()); + } + switch_task_namespace(current, nsset.nsproxy); // 转移所有权 +} diff --git a/kernel/src/namespaces/pid_namespace.rs b/kernel/src/namespaces/pid_namespace.rs new file mode 100644 index 000000000..7f1eb05b2 --- /dev/null +++ b/kernel/src/namespaces/pid_namespace.rs @@ -0,0 +1,273 @@ +#![allow(dead_code, unused_variables, unused_imports)] +use alloc::vec::Vec; + +use super::namespace::Namespace; +use super::ucount::Ucount::PidNamespaces; +use super::NsSet; +use super::{namespace::NsCommon, ucount::UCounts, user_namespace::UserNamespace}; +use crate::container_of; +use crate::filesystem::vfs::{IndexNode, ROOT_INODE}; +use crate::namespaces::namespace::NsOperations; +use crate::process::fork::CloneFlags; +use crate::process::ProcessManager; +use crate::syscall::Syscall; +use crate::{libs::rwlock::RwLock, process::Pid}; +use alloc::boxed::Box; +use alloc::string::String; +use alloc::string::ToString; +use alloc::sync::Arc; +use ida::IdAllocator; +use system_error::SystemError; +use system_error::SystemError::ENOSPC; + +const INT16_MAX: u32 = 32767; +const MAX_PID_NS_LEVEL: usize = 32; +const PIDNS_ADDING: u32 = 1 << 31; +const PID_MAX: usize = 4096; +static PID_IDA: ida::IdAllocator = ida::IdAllocator::new(1, usize::MAX).unwrap(); +#[derive(Debug)] +#[repr(C)] +pub struct PidNamespace { + id_alloctor: RwLock, + /// 已经分配的进程数 + 
pid_allocated: u32, + /// 当前的pid_namespace所在的层数 + pub level: usize, + /// 父命名空间 + parent: Option>, + /// 资源计数器 + ucounts: Arc, + /// 关联的用户namespace + user_ns: Arc, + /// 回收孤儿进程的init进程 + child_reaper: Arc>, + /// namespace共有部分 + pub ns_common: Arc, +} + +impl Default for PidNamespace { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Clone)] +pub struct PidStrcut { + pub level: usize, + pub numbers: Vec, + pub stashed: Arc, +} + +impl Default for PidStrcut { + fn default() -> Self { + Self::new() + } +} +#[derive(Debug, Clone)] +pub struct UPid { + pub nr: Pid, // 在该pid_namespace 中的pid + pub ns: Arc, +} + +impl PidStrcut { + pub fn new() -> Self { + Self { + level: 0, + numbers: vec![UPid { + nr: Pid::new(0), + ns: Arc::new(PidNamespace::new()), + }], + stashed: ROOT_INODE(), + } + } + + pub fn put_pid(pid: PidStrcut) { + let ns = pid.numbers[pid.level].ns.clone(); + let id = pid.numbers[pid.level].nr.data(); + ns.id_alloctor.write().free(id); + } + pub fn alloc_pid(ns: Arc, set_tid: Vec) -> Result { + let mut set_tid_size = set_tid.len(); + if set_tid_size > ns.level + 1 { + return Err(SystemError::EINVAL); + } + + let mut numbers = Vec::::with_capacity(ns.level + 1); + let mut tid_iter = set_tid.into_iter().rev(); + let mut pid_ns = ns.clone(); // 当前正在处理的命名空间 + for i in (0..=ns.level).rev() { + let tid = tid_iter.next().unwrap_or(0); + if set_tid_size > 0 { + if tid < 1 || tid > INT16_MAX as usize { + return Err(SystemError::EINVAL); + } + set_tid_size -= 1; + } + let mut nr = tid; + + if tid == 0 { + nr = pid_ns + .id_alloctor + .write() + .alloc() + .expect("PID allocation failed."); + } + + numbers.insert( + i, + UPid { + nr: Pid::from(nr), + ns: pid_ns.clone(), + }, + ); + + if let Some(parent_ns) = &pid_ns.parent { + pid_ns = parent_ns.clone(); + } else { + break; // 根命名空间,无需继续向上。 + } + } + Ok(PidStrcut { + level: ns.level, + numbers, + stashed: ROOT_INODE(), + }) + } + + pub fn ns_of_pid(&self) -> Arc { + self.numbers[self.level].ns.clone() + 
} +} +#[derive(Debug)] +struct PidNsOperations { + name: String, + clone_flags: CloneFlags, +} +impl PidNsOperations { + pub fn new(name: String) -> Self { + Self { + name, + clone_flags: CloneFlags::CLONE_NEWPID, + } + } +} +impl Namespace for PidNamespace { + fn ns_common_to_ns(ns_common: Arc) -> Arc { + container_of!(Arc::as_ptr(&ns_common), PidNamespace, ns_common) + } +} + +impl NsOperations for PidNsOperations { + fn put(&self, ns_common: Arc) { + let _pid_ns = PidNamespace::ns_common_to_ns(ns_common); + // pid_ns 超出作用域自动drop 同时递归drop + } + + fn owner(&self, ns_common: Arc) -> Arc { + let pid_ns = PidNamespace::ns_common_to_ns(ns_common); + pid_ns.user_ns.clone() + } + + fn get_parent(&self, ns_common: Arc) -> Result, SystemError> { + let current = ProcessManager::current_pid(); + let pcb = ProcessManager::find(current).unwrap(); + let active = pcb.pid_strcut().read().ns_of_pid(); + let mut pid_ns = &PidNamespace::ns_common_to_ns(ns_common).parent; + + while let Some(ns) = pid_ns { + if Arc::ptr_eq(&active, ns) { + return Ok(ns.ns_common.clone()); + } + pid_ns = &ns.parent; + } + Err(SystemError::EPERM) + } + + fn get(&self, pid: Pid) -> Option> { + let pcb = ProcessManager::find(pid); + pcb.map(|pcb| pcb.get_nsproxy().read().pid_namespace.ns_common.clone()) + } + fn install(&self, nsset: &mut NsSet, ns_common: Arc) -> Result<(), SystemError> { + let nsproxy = &mut nsset.nsproxy; + let current = ProcessManager::current_pid(); + let pcb = ProcessManager::find(current).unwrap(); + let active = pcb.pid_strcut().read().ns_of_pid(); + let mut pid_ns = PidNamespace::ns_common_to_ns(ns_common); + if pid_ns.level < active.level { + return Err(SystemError::EINVAL); + } + while pid_ns.level > active.level { + if let Some(ns) = &pid_ns.parent { + pid_ns = ns.clone(); + } else { + break; + } + } + if Arc::ptr_eq(&pid_ns, &active) { + return Err(SystemError::EINVAL); + } + nsproxy.pid_namespace = pid_ns.clone(); + Ok(()) + } +} +impl PidNamespace { + pub fn new() -> Self 
{ + Self { + id_alloctor: RwLock::new(IdAllocator::new(1, PID_MAX).unwrap()), + pid_allocated: 1, + level: 0, + child_reaper: Arc::new(RwLock::new(Pid::from(1))), + parent: None, + ucounts: Arc::new(UCounts::new()), + user_ns: Arc::new(UserNamespace::new()), + ns_common: Arc::new(NsCommon::new(Box::new(PidNsOperations::new( + "pid".to_string(), + )))), + } + } + + pub fn create_pid_namespace( + &self, + parent: Arc, + user_ns: Arc, + ) -> Result { + let level = parent.level + 1; + if level > MAX_PID_NS_LEVEL { + return Err(ENOSPC); + } + let ucounts = self.inc_pid_namespaces(user_ns.clone())?; + + if ucounts.is_none() { + return Err(SystemError::ENOSPC); + } + let ucounts = ucounts.unwrap(); + + let ns_common = Arc::new(NsCommon::new(Box::new(PidNsOperations::new( + "pid".to_string(), + )))); + let child_reaper = parent.child_reaper.clone(); + Ok(Self { + id_alloctor: RwLock::new(IdAllocator::new(1, PID_MAX).unwrap()), + pid_allocated: PIDNS_ADDING, + level, + ucounts, + parent: Some(parent), + user_ns, + ns_common, + child_reaper, + }) + } + + pub fn inc_pid_namespaces( + &self, + user_ns: Arc, + ) -> Result>, SystemError> { + Ok(self + .ucounts + .inc_ucounts(user_ns, Syscall::geteuid()?, PidNamespaces)) + } + + pub fn dec_pid_namespaces(&mut self, uc: Arc) { + UCounts::dec_ucount(uc, PidNamespaces) + } +} diff --git a/kernel/src/namespaces/syscall.rs b/kernel/src/namespaces/syscall.rs new file mode 100644 index 000000000..fe6143dc7 --- /dev/null +++ b/kernel/src/namespaces/syscall.rs @@ -0,0 +1,50 @@ +use system_error::SystemError; + +use crate::{ + process::{fork::CloneFlags, ProcessManager}, + syscall::Syscall, +}; + +use super::namespace::{ + check_unshare_flags, commit_nsset, prepare_nsset, unshare_nsproxy_namespaces, +}; + +impl Syscall { + pub fn sys_unshare(mut unshare_flags: u64) -> Result { + if unshare_flags & CloneFlags::CLONE_NEWUSER.bits() != 0 { + unshare_flags |= CloneFlags::CLONE_THREAD.bits() | CloneFlags::CLONE_FS.bits(); + } + + if 
unshare_flags & CloneFlags::CLONE_VM.bits() != 0 { + unshare_flags |= CloneFlags::CLONE_SIGHAND.bits(); + } + + if unshare_flags & CloneFlags::CLONE_SIGHAND.bits() != 0 { + unshare_flags |= CloneFlags::CLONE_THREAD.bits(); + } + + if unshare_flags & CloneFlags::CLONE_NEWNS.bits() != 0 { + unshare_flags |= CloneFlags::CLONE_FS.bits(); + } + + let check = check_unshare_flags(unshare_flags)?; + + let current = ProcessManager::current_pcb(); + if let Some(nsproxy) = unshare_nsproxy_namespaces(unshare_flags)? { + *current.get_nsproxy().write() = nsproxy; + } + + Ok(check) + } + + pub fn sys_setns(_fd: i32, flags: u64) -> Result { + let check = check_unshare_flags(flags)?; + + let nsset = prepare_nsset(flags)?; + + if check == 0 { + commit_nsset(nsset) + }; + Ok(0) + } +} diff --git a/kernel/src/namespaces/ucount.rs b/kernel/src/namespaces/ucount.rs new file mode 100644 index 000000000..358c8cef0 --- /dev/null +++ b/kernel/src/namespaces/ucount.rs @@ -0,0 +1,187 @@ +#![allow(dead_code, unused_variables, unused_imports)] +use alloc::vec::Vec; +use core::{hash::Hash, sync::atomic::AtomicU32}; +use system_error::SystemError; + +use alloc::sync::Arc; +use hashbrown::HashMap; +use log::warn; + +use super::user_namespace::UserNamespace; +use crate::libs::mutex::Mutex; + +#[derive(Clone, Copy)] +pub enum Ucount { + UserNamespaces = 1, + PidNamespaces = 2, + UtsNamespaces = 3, + IpcNamespaces = 4, + NetNamespaces = 5, + MntNamespaces = 6, + CgroupNamespaces = 7, + TimeNamespaces = 8, + Counts = 9, +} + +pub enum UcountRlimit { + Nproc = 1, + Msgqueue = 2, + Sigpending = 3, + Memlock = 4, + Counts = 5, +} + +lazy_static! 
{ + static ref COUNT_MANAGER: Arc = Arc::new(CountManager::new()); +} + +#[derive(Debug)] +pub struct UCounts { + /// 对应的user_namespace + ns: Arc, + /// 用户标识符 + uid: usize, + count: AtomicU32, + ucount: Vec, //[AtomicU32; UCOUNT_COUNTS as usize], + rlimit: Vec, //[AtomicU32; UCOUNT_RLIMIT_COUNTS as usize], +} + +impl Default for UCounts { + fn default() -> Self { + Self::new() + } +} +impl UCounts { + pub fn new() -> Self { + Self { + ns: Arc::new(UserNamespace::new()), + uid: 0, + count: AtomicU32::new(1), + ucount: (0..Ucount::Counts as usize) + .map(|_| AtomicU32::new(0)) + .collect(), + rlimit: (0..UcountRlimit::Counts as usize) + .map(|_| AtomicU32::new(0)) + .collect(), + } + } + + fn alloc_ucounts(&self, ns: Arc, uid: usize) -> Arc { + let mut counts = COUNT_MANAGER.counts.lock(); + let key = UKey { + user_ns: ns.clone(), + uid, + }; + let uc = if let Some(uc) = counts.get(&key) { + self.count + .fetch_add(1, core::sync::atomic::Ordering::SeqCst); + uc.clone() + } else { + Arc::new(Self { + ns, + uid, + count: AtomicU32::new(1), + ucount: (0..Ucount::Counts as usize) + .map(|_| AtomicU32::new(0)) + .collect(), + rlimit: (0..UcountRlimit::Counts as usize) + .map(|_| AtomicU32::new(0)) + .collect(), + }) + }; + counts.insert(key, uc.clone()); + uc + } + + pub fn inc_ucounts( + &self, + user_ns: Arc, + uid: usize, + ucount_type: Ucount, + ) -> Option> { + let uc_type = ucount_type as usize; + let uc = self.alloc_ucounts(user_ns, uid); + let mut uc_iter = Some(uc.clone()); + let mut ucounts_add = vec![]; + while let Some(iter) = uc_iter { + let num = iter.ucount[uc_type].fetch_add(1, core::sync::atomic::Ordering::SeqCst); + ucounts_add.push(iter.clone()); + // 分配失败回滚 + if num > iter.ns.ucount_max[uc_type] { + for add_iter in &ucounts_add { + add_iter.ucount[uc_type].fetch_sub(1, core::sync::atomic::Ordering::SeqCst); + } + return None; + } + uc_iter = iter.ns.ucounts.clone(); + } + return Some(uc); + } + + fn find_ucounts(user_ns: Arc, uid: usize) -> Option> { + 
let counts = COUNT_MANAGER.counts.lock(); + let key = UKey { user_ns, uid }; + counts.get(&key).cloned() + } + + fn get_ucounts(uc: Arc) { + let mut counts = COUNT_MANAGER.counts.lock(); + let ukey = UKey { + user_ns: uc.ns.clone(), + uid: uc.uid, + }; + counts.insert(ukey, uc); + } + + pub fn dec_ucount(uc: Arc, ucount_type: Ucount) { + let mut uc_iter = Some(uc.clone()); + let uc_type = ucount_type as usize; + while let Some(iter) = uc_iter { + let num = iter.ucount[uc_type].fetch_sub(1, core::sync::atomic::Ordering::SeqCst); + if num == 0 { + warn!("count has reached zero"); + } + uc_iter = iter.ns.ucounts.clone(); + } + Self::put_ucounts(uc); + } + + fn put_ucounts(uc: Arc) { + let mut counts = COUNT_MANAGER.counts.lock(); + let key = UKey { + user_ns: uc.ns.clone(), + uid: uc.uid, + }; + counts.remove(&key); + } +} +struct UKey { + user_ns: Arc, + uid: usize, +} + +impl Hash for UKey { + fn hash(&self, state: &mut H) { + let user_ns_ptr = Arc::as_ptr(&self.user_ns); + user_ns_ptr.hash(state); + self.uid.hash(state) + } +} +impl Eq for UKey {} +impl PartialEq for UKey { + fn eq(&self, other: &Self) -> bool { + Arc::ptr_eq(&self.user_ns, &other.user_ns) && self.uid == other.uid + } +} + +struct CountManager { + counts: Mutex>>, +} + +impl CountManager { + fn new() -> Self { + Self { + counts: Mutex::new(HashMap::new()), + } + } +} diff --git a/kernel/src/namespaces/user_namespace.rs b/kernel/src/namespaces/user_namespace.rs new file mode 100644 index 000000000..2314ccfcb --- /dev/null +++ b/kernel/src/namespaces/user_namespace.rs @@ -0,0 +1,135 @@ +#![allow(dead_code, unused_variables, unused_imports)] + +use alloc::boxed::Box; + +use crate::libs::rwlock::RwLock; +use alloc::string::String; +use alloc::string::ToString; + +use alloc::vec::Vec; +use system_error::SystemError; + +use crate::namespaces::namespace::NsCommon; +use crate::namespaces::ucount::UCounts; +use crate::process::fork::CloneFlags; +use crate::process::Pid; +use alloc::sync::Arc; + +use 
const UID_GID_MAP_MAX_BASE_EXTENTS: usize = 5;
const UCOUNT_MAX: u32 = 62636;

/// Mapping table for user ids and group ids.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct UidGidMap {
    nr_extents: u32,
    extent: Vec<UidGidExtent>,
}

/// One contiguous range in an id mapping.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct UidGidExtent {
    first: u32,
    lower_first: u32,
    count: u32,
}

impl UidGidMap {
    /// Builds a map holding `UID_GID_MAP_MAX_BASE_EXTENTS` default extents,
    /// with `nr_extents` set to 1.
    pub fn new() -> Self {
        let extent: Vec<UidGidExtent> = core::iter::repeat(UidGidExtent::new())
            .take(UID_GID_MAP_MAX_BASE_EXTENTS)
            .collect();
        Self {
            nr_extents: 1,
            extent,
        }
    }
}

impl UidGidExtent {
    /// Builds an extent starting at 0 on both sides and spanning `u32::MAX`
    /// ids.
    pub fn new() -> Self {
        let (first, lower_first, count) = (0, 0, u32::MAX);
        Self {
            first,
            lower_first,
            count,
        }
    }
}
1, + parent: None, + ns_common: Arc::new(NsCommon::new(Box::new(UserNsOperations::new( + "User".to_string(), + )))), + pid: Arc::new(RwLock::new(Pid::new(1))), + ucount_max: vec![UCOUNT_MAX; Counts as usize], + ucounts: None, + rlimit_max: vec![65535, 10, 32000, 64 * 1024], + } + } +} diff --git a/kernel/src/process/fork.rs b/kernel/src/process/fork.rs index 547325c6c..bde3b0b8e 100644 --- a/kernel/src/process/fork.rs +++ b/kernel/src/process/fork.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use core::{intrinsics::unlikely, sync::atomic::Ordering}; use alloc::{string::ToString, sync::Arc}; @@ -10,6 +11,7 @@ use crate::{ ipc::signal::flush_signal_handlers, libs::rwlock::RwLock, mm::VirtAddr, + namespaces::{create_new_namespaces, namespace::USER_NS, pid_namespace::PidStrcut}, process::ProcessFlags, sched::{sched_cgroup_fork, sched_fork}, smp::core::smp_get_processor_id, @@ -20,6 +22,7 @@ use super::{ kthread::{KernelThreadPcbPrivate, WorkerPrivate}, KernelStack, Pid, ProcessControlBlock, ProcessManager, }; +const MAX_PID_NS_LEVEL: usize = 32; bitflags! { /// 进程克隆标志 @@ -84,8 +87,8 @@ bitflags! 
{ /// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活 /// /// 仅仅作为参数传递 -#[derive(Debug, Clone, Copy)] #[allow(dead_code)] +#[derive(Debug, Clone)] pub struct KernelCloneArgs { pub flags: CloneFlags, @@ -93,7 +96,7 @@ pub struct KernelCloneArgs { pub pidfd: VirtAddr, pub child_tid: VirtAddr, pub parent_tid: VirtAddr, - pub set_tid: VirtAddr, + pub set_tid: Vec, /// 进程退出时发送的信号 pub exit_signal: Signal, @@ -122,7 +125,7 @@ impl KernelCloneArgs { pidfd: null_addr, child_tid: null_addr, parent_tid: null_addr, - set_tid: null_addr, + set_tid: Vec::with_capacity(MAX_PID_NS_LEVEL), exit_signal: Signal::SIGCHLD, stack: 0, stack_size: 0, @@ -260,6 +263,34 @@ impl ProcessManager { return Ok(()); } + #[inline(never)] + fn copy_namespaces( + clone_flags: &CloneFlags, + current_pcb: &Arc, + new_pcb: &Arc, + ) -> Result<(), SystemError> { + if !clone_flags.contains(CloneFlags::CLONE_NEWNS) + && !clone_flags.contains(CloneFlags::CLONE_NEWUTS) + && !clone_flags.contains(CloneFlags::CLONE_NEWIPC) + && !clone_flags.contains(CloneFlags::CLONE_NEWPID) + && !clone_flags.contains(CloneFlags::CLONE_NEWNET) + && !clone_flags.contains(CloneFlags::CLONE_NEWCGROUP) + { + new_pcb.set_nsproxy(current_pcb.get_nsproxy().read().clone()); + return Ok(()); + } + + if clone_flags.contains(CloneFlags::CLONE_NEWIPC) + && clone_flags.contains(CloneFlags::CLONE_SYSVSEM) + { + return Err(SystemError::EINVAL); + } + + let new_nsproxy = create_new_namespaces(clone_flags.bits(), current_pcb, USER_NS.clone())?; + *new_pcb.nsproxy.write() = new_nsproxy; + Ok(()) + } + #[inline(never)] fn copy_files( clone_flags: &CloneFlags, @@ -422,6 +453,11 @@ impl ProcessManager { ) }); + Self::copy_namespaces(&clone_flags, current_pcb, pcb).unwrap_or_else(|e|{ + panic!("fork: Failed to copy namespace form current process, current pid: [{:?}], new pid: [{:?}]. 
Error: {:?}", + current_pcb.pid(), pcb.pid(), e) + }); + // 拷贝文件描述符表 Self::copy_files(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| { panic!( @@ -439,13 +475,19 @@ impl ProcessManager { }); // 拷贝线程 - Self::copy_thread(current_pcb, pcb, clone_args,current_trapframe).unwrap_or_else(|e| { + Self::copy_thread(current_pcb, pcb, &clone_args, current_trapframe).unwrap_or_else(|e| { panic!( "fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}", current_pcb.pid(), pcb.pid(), e ) }); - + if current_pcb.pid() != Pid(0) { + let new_pid = PidStrcut::alloc_pid( + pcb.get_nsproxy().read().pid_namespace.clone(), // 获取命名空间 + clone_args.set_tid.clone(), + )?; + *pcb.thread_pid.write() = new_pid; + } // 设置线程组id、组长 if clone_flags.contains(CloneFlags::CLONE_THREAD) { pcb.thread.write_irqsave().group_leader = diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index d5841e93b..1c5ce4bb4 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -50,11 +50,11 @@ use crate::{ ucontext::AddressSpace, VirtAddr, }, + namespaces::{mnt_namespace::FsStruct, pid_namespace::PidStrcut, NsProxy}, net::socket::SocketInode, - sched::completion::Completion, sched::{ - cpu_rq, fair::FairSchedEntity, prio::MAX_PRIO, DequeueFlag, EnqueueFlag, OnRq, SchedMode, - WakeupFlags, __schedule, + completion::Completion, cpu_rq, fair::FairSchedEntity, prio::MAX_PRIO, DequeueFlag, + EnqueueFlag, OnRq, SchedMode, WakeupFlags, __schedule, }, smp::{ core::smp_get_processor_id, @@ -90,7 +90,6 @@ pub static mut PROCESS_SWITCH_RESULT: Option> = None; /// 一个只改变1次的全局变量,标志进程管理器是否已经初始化完成 static mut __PROCESS_MANAGEMENT_INIT_DONE: bool = false; -#[derive(Debug)] pub struct SwitchResult { pub prev_pcb: Option>, pub next_pcb: Option>, @@ -609,14 +608,14 @@ bitflags! 
{ const RANDOMIZE = 1 << 8; } } - #[derive(Debug)] pub struct ProcessControlBlock { /// 当前进程的pid pid: Pid, /// 当前进程的线程组id(这个值在同一个线程组内永远不变) tgid: Pid, - + /// 有关Pid的相关的信息 + thread_pid: Arc>, basic: RwLock, /// 当前进程的自旋锁持有计数 preempt_count: AtomicUsize, @@ -654,12 +653,18 @@ pub struct ProcessControlBlock { /// 线程信息 thread: RwLock, + /// 进程文件系统的状态 + fs: Arc>, + ///闹钟定时器 alarm_timer: SpinLock>, /// 进程的robust lock列表 robust_list: RwLock>, + /// namespace的指针 + nsproxy: Arc>, + /// 进程作为主体的凭证集 cred: SpinLock, } @@ -722,10 +727,10 @@ impl ProcessControlBlock { let ppcb: Weak = ProcessManager::find(ppid) .map(|p| Arc::downgrade(&p)) .unwrap_or_default(); - let pcb = Self { pid, tgid: pid, + thread_pid: Arc::new(RwLock::new(PidStrcut::new())), basic: basic_info, preempt_count, flags, @@ -742,8 +747,10 @@ impl ProcessControlBlock { children: RwLock::new(Vec::new()), wait_queue: WaitQueue::default(), thread: RwLock::new(ThreadInfo::new()), + fs: Arc::new(SpinLock::new(FsStruct::new())), alarm_timer: SpinLock::new(None), robust_list: RwLock::new(None), + nsproxy: Arc::new(RwLock::new(NsProxy::new())), cred: SpinLock::new(cred), }; @@ -886,11 +893,21 @@ impl ProcessControlBlock { return self.pid; } + #[inline(always)] + pub fn pid_strcut(&self) -> Arc> { + self.thread_pid.clone() + } + #[inline(always)] pub fn tgid(&self) -> Pid { return self.tgid; } + #[inline(always)] + pub fn fs_struct(&self) -> Arc> { + self.fs.clone() + } + /// 获取文件描述符表的Arc指针 #[inline(always)] pub fn fd_table(&self) -> Arc> { @@ -1022,6 +1039,14 @@ impl ProcessControlBlock { pub fn alarm_timer_irqsave(&self) -> SpinLockGuard> { return self.alarm_timer.lock_irqsave(); } + + pub fn get_nsproxy(&self) -> Arc> { + self.nsproxy.clone() + } + + pub fn set_nsproxy(&self, nsprsy: NsProxy) { + *self.nsproxy.write() = nsprsy; + } } impl Drop for ProcessControlBlock { diff --git a/kernel/src/process/syscall.rs b/kernel/src/process/syscall.rs index 1a2fec460..9dd04a114 100644 --- a/kernel/src/process/syscall.rs +++ 
b/kernel/src/process/syscall.rs @@ -188,7 +188,13 @@ impl Syscall { /// @brief 获取当前进程的pid pub fn getpid() -> Result { let current_pcb = ProcessManager::current_pcb(); - return Ok(current_pcb.tgid()); + // if let Some(pid_ns) = ¤t_pcb.get_nsproxy().read().pid_namespace { + // // 获取该进程在命名空间中的 PID + // return Ok(current_pcb.pid_strcut().read().numbers[pid_ns.level].nr); + // // 返回命名空间中的 PID + // } + // 默认返回 tgid + Ok(current_pcb.tgid()) } /// @brief 获取指定进程的pgid diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 74f90610a..c83b67297 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -1,6 +1,5 @@ use core::{ ffi::{c_int, c_void}, - ptr::null, sync::atomic::{AtomicBool, Ordering}, }; @@ -1090,7 +1089,9 @@ impl Syscall { let source = args[0] as *const u8; let target = args[1] as *const u8; let filesystemtype = args[2] as *const u8; - return Self::mount(source, target, filesystemtype, 0, null()); + let mountflags = args[3]; + let data = args[4] as *const u8; // 额外的mount参数,实现自己的mountdata来获取 + return Self::mount(source, target, filesystemtype, mountflags, data); } SYS_UMOUNT2 => { @@ -1183,6 +1184,7 @@ impl Syscall { let flags = args[1] as u32; Self::sys_eventfd(initval, flags) } + SYS_UNSHARE => Self::sys_unshare(args[0] as u64), SYS_BPF => { let cmd = args[0] as u32; let attr = args[1] as *mut u8; diff --git a/user/apps/test_namespace/Cargo.toml b/user/apps/test_namespace/Cargo.toml new file mode 100644 index 000000000..9a287f068 --- /dev/null +++ b/user/apps/test_namespace/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "test-namespace" +version = "0.1.0" +edition = "2021" + +[dependencies] +nix = { version = "0.29.0", features = ["sched", "process"] } diff --git a/user/apps/test_namespace/Makefile b/user/apps/test_namespace/Makefile new file mode 100644 index 000000000..1b0274d20 --- /dev/null +++ b/user/apps/test_namespace/Makefile @@ -0,0 +1,56 @@ +TOOLCHAIN="+nightly-2023-08-15-x86_64-unknown-linux-gnu" +RUSTFLAGS+="" + 
+ifdef DADK_CURRENT_BUILD_DIR +# 如果是在dadk中编译,那么安装到dadk的安装目录中 + INSTALL_DIR = $(DADK_CURRENT_BUILD_DIR) +else +# 如果是在本地编译,那么安装到当前目录下的install目录中 + INSTALL_DIR = ./install +endif + +ifeq ($(ARCH), x86_64) + export RUST_TARGET=x86_64-unknown-linux-musl +else ifeq ($(ARCH), riscv64) + export RUST_TARGET=riscv64gc-unknown-linux-gnu +else +# 默认为x86_64,用于本地编译 + export RUST_TARGET=x86_64-unknown-linux-musl +endif + +run: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) + +build: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) + +clean: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) + +test: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) + +doc: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) doc --target $(RUST_TARGET) + +fmt: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt + +fmt-check: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt --check + +run-release: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release + +build-release: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release + +clean-release: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release + +test-release: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release + +.PHONY: install +install: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . 
--no-track --root $(INSTALL_DIR) --force diff --git a/user/apps/test_namespace/makefile.toml b/user/apps/test_namespace/makefile.toml new file mode 100644 index 000000000..dc3f667e6 --- /dev/null +++ b/user/apps/test_namespace/makefile.toml @@ -0,0 +1,63 @@ +# Makefile.toml + +[env] +TOOLCHAIN = "+nightly-2023-08-15-x86_64-unknown-linux-gnu" + +ARCH = { default = "x86_64" } +RUST_TARGET = { default = { if = "eq(env.ARCH, 'riscv64')", value = "riscv64gc-unknown-linux-gnu", else = "x86_64-unknown-linux-musl" } } +INSTALL_DIR = { default = { if = "defined(env.DADK_CURRENT_BUILD_DIR)", value = "${DADK_CURRENT_BUILD_DIR}", else = "./install" } } + +[tasks.build] +description = "Build the project" +command = "cargo" +args = ["${TOOLCHAIN}", "build", "--target", "${RUST_TARGET}"] + +[tasks.run] +description = "Run the project" +command = "cargo" +args = ["${TOOLCHAIN}", "run", "--target", "${RUST_TARGET}"] + +[tasks.clean] +description = "Clean the project" +command = "cargo" +args = ["${TOOLCHAIN}", "clean", "--target", "${RUST_TARGET}"] + +[tasks.test] +description = "Run the tests" +command = "cargo" +args = ["${TOOLCHAIN}", "test", "--target", "${RUST_TARGET}"] + +[tasks.doc] +description = "Generate documentation" +command = "cargo" +args = ["${TOOLCHAIN}", "doc", "--target", "${RUST_TARGET}"] + +[tasks.fmt] +description = "Format the code" +command = "cargo" +args = ["${TOOLCHAIN}", "fmt"] + +[tasks.fmt-check] +description = "Check code format" +command = "cargo" +args = ["${TOOLCHAIN}", "fmt", "--check"] + +[tasks.run-release] +description = "Run the project in release mode" +command = "cargo" +args = ["${TOOLCHAIN}", "run", "--target", "${RUST_TARGET}", "--release"] + +[tasks.build-release] +description = "Build the project in release mode" +command = "cargo" +args = ["${TOOLCHAIN}", "build", "--target", "${RUST_TARGET}", "--release"] + +[tasks.test-release] +description = "Test the project in release mode" +command = "cargo" +args = ["${TOOLCHAIN}", "test", 
"--target", "${RUST_TARGET}", "--release"] + +[tasks.install] +description = "Install the project" +command = "cargo" +args = ["${TOOLCHAIN}", "install", "--target", "${RUST_TARGET}", "--path", ".", "--no-track", "--root", "${INSTALL_DIR}", "--force"] diff --git a/user/apps/test_namespace/src/main.rs b/user/apps/test_namespace/src/main.rs new file mode 100644 index 000000000..3a2b1d33d --- /dev/null +++ b/user/apps/test_namespace/src/main.rs @@ -0,0 +1,38 @@ +extern crate nix; +use nix::sched::{self, CloneFlags}; +use nix::sys::wait::{waitpid, WaitStatus}; +use nix::unistd::{self, fork, ForkResult}; +use std::process; + +fn main() { + let clone_flags = CloneFlags::CLONE_NEWPID | CloneFlags::CLONE_NEWNS; + + println!("Parent process. PID: {}", unistd::getpid()); + unsafe { + match fork() { + Ok(ForkResult::Parent { child }) => { + println!("Parent process. Child PID: {}", child); + match waitpid(child, None) { + Ok(WaitStatus::Exited(pid, status)) => { + println!("Child {} exited with status: {}", pid, status); + } + Ok(_) => println!("Child process did not exit normally."), + Err(e) => println!("Error waiting for child process: {:?}", e), + } + } + Ok(ForkResult::Child) => { + // 使用 unshare 创建新的命名空间 + println!("Child process. PID: {}", unistd::getpid()); + if let Err(e) = sched::unshare(clone_flags) { + println!("Failed to unshare: {:?}", e); + process::exit(1); + } + println!("Child process. 
PID: {}", unistd::getpid()); + } + Err(err) => { + println!("Fork failed: {:?}", err); + process::exit(1); + } + } + } +} diff --git a/user/apps/test_overlayfs/.gitignore b/user/apps/test_overlayfs/.gitignore new file mode 100644 index 000000000..614b14a88 --- /dev/null +++ b/user/apps/test_overlayfs/.gitignore @@ -0,0 +1 @@ +test_overlayfs \ No newline at end of file diff --git a/user/apps/test_overlayfs/Makefile b/user/apps/test_overlayfs/Makefile new file mode 100644 index 000000000..6be387383 --- /dev/null +++ b/user/apps/test_overlayfs/Makefile @@ -0,0 +1,20 @@ +ifeq ($(ARCH), x86_64) + CROSS_COMPILE=x86_64-linux-musl- +else ifeq ($(ARCH), riscv64) + CROSS_COMPILE=riscv64-linux-musl- +endif + +CC=$(CROSS_COMPILE)gcc + +.PHONY: all +all: main.c + $(CC) -static -o test_overlayfs main.c + +.PHONY: install clean +install: all + mv test_overlayfs $(DADK_CURRENT_BUILD_DIR)/test_overlayfs + +clean: + rm test_overlayfs *.o + +fmt: diff --git a/user/apps/test_overlayfs/main.c b/user/apps/test_overlayfs/main.c new file mode 100644 index 000000000..180371d87 --- /dev/null +++ b/user/apps/test_overlayfs/main.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +// #define LOWERDIR "/tmp/overlayfs/lower" +// #define UPPERDIR "/tmp/overlayfs/upper" +// #define WORKDIR "/tmp/overlayfs/work" +// #define MERGEDDIR "/tmp/overlayfs/merged" + +// void create_directories() +// { +// mkdir(LOWERDIR, 0755); +// mkdir(UPPERDIR, 0755); +// mkdir(WORKDIR, 0755); +// mkdir(MERGEDDIR, 0755); +// } +#define TMPDIR "/tmp" +#define OVERLAYFSDIR "/tmp/overlayfs" +#define LOWERDIR "/tmp/overlayfs/lower" +#define UPPERDIR "/tmp/overlayfs/upper" +#define WORKDIR "/tmp/overlayfs/work" +#define MERGEDDIR "/tmp/overlayfs/merged" + +void create_directories() +{ + mkdir(TMPDIR, 0755); + mkdir(OVERLAYFSDIR, 0755); + mkdir(LOWERDIR, 0755); + mkdir(UPPERDIR, 0755); + mkdir(WORKDIR, 0755); + mkdir(MERGEDDIR, 0755); + printf("step1 : success\n"); +} + +void 
create_lower_file() +{ + char filepath[256]; + snprintf(filepath, sizeof(filepath), "%s/lowerfile.txt", LOWERDIR); + + int fd = open(filepath, O_CREAT | O_WRONLY, 0644); + if (fd < 0) + { + perror("Failed to create file in lowerdir"); + exit(EXIT_FAILURE); + } + write(fd, "This is a lower layer file.\n", 28); + close(fd); + printf("step2 : success\n"); +} + +void mount_overlayfs() +{ + char options[1024]; + snprintf(options, sizeof(options), + "lowerdir=%s,upperdir=%s,workdir=%s", + LOWERDIR, UPPERDIR, WORKDIR); + + if (mount("overlay", MERGEDDIR, "overlay", 0, options) != 0) + { + perror("Mount failed"); + exit(EXIT_FAILURE); + } + printf("OverlayFS mounted successfully.\n"); + printf("step3 : success\n"); +} + +void create_directory_in_merged() +{ + char dirpath[256]; + snprintf(dirpath, sizeof(dirpath), "%s/newdir", UPPERDIR); + + if (mkdir(dirpath, 0755) != 0) + { + perror("Failed to create directory in merged dir"); + exit(EXIT_FAILURE); + } + printf("Directory created in merged: %s\n", dirpath); + printf("step4 : success\n"); +} + +int main() +{ + create_directories(); + mount_overlayfs(); + create_directory_in_merged(); + return 0; +} \ No newline at end of file diff --git a/user/dadk/config/test_namespace.dadk b/user/dadk/config/test_namespace.dadk new file mode 100644 index 000000000..29f1afc62 --- /dev/null +++ b/user/dadk/config/test_namespace.dadk @@ -0,0 +1,25 @@ +{ + "name": "test_namespace", + "version": "0.1.0", + "description": "test namespace", + "rust_target": null, + "task_type": { + "BuildFromSource": { + "Local": { + "path": "apps/test_namespace" + } + } + }, + "depends": [], + "build": { + "build_command": "make install" + }, + "install": { + "in_dragonos_path": "/bin" + }, + "clean": { + "clean_command": "make clean" + }, + "envs": [], + "target_arch": ["x86_64"] +} \ No newline at end of file diff --git a/user/dadk/config/test_overlayfs.dadk b/user/dadk/config/test_overlayfs.dadk new file mode 100644 index 000000000..03592a6c8 --- /dev/null 
+++ b/user/dadk/config/test_overlayfs.dadk @@ -0,0 +1,25 @@ +{ + "name": "test_overlayfs", + "version": "0.1.0", + "description": "test overlayfs", + "rust_target": null, + "task_type": { + "BuildFromSource": { + "Local": { + "path": "apps/test_overlayfs" + } + } + }, + "depends": [], + "build": { + "build_command": "make install" + }, + "install": { + "in_dragonos_path": "/bin" + }, + "clean": { + "clean_command": "make clean" + }, + "envs": [], + "target_arch": ["x86_64"] +} \ No newline at end of file From 7b0ef10895108a0de5ff5ef3d2f93f40cf2e33a5 Mon Sep 17 00:00:00 2001 From: LoGin Date: Thu, 31 Oct 2024 01:05:34 +0800 Subject: [PATCH 2/5] ci: change rust src to crates-io-index while using GitHub workflow (#1023) Signed-off-by: longjin --- .github/workflows/makefile.yml | 16 ++++++++-------- tools/BUILD_CONTAINER_VERSION | 2 +- tools/docker-entrypoint.sh | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index cfb550834..6a1612e4a 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -12,14 +12,14 @@ jobs: name: Format check ${{ matrix.arch }} runs-on: ubuntu-latest continue-on-error: true - container: dragonos/dragonos-dev:v1.4 + container: dragonos/dragonos-dev:v1.5 strategy: matrix: arch: [x86_64, riscv64] steps: - - run: echo "Running in dragonos/dragonos-dev:v1.4" + - run: echo "Running in dragonos/dragonos-dev:v1.5" - uses: actions/checkout@v3 - name: Format check @@ -35,14 +35,14 @@ jobs: name: Kernel static test ${{ matrix.arch }} runs-on: ubuntu-latest continue-on-error: true - container: dragonos/dragonos-dev:v1.4 + container: dragonos/dragonos-dev:v1.5 strategy: matrix: arch: [x86_64, riscv64] steps: - - run: echo "Running in dragonos/dragonos-dev:v1.4" + - run: echo "Running in dragonos/dragonos-dev:v1.5" - uses: actions/checkout@v3 @@ -56,10 +56,10 @@ jobs: build-x86_64: runs-on: ubuntu-latest - container: 
dragonos/dragonos-dev:v1.4 + container: dragonos/dragonos-dev:v1.5 steps: - - run: echo "Running in dragonos/dragonos-dev:v1.4" + - run: echo "Running in dragonos/dragonos-dev:v1.5" - uses: actions/checkout@v3 - name: build the DragonOS @@ -78,10 +78,10 @@ jobs: build-riscv64: runs-on: ubuntu-latest - container: dragonos/dragonos-dev:v1.4 + container: dragonos/dragonos-dev:v1.5 steps: - - run: echo "Running in dragonos/dragonos-dev:v1.4" + - run: echo "Running in dragonos/dragonos-dev:v1.5" - uses: actions/checkout@v3 with: diff --git a/tools/BUILD_CONTAINER_VERSION b/tools/BUILD_CONTAINER_VERSION index 64c411b81..59b6ef75f 100644 --- a/tools/BUILD_CONTAINER_VERSION +++ b/tools/BUILD_CONTAINER_VERSION @@ -1 +1 @@ -v1.4 \ No newline at end of file +v1.5 \ No newline at end of file diff --git a/tools/docker-entrypoint.sh b/tools/docker-entrypoint.sh index 5fc44481d..ea8ccd655 100644 --- a/tools/docker-entrypoint.sh +++ b/tools/docker-entrypoint.sh @@ -1,3 +1,19 @@ #!/bin/bash +CONFIG_FILE=~/.cargo/config.toml + +change_rust_src_to_official() { +echo -e "[source.crates-io] \n \ +registry = \"sparse+https://index.crates.io/\" \n \ +[net] \n \ +git-fetch-with-cli = true \n \ +" > $CONFIG_FILE +} + +# Check if the GITHUB_ACTION environment variable is set and not empty +if [ -n "$GITHUB_ACTION" ]; then + change_rust_src_to_official +fi + + exec "$@" From 2e7e4cecb0ebb42d4c417997a7ebc4891f2826e6 Mon Sep 17 00:00:00 2001 From: MemoryShore <1353318529@qq.com> Date: Thu, 31 Oct 2024 13:48:23 +0800 Subject: [PATCH 3/5] Update NovaShell to cb835e03e4 (#1022) --- user/dadk/config/nova_shell-0.1.0.dadk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/user/dadk/config/nova_shell-0.1.0.dadk b/user/dadk/config/nova_shell-0.1.0.dadk index b3cd99ec4..490a1250b 100644 --- a/user/dadk/config/nova_shell-0.1.0.dadk +++ b/user/dadk/config/nova_shell-0.1.0.dadk @@ -6,7 +6,7 @@ "BuildFromSource": { "Git": { "url": 
"https://git.mirrors.dragonos.org.cn/DragonOS-Community/NovaShell.git", - "revision": "7bb802ad1e" + "revision": "cb835e03e4" } } }, From c7ad61d495d56fe2db26fa454699d1720b8115fb Mon Sep 17 00:00:00 2001 From: LoGin Date: Thu, 31 Oct 2024 22:14:45 +0800 Subject: [PATCH 4/5] =?UTF-8?q?fix:=20=E5=88=A0=E9=99=A4=E5=9C=A8=20#949?= =?UTF-8?q?=20=E4=B8=AD=E6=84=8F=E5=A4=96=E6=B7=BB=E5=8A=A0=E7=9A=84?= =?UTF-8?q?=E6=97=A7=E7=89=88ida=20(#1027)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/DragonOS-Community/DragonOS/pull/949 Signed-off-by: longjin --- kernel/src/libs/ida/src/lib.rs | 55 ---------------------------------- 1 file changed, 55 deletions(-) delete mode 100644 kernel/src/libs/ida/src/lib.rs diff --git a/kernel/src/libs/ida/src/lib.rs b/kernel/src/libs/ida/src/lib.rs deleted file mode 100644 index ac25d557c..000000000 --- a/kernel/src/libs/ida/src/lib.rs +++ /dev/null @@ -1,55 +0,0 @@ -#![no_std] -#![feature(core_intrinsics)] -#![allow(clippy::needless_return)] - -use core::intrinsics::unlikely; -use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; - -/// id分配器 -/// -/// TODO: 当前只是为了简单实现功能,将来这里应使用类似linux的ida的方式去实现 -#[derive(Debug)] -pub struct IdAllocator { - current_id: AtomicUsize, - max_id: usize, - dead: AtomicBool, -} - -impl IdAllocator { - /// 创建一个新的id分配器 - pub const fn new(initial_id: usize, max_id: usize) -> Self { - Self { - current_id: AtomicUsize::new(initial_id), - max_id, - dead: AtomicBool::new(false), - } - } - - /// 分配一个新的id - /// - /// ## 返回 - /// - /// 如果分配成功,返回Some(id),否则返回None - pub fn alloc(&self) -> Option { - if unlikely(self.dead.load(Ordering::SeqCst)) { - return None; - } - - let ret = self.current_id.fetch_add(1, Ordering::SeqCst); - // 如果id溢出,panic - if ret == self.max_id { - self.dead.store(true, Ordering::SeqCst); - return None; - } - - return Some(ret); - } - - pub fn free(&self, _id: usize) { - // todo: free - } - - pub fn get_max_id(&self) -> usize { - 
self.max_id - } -} From 01b8a76cdc5088e524b9f3c45c5ff827e88293d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=81=AB=E8=8A=B1?= Date: Tue, 5 Nov 2024 16:47:44 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=98=BB=E5=A1=9E?= =?UTF-8?q?=E9=98=85=E8=AF=BBeventfd=E6=97=B6=E6=97=A0=E6=B3=95=E8=A2=AB?= =?UTF-8?q?=E4=B8=AD=E6=96=AD=E7=9A=84bug=20(#1030)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: sparkzky --- kernel/src/filesystem/eventfd.rs | 37 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/kernel/src/filesystem/eventfd.rs b/kernel/src/filesystem/eventfd.rs index 4d0d9a76e..9143697a9 100644 --- a/kernel/src/filesystem/eventfd.rs +++ b/kernel/src/filesystem/eventfd.rs @@ -5,6 +5,7 @@ use crate::libs::spinlock::{SpinLock, SpinLockGuard}; use crate::libs::wait_queue::WaitQueue; use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData}; use crate::process::ProcessManager; +use crate::sched::SchedMode; use crate::syscall::Syscall; use alloc::collections::LinkedList; use alloc::string::String; @@ -76,6 +77,11 @@ impl EventFdInode { Err(SystemError::ENOENT) } + + fn readable(&self) -> bool { + let count = self.eventfd.lock().count; + return count > 0; + } } impl IndexNode for EventFdInode { @@ -104,26 +110,29 @@ impl IndexNode for EventFdInode { _offset: usize, len: usize, buf: &mut [u8], - data: SpinLockGuard, + data_guard: SpinLockGuard, ) -> Result { + let data = data_guard.clone(); + drop(data_guard); if len < 8 { return Err(SystemError::EINVAL); } - let mut val = loop { - let val = self.eventfd.lock().count; - if val != 0 { - break val; - } - if self - .eventfd - .lock() - .flags - .contains(EventFdFlags::EFD_NONBLOCK) - { + let mut lock_efd = self.eventfd.lock(); + while lock_efd.count == 0 { + if lock_efd.flags.contains(EventFdFlags::EFD_NONBLOCK) { + drop(lock_efd); return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); } - 
self.wait_queue.sleep(); - }; + + drop(lock_efd); + let r = wq_wait_event_interruptible!(self.wait_queue, self.readable(), {}); + if r.is_err() { + return Err(SystemError::ERESTARTSYS); + } + + lock_efd = self.eventfd.lock(); + } + let mut val = lock_efd.count; let mut eventfd = self.eventfd.lock(); if eventfd.flags.contains(EventFdFlags::EFD_SEMAPHORE) {