diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
| commit | 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch) | |
| tree | 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/rustix/src/thread | |
| parent | 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff) | |
chore: add vendor directory
Diffstat (limited to 'vendor/rustix/src/thread')
| -rw-r--r-- | vendor/rustix/src/thread/clock.rs | 117 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/futex.rs | 600 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/id.rs | 184 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/libcap.rs | 185 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/membarrier.rs | 92 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/mod.rs | 35 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/prctl.rs | 1014 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/sched.rs | 161 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/sched_yield.rs | 16 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/setns.rs | 139 |
10 files changed, 2543 insertions, 0 deletions
diff --git a/vendor/rustix/src/thread/clock.rs b/vendor/rustix/src/thread/clock.rs new file mode 100644 index 00000000..d6be40e8 --- /dev/null +++ b/vendor/rustix/src/thread/clock.rs @@ -0,0 +1,117 @@ +use crate::{backend, io}; +use core::fmt; + +pub use crate::timespec::{Nsecs, Secs, Timespec}; + +#[cfg(not(any( + apple, + target_os = "dragonfly", + target_os = "espidf", + target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11. + target_os = "openbsd", + target_os = "redox", + target_os = "vita", + target_os = "wasi", +)))] +pub use crate::clockid::ClockId; + +/// `clock_nanosleep(id, 0, request, remain)`—Sleeps for a duration on a +/// given clock. +/// +/// This is `clock_nanosleep` specialized for the case of a relative sleep +/// interval. See [`clock_nanosleep_absolute`] for absolute intervals. +/// +/// # References +/// - [POSIX] +/// - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/clock_nanosleep.html +/// [Linux]: https://man7.org/linux/man-pages/man2/clock_nanosleep.2.html +#[cfg(not(any( + apple, + target_os = "dragonfly", + target_os = "emscripten", + target_os = "espidf", + target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11. + target_os = "haiku", + target_os = "horizon", + target_os = "openbsd", + target_os = "redox", + target_os = "vita", + target_os = "wasi", +)))] +#[inline] +pub fn clock_nanosleep_relative(id: ClockId, request: &Timespec) -> NanosleepRelativeResult { + backend::thread::syscalls::clock_nanosleep_relative(id, request) +} + +/// `clock_nanosleep(id, TIMER_ABSTIME, request, NULL)`—Sleeps until an +/// absolute time on a given clock. +/// +/// This is `clock_nanosleep` specialized for the case of an absolute sleep +/// interval. See [`clock_nanosleep_relative`] for relative intervals. 
+/// +/// # References +/// - [POSIX] +/// - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/clock_nanosleep.html +/// [Linux]: https://man7.org/linux/man-pages/man2/clock_nanosleep.2.html +#[cfg(not(any( + apple, + target_os = "dragonfly", + target_os = "emscripten", + target_os = "espidf", + target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11. + target_os = "haiku", + target_os = "horizon", + target_os = "openbsd", + target_os = "redox", + target_os = "vita", + target_os = "wasi", +)))] +#[inline] +pub fn clock_nanosleep_absolute(id: ClockId, request: &Timespec) -> io::Result<()> { + backend::thread::syscalls::clock_nanosleep_absolute(id, request) +} + +/// `nanosleep(request, remain)`—Sleeps for a duration. +/// +/// This effectively uses the system monotonic clock. +/// +/// # References +/// - [POSIX] +/// - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/nanosleep.html +/// [Linux]: https://man7.org/linux/man-pages/man2/nanosleep.2.html +#[inline] +pub fn nanosleep(request: &Timespec) -> NanosleepRelativeResult { + backend::thread::syscalls::nanosleep(request) +} + +/// A return type for `nanosleep` and `clock_nanosleep_relative`. +#[derive(Clone)] +#[must_use] +pub enum NanosleepRelativeResult { + /// The sleep completed normally. + Ok, + /// The sleep was interrupted, the remaining time is returned. + Interrupted(Timespec), + /// An invalid time value was provided. 
+ Err(io::Errno), +} + +impl fmt::Debug for NanosleepRelativeResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Ok => f.write_str("Ok"), + Self::Interrupted(remaining) => write!( + f, + "Interrupted(Timespec {{ tv_sec: {:?}, tv_nsec: {:?} }})", + remaining.tv_sec, remaining.tv_nsec + ), + Self::Err(err) => write!(f, "Err({:?})", err), + } + } +} diff --git a/vendor/rustix/src/thread/futex.rs b/vendor/rustix/src/thread/futex.rs new file mode 100644 index 00000000..7ac49e69 --- /dev/null +++ b/vendor/rustix/src/thread/futex.rs @@ -0,0 +1,600 @@ +//! Linux `futex`. +//! +//! Futex is a very low-level mechanism for implementing concurrency primitives +//! such as mutexes, rwlocks, and condvars. For a higher-level API that +//! provides those abstractions, see [rustix-futex-sync]. +//! +//! # Examples +//! +//! ``` +//! use rustix::thread::futex; +//! use std::sync::atomic::AtomicU32; +//! +//! # fn test(futex: &AtomicU32) -> rustix::io::Result<()> { +//! // Wake up one waiter. +//! futex::wake(futex, futex::Flags::PRIVATE, 1)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! # References +//! - [Linux `futex` system call] +//! - [Linux `futex` feature] +//! +//! [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +//! [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +//! 
[rustix-futex-sync]: https://crates.io/crates/rustix-futex-sync +#![allow(unsafe_code)] + +use core::ffi::c_void; +use core::num::NonZeroU32; +use core::ptr; +use core::sync::atomic::AtomicU32; + +use crate::backend::thread::futex::Operation; +use crate::backend::thread::syscalls::{futex_timeout, futex_val2}; +use crate::fd::{FromRawFd as _, OwnedFd, RawFd}; +use crate::{backend, io}; + +pub use crate::clockid::ClockId; +pub use crate::timespec::{Nsecs, Secs, Timespec}; + +pub use backend::thread::futex::{Flags, WaitFlags, OWNER_DIED, WAITERS}; + +/// `syscall(SYS_futex, uaddr, FUTEX_WAIT, val, timeout, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn wait( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + timeout: Option<&Timespec>, +) -> io::Result<()> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_timeout(uaddr, Operation::Wait, flags, val, timeout, ptr::null(), 0).map(|val| { + debug_assert_eq!( + val, 0, + "The return value should always equal zero, if the call is successful" + ); + }) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_WAKE, val, NULL, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. 
+/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn wake(uaddr: &AtomicU32, flags: Flags, val: u32) -> io::Result<usize> { + // SAFETY: The raw pointers come from references or null. + unsafe { futex_val2(uaddr, Operation::Wake, flags, val, 0, ptr::null(), 0) } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_FD, val, NULL, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn fd(uaddr: &AtomicU32, flags: Flags, val: u32) -> io::Result<OwnedFd> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_val2(uaddr, Operation::Fd, flags, val, 0, ptr::null(), 0).map(|val| { + let fd = val as RawFd; + debug_assert_eq!(fd as usize, val, "return value should be a valid fd"); + OwnedFd::from_raw_fd(fd) + }) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_REQUEUE, val, val2, uaddr2, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn requeue( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + val2: u32, + uaddr2: &AtomicU32, +) -> io::Result<usize> { + // SAFETY: The raw pointers come from references or null. 
+ unsafe { futex_val2(uaddr, Operation::Requeue, flags, val, val2, uaddr2, 0) } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_CMP_REQUEUE, val, val2, uaddr2, val3)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn cmp_requeue( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + val2: u32, + uaddr2: &AtomicU32, + val3: u32, +) -> io::Result<usize> { + // SAFETY: The raw pointers come from references or null. + unsafe { futex_val2(uaddr, Operation::CmpRequeue, flags, val, val2, uaddr2, val3) } +} + +/// `FUTEX_OP_*` operations for use with [`wake_op`]. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[repr(u32)] +#[allow(clippy::identity_op)] +pub enum WakeOp { + /// `FUTEX_OP_SET`: `uaddr2 = oparg;` + Set = 0, + /// `FUTEX_OP_ADD`: `uaddr2 += oparg;` + Add = 1, + /// `FUTEX_OP_OR`: `uaddr2 |= oparg;` + Or = 2, + /// `FUTEX_OP_ANDN`: `uaddr2 &= ~oparg;` + AndN = 3, + /// `FUTEX_OP_XOR`: `uaddr2 ^= oparg;` + XOr = 4, + /// `FUTEX_OP_SET | FUTEX_OP_ARG_SHIFT`: `uaddr2 = (1 << oparg);` + SetShift = 0 | 8, + /// `FUTEX_OP_ADD | FUTEX_OP_ARG_SHIFT`: `uaddr2 += (1 << oparg);` + AddShift = 1 | 8, + /// `FUTEX_OP_OR | FUTEX_OP_ARG_SHIFT`: `uaddr2 |= (1 << oparg);` + OrShift = 2 | 8, + /// `FUTEX_OP_ANDN | FUTEX_OP_ARG_SHIFT`: `uaddr2 &= ~(1 << oparg);` + AndNShift = 3 | 8, + /// `FUTEX_OP_XOR | FUTEX_OP_ARG_SHIFT`: `uaddr2 ^= (1 << oparg);` + XOrShift = 4 | 8, +} + +/// `FUTEX_OP_CMP_*` operations for use with [`wake_op`]. 
+#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[repr(u32)] +pub enum WakeOpCmp { + /// `FUTEX_OP_CMP_EQ`: `if oldval == cmparg { wake(); }` + Eq = 0, + /// `FUTEX_OP_CMP_NE`: `if oldval != cmparg { wake(); }` + Ne = 1, + /// `FUTEX_OP_CMP_LT`: `if oldval < cmparg { wake(); }` + Lt = 2, + /// `FUTEX_OP_CMP_LE`: `if oldval <= cmparg { wake(); }` + Le = 3, + /// `FUTEX_OP_CMP_GT`: `if oldval > cmparg { wake(); }` + Gt = 4, + /// `FUTEX_OP_CMP_GE`: `if oldval >= cmparg { wake(); }` + Ge = 5, +} + +/// `syscall(SYS_futex, uaddr, FUTEX_WAKE_OP, val, val2, uaddr2, val3)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +#[allow(clippy::too_many_arguments)] +pub fn wake_op( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + val2: u32, + uaddr2: &AtomicU32, + op: WakeOp, + cmp: WakeOpCmp, + oparg: u16, + cmparg: u16, +) -> io::Result<usize> { + if oparg >= 1 << 12 || cmparg >= 1 << 12 { + return Err(io::Errno::INVAL); + } + + let val3 = + ((op as u32) << 28) | ((cmp as u32) << 24) | ((oparg as u32) << 12) | (cmparg as u32); + + // SAFETY: The raw pointers come from references or null. + unsafe { futex_val2(uaddr, Operation::WakeOp, flags, val, val2, uaddr2, val3) } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. 
+/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn lock_pi(uaddr: &AtomicU32, flags: Flags, timeout: Option<&Timespec>) -> io::Result<()> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_timeout(uaddr, Operation::LockPi, flags, 0, timeout, ptr::null(), 0).map(|val| { + debug_assert_eq!( + val, 0, + "The return value should always equal zero, if the call is successful" + ); + }) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn unlock_pi(uaddr: &AtomicU32, flags: Flags) -> io::Result<()> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_val2(uaddr, Operation::UnlockPi, flags, 0, 0, ptr::null(), 0).map(|val| { + debug_assert_eq!( + val, 0, + "The return value should always equal zero, if the call is successful" + ); + }) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_TRYLOCK_PI, 0, NULL, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. 
+/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn trylock_pi(uaddr: &AtomicU32, flags: Flags) -> io::Result<bool> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_val2(uaddr, Operation::TrylockPi, flags, 0, 0, ptr::null(), 0).map(|ret| ret == 0) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, val3)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn wait_bitset( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + timeout: Option<&Timespec>, + val3: NonZeroU32, +) -> io::Result<()> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_timeout( + uaddr, + Operation::WaitBitset, + flags, + val, + timeout, + ptr::null(), + val3.get(), + ) + .map(|val| { + debug_assert_eq!( + val, 0, + "The return value should always equal zero, if the call is successful" + ); + }) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_WAKE_BITSET, val, NULL, NULL, val3)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. 
+/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn wake_bitset( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + val3: NonZeroU32, +) -> io::Result<usize> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_val2( + uaddr, + Operation::WakeBitset, + flags, + val, + 0, + ptr::null(), + val3.get(), + ) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn wait_requeue_pi( + uaddr: &AtomicU32, + flags: Flags, + val: u32, + timeout: Option<&Timespec>, + uaddr2: &AtomicU32, +) -> io::Result<()> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_timeout( + uaddr, + Operation::WaitRequeuePi, + flags, + val, + timeout, + uaddr2, + 0, + ) + .map(|val| { + debug_assert_eq!( + val, 0, + "The return value should always equal zero, if the call is successful" + ); + }) + } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_CMP_REQUEUE_PI, 1, val2, uaddr2, val3)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. 
+/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn cmp_requeue_pi( + uaddr: &AtomicU32, + flags: Flags, + val2: u32, + uaddr2: &AtomicU32, + val3: u32, +) -> io::Result<usize> { + // SAFETY: The raw pointers come from references or null. + unsafe { futex_val2(uaddr, Operation::CmpRequeuePi, flags, 1, val2, uaddr2, val3) } +} + +/// `syscall(SYS_futex, uaddr, FUTEX_LOCK_PI2, 0, timeout, NULL, 0)` +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links. +/// +/// # References +/// - [Linux `futex` system call] +/// - [Linux `futex` feature] +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub fn lock_pi2(uaddr: &AtomicU32, flags: Flags, timeout: Option<&Timespec>) -> io::Result<()> { + // SAFETY: The raw pointers come from references or null. + unsafe { + futex_timeout(uaddr, Operation::LockPi2, flags, 0, timeout, ptr::null(), 0).map(|val| { + debug_assert_eq!( + val, 0, + "The return value should always equal zero, if the call is successful" + ); + }) + } +} + +/// A pointer in the [`Wait`] struct. +#[repr(C)] +#[derive(Copy, Clone)] +#[non_exhaustive] +pub struct WaitPtr { + #[cfg(all(target_pointer_width = "32", target_endian = "big"))] + #[doc(hidden)] + pub __pad32: u32, + #[cfg(all(target_pointer_width = "16", target_endian = "big"))] + #[doc(hidden)] + pub __pad16: u16, + + /// The pointer value. 
+ pub ptr: *mut c_void, + + #[cfg(all(target_pointer_width = "16", target_endian = "little"))] + #[doc(hidden)] + pub __pad16: u16, + #[cfg(all(target_pointer_width = "32", target_endian = "little"))] + #[doc(hidden)] + pub __pad32: u32, +} + +impl WaitPtr { + /// Construct a new `WaitPtr` holding the given raw pointer value. + #[inline] + pub const fn new(ptr: *mut c_void) -> Self { + Self { + ptr, + + #[cfg(target_pointer_width = "16")] + __pad16: 0, + #[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] + __pad32: 0, + } + } +} + +impl Default for WaitPtr { + #[inline] + fn default() -> Self { + Self::new(ptr::null_mut()) + } +} + +impl From<*mut c_void> for WaitPtr { + #[inline] + fn from(ptr: *mut c_void) -> Self { + Self::new(ptr) + } +} + +impl core::fmt::Debug for WaitPtr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.ptr.fmt(f) + } +} + +/// For use with [`waitv`]. +#[repr(C)] +#[derive(Debug, Copy, Clone)] +#[non_exhaustive] +pub struct Wait { + /// The expected value. + pub val: u64, + /// The address to wait for. + pub uaddr: WaitPtr, + /// The type and size of futex to perform. + pub flags: WaitFlags, + + /// Reserved for future use. + pub(crate) __reserved: u32, +} + +impl Wait { + /// Construct a zero-initialized `Wait`. + #[inline] + pub const fn new() -> Self { + Self { + val: 0, + uaddr: WaitPtr::new(ptr::null_mut()), + flags: WaitFlags::empty(), + __reserved: 0, + } + } +} + +impl Default for Wait { + #[inline] + fn default() -> Self { + Self::new() + } +} + +/// `futex_waitv(waiters.as_ptr(), waiters.len(), flags, timeout, clockid)`— +/// Wait on an array of futexes, wake on any. +/// +/// This requires Linux ≥ 5.16. 
+/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/latest/userspace-api/futex2.html +#[inline] +pub fn waitv( + waiters: &[Wait], + flags: WaitvFlags, + timeout: Option<&Timespec>, + clockid: ClockId, +) -> io::Result<usize> { + backend::thread::syscalls::futex_waitv(waiters, flags, timeout, clockid) +} + +bitflags::bitflags! { + /// Flags for use with the flags argument in [`waitv`]. + /// + /// At this time, no flags are defined. + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct WaitvFlags: u32 { + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +#[cfg(linux_raw)] +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_layouts() { + use crate::backend::c; + + check_renamed_struct!(Wait, futex_waitv, val, uaddr, flags, __reserved); + } +} diff --git a/vendor/rustix/src/thread/id.rs b/vendor/rustix/src/thread/id.rs new file mode 100644 index 00000000..aa67c9ea --- /dev/null +++ b/vendor/rustix/src/thread/id.rs @@ -0,0 +1,184 @@ +//! CPU and thread identifiers. +//! +//! # Safety +//! +//! The `Cpuid` type can be constructed from raw integers, which is marked +//! unsafe because actual OSes assign special meaning to some integer values. + +#![allow(unsafe_code)] +use crate::{backend, io}; +#[cfg(linux_kernel)] +use backend::thread::types::RawCpuid; + +pub use crate::pid::{Pid, RawPid}; +pub use crate::ugid::{Gid, RawGid, RawUid, Uid}; + +/// A Linux CPU ID. +#[cfg(linux_kernel)] +#[repr(transparent)] +#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] +pub struct Cpuid(RawCpuid); + +#[cfg(linux_kernel)] +impl Cpuid { + /// Converts a `RawCpuid` into a `Cpuid`. + /// + /// # Safety + /// + /// `raw` must be the value of a valid Linux CPU ID. + #[inline] + pub const unsafe fn from_raw(raw: RawCpuid) -> Self { + Self(raw) + } + + /// Converts a `Cpuid` into a `RawCpuid`. 
+ #[inline] + pub const fn as_raw(self) -> RawCpuid { + self.0 + } +} + +/// `gettid()`—Returns the thread ID. +/// +/// This returns the OS thread ID, which is not necessarily the same as the +/// Rust's `std::thread::Thread::id` or the pthread ID. +/// +/// This function always does a system call. To avoid this overhead, ask the +/// thread runtime for the ID instead, for example using [`libc::gettid`] or +/// [`origin::thread::current_id`]. +/// +/// [`libc::gettid`]: https://docs.rs/libc/*/libc/fn.gettid.html +/// [`origin::thread::current_id`]: https://docs.rs/origin/*/origin/thread/fn.current_id.html +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/gettid.2.html +#[inline] +#[must_use] +pub fn gettid() -> Pid { + backend::thread::syscalls::gettid() +} + +/// `setuid(uid)`—Sets the effective user ID of the calling thread. +/// +/// # Warning +/// +/// This is not the `setuid` you are looking for… POSIX requires uids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the uid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes]. This call implements the kernel +/// behavior. +/// +/// # References +/// - [POSIX] +/// - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/setuid.html +/// [Linux]: https://man7.org/linux/man-pages/man2/setuid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setuid.2.html#NOTES +#[inline] +pub fn set_thread_uid(uid: Uid) -> io::Result<()> { + backend::thread::syscalls::setuid_thread(uid) +} + +/// `setresuid(ruid, euid, suid)`—Sets the real, effective, and saved user ID +/// of the calling thread. 
+/// +/// # Warning +/// +/// This is not the `setresuid` you are looking for… POSIX requires uids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the uid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes] and the notes in [`set_thread_uid`]. This +/// call implements the kernel behavior. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setresuid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setresuid.2.html#NOTES +#[inline] +pub fn set_thread_res_uid(ruid: Uid, euid: Uid, suid: Uid) -> io::Result<()> { + backend::thread::syscalls::setresuid_thread(ruid, euid, suid) +} + +/// `setgid(gid)`—Sets the effective group ID of the current thread. +/// +/// # Warning +/// +/// This is not the `setgid` you are looking for… POSIX requires gids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the gid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes]. This call implements the kernel +/// behavior. +/// +/// # References +/// - [POSIX] +/// - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/setgid.html +/// [Linux]: https://man7.org/linux/man-pages/man2/setgid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setgid.2.html#NOTES +#[inline] +pub fn set_thread_gid(gid: Gid) -> io::Result<()> { + backend::thread::syscalls::setgid_thread(gid) +} + +/// `setresgid(rgid, egid, sgid)`—Sets the real, effective, and saved group +/// ID of the current thread. 
+/// +/// # Warning +/// +/// This is not the `setresgid` you are looking for… POSIX requires gids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the gid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes] and the notes in [`set_thread_gid`]. This +/// call implements the kernel behavior. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setresgid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setresgid.2.html#NOTES +#[inline] +pub fn set_thread_res_gid(rgid: Gid, egid: Gid, sgid: Gid) -> io::Result<()> { + backend::thread::syscalls::setresgid_thread(rgid, egid, sgid) +} + +/// `setgroups(groups)`—Sets the supplementary group IDs for the calling +/// thread. +/// +/// # Warning +/// +/// This is not the `setgroups` you are looking for… POSIX requires gids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the gids for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes]. This call implements the kernel +/// behavior. 
+/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setgroups.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setgroups.2.html#NOTES +#[cfg(linux_kernel)] +#[inline] +pub fn set_thread_groups(groups: &[Gid]) -> io::Result<()> { + backend::thread::syscalls::setgroups_thread(groups) +} diff --git a/vendor/rustix/src/thread/libcap.rs b/vendor/rustix/src/thread/libcap.rs new file mode 100644 index 00000000..0a0fbb4c --- /dev/null +++ b/vendor/rustix/src/thread/libcap.rs @@ -0,0 +1,185 @@ +use bitflags::bitflags; +use core::mem::MaybeUninit; + +use crate::pid::Pid; +use crate::{backend, io}; + +/// `__user_cap_data_struct` +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CapabilitySets { + /// `__user_cap_data_struct.effective` + pub effective: CapabilityFlags, + /// `__user_cap_data_struct.permitted` + pub permitted: CapabilityFlags, + /// `__user_cap_data_struct.inheritable` + pub inheritable: CapabilityFlags, +} + +bitflags! { + /// `CAP_*` constants. 
+ #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct CapabilityFlags: u64 { + /// `CAP_CHOWN` + const CHOWN = 1 << linux_raw_sys::general::CAP_CHOWN; + /// `CAP_DAC_OVERRIDE` + const DAC_OVERRIDE = 1 << linux_raw_sys::general::CAP_DAC_OVERRIDE; + /// `CAP_DAC_READ_SEARCH` + const DAC_READ_SEARCH = 1 << linux_raw_sys::general::CAP_DAC_READ_SEARCH; + /// `CAP_FOWNER` + const FOWNER = 1 << linux_raw_sys::general::CAP_FOWNER; + /// `CAP_FSETID` + const FSETID = 1 << linux_raw_sys::general::CAP_FSETID; + /// `CAP_KILL` + const KILL = 1 << linux_raw_sys::general::CAP_KILL; + /// `CAP_SETGID` + const SETGID = 1 << linux_raw_sys::general::CAP_SETGID; + /// `CAP_SETUID` + const SETUID = 1 << linux_raw_sys::general::CAP_SETUID; + /// `CAP_SETPCAP` + const SETPCAP = 1 << linux_raw_sys::general::CAP_SETPCAP; + /// `CAP_LINUX_IMMUTABLE` + const LINUX_IMMUTABLE = 1 << linux_raw_sys::general::CAP_LINUX_IMMUTABLE; + /// `CAP_NET_BIND_SERVICE` + const NET_BIND_SERVICE = 1 << linux_raw_sys::general::CAP_NET_BIND_SERVICE; + /// `CAP_NET_BROADCAST` + const NET_BROADCAST = 1 << linux_raw_sys::general::CAP_NET_BROADCAST; + /// `CAP_NET_ADMIN` + const NET_ADMIN = 1 << linux_raw_sys::general::CAP_NET_ADMIN; + /// `CAP_NET_RAW` + const NET_RAW = 1 << linux_raw_sys::general::CAP_NET_RAW; + /// `CAP_IPC_LOCK` + const IPC_LOCK = 1 << linux_raw_sys::general::CAP_IPC_LOCK; + /// `CAP_IPC_OWNER` + const IPC_OWNER = 1 << linux_raw_sys::general::CAP_IPC_OWNER; + /// `CAP_SYS_MODULE` + const SYS_MODULE = 1 << linux_raw_sys::general::CAP_SYS_MODULE; + /// `CAP_SYS_RAWIO` + const SYS_RAWIO = 1 << linux_raw_sys::general::CAP_SYS_RAWIO; + /// `CAP_SYS_CHROOT` + const SYS_CHROOT = 1 << linux_raw_sys::general::CAP_SYS_CHROOT; + /// `CAP_SYS_PTRACE` + const SYS_PTRACE = 1 << linux_raw_sys::general::CAP_SYS_PTRACE; + /// `CAP_SYS_PACCT` + const SYS_PACCT = 1 << linux_raw_sys::general::CAP_SYS_PACCT; + /// `CAP_SYS_ADMIN` + const SYS_ADMIN = 1 << 
linux_raw_sys::general::CAP_SYS_ADMIN; + /// `CAP_SYS_BOOT` + const SYS_BOOT = 1 << linux_raw_sys::general::CAP_SYS_BOOT; + /// `CAP_SYS_NICE` + const SYS_NICE = 1 << linux_raw_sys::general::CAP_SYS_NICE; + /// `CAP_SYS_RESOURCE` + const SYS_RESOURCE = 1 << linux_raw_sys::general::CAP_SYS_RESOURCE; + /// `CAP_SYS_TIME` + const SYS_TIME = 1 << linux_raw_sys::general::CAP_SYS_TIME; + /// `CAP_SYS_TTY_CONFIG` + const SYS_TTY_CONFIG = 1 << linux_raw_sys::general::CAP_SYS_TTY_CONFIG; + /// `CAP_MKNOD` + const MKNOD = 1 << linux_raw_sys::general::CAP_MKNOD; + /// `CAP_LEASE` + const LEASE = 1 << linux_raw_sys::general::CAP_LEASE; + /// `CAP_AUDIT_WRITE` + const AUDIT_WRITE = 1 << linux_raw_sys::general::CAP_AUDIT_WRITE; + /// `CAP_AUDIT_CONTROL` + const AUDIT_CONTROL = 1 << linux_raw_sys::general::CAP_AUDIT_CONTROL; + /// `CAP_SETFCAP` + const SETFCAP = 1 << linux_raw_sys::general::CAP_SETFCAP; + /// `CAP_MAC_OVERRIDE` + const MAC_OVERRIDE = 1 << linux_raw_sys::general::CAP_MAC_OVERRIDE; + /// `CAP_MAC_ADMIN` + const MAC_ADMIN = 1 << linux_raw_sys::general::CAP_MAC_ADMIN; + /// `CAP_SYSLOG` + const SYSLOG = 1 << linux_raw_sys::general::CAP_SYSLOG; + /// `CAP_WAKE_ALARM` + const WAKE_ALARM = 1 << linux_raw_sys::general::CAP_WAKE_ALARM; + /// `CAP_BLOCK_SUSPEND` + const BLOCK_SUSPEND = 1 << linux_raw_sys::general::CAP_BLOCK_SUSPEND; + /// `CAP_AUDIT_READ` + const AUDIT_READ = 1 << linux_raw_sys::general::CAP_AUDIT_READ; + /// `CAP_PERFMON` + const PERFMON = 1 << linux_raw_sys::general::CAP_PERFMON; + /// `CAP_BPF` + const BPF = 1 << linux_raw_sys::general::CAP_BPF; + /// `CAP_CHECKPOINT_RESTORE` + const CHECKPOINT_RESTORE = 1 << linux_raw_sys::general::CAP_CHECKPOINT_RESTORE; + + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +/// `capget(_LINUX_CAPABILITY_VERSION_3, pid)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/capget.2.html +#[inline] +#[doc(alias = "capget")] +pub fn 
capabilities(pid: Option<Pid>) -> io::Result<CapabilitySets> { + capget(pid) +} + +/// `capset(_LINUX_CAPABILITY_VERSION_3, pid, effective, permitted, +/// inheritable)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/capget.2.html +#[inline] +#[doc(alias = "capset")] +pub fn set_capabilities(pid: Option<Pid>, sets: CapabilitySets) -> io::Result<()> { + capset(pid, sets) +} + +#[inline] +#[allow(unsafe_code)] +fn capget(pid: Option<Pid>) -> io::Result<CapabilitySets> { + let mut data = [MaybeUninit::<linux_raw_sys::general::__user_cap_data_struct>::uninit(); 2]; + + let data = { + let mut header = linux_raw_sys::general::__user_cap_header_struct { + version: linux_raw_sys::general::_LINUX_CAPABILITY_VERSION_3, + pid: Pid::as_raw(pid) as backend::c::c_int, + }; + + backend::thread::syscalls::capget(&mut header, &mut data)?; + // SAFETY: v3 is a 64-bit implementation, so the kernel filled in both + // data structs. + unsafe { (data[0].assume_init(), data[1].assume_init()) } + }; + + let effective = u64::from(data.0.effective) | (u64::from(data.1.effective) << u32::BITS); + let permitted = u64::from(data.0.permitted) | (u64::from(data.1.permitted) << u32::BITS); + let inheritable = u64::from(data.0.inheritable) | (u64::from(data.1.inheritable) << u32::BITS); + + // The kernel returns a partitioned bitset that we just combined above. 
+ Ok(CapabilitySets { + effective: CapabilityFlags::from_bits_retain(effective), + permitted: CapabilityFlags::from_bits_retain(permitted), + inheritable: CapabilityFlags::from_bits_retain(inheritable), + }) +} + +#[inline] +fn capset(pid: Option<Pid>, sets: CapabilitySets) -> io::Result<()> { + let mut header = linux_raw_sys::general::__user_cap_header_struct { + version: linux_raw_sys::general::_LINUX_CAPABILITY_VERSION_3, + pid: Pid::as_raw(pid) as backend::c::c_int, + }; + let data = [ + linux_raw_sys::general::__user_cap_data_struct { + effective: sets.effective.bits() as u32, + permitted: sets.permitted.bits() as u32, + inheritable: sets.inheritable.bits() as u32, + }, + linux_raw_sys::general::__user_cap_data_struct { + effective: (sets.effective.bits() >> u32::BITS) as u32, + permitted: (sets.permitted.bits() >> u32::BITS) as u32, + inheritable: (sets.inheritable.bits() >> u32::BITS) as u32, + }, + ]; + + backend::thread::syscalls::capset(&mut header, &data) +} diff --git a/vendor/rustix/src/thread/membarrier.rs b/vendor/rustix/src/thread/membarrier.rs new file mode 100644 index 00000000..b3e6508e --- /dev/null +++ b/vendor/rustix/src/thread/membarrier.rs @@ -0,0 +1,92 @@ +//! The Linux `membarrier` syscall. + +use crate::thread::Cpuid; +use crate::{backend, io}; + +pub use backend::thread::types::MembarrierCommand; + +#[cfg(linux_kernel)] +bitflags::bitflags! { + /// A result from [`membarrier_query`]. + /// + /// These flags correspond to values of [`MembarrierCommand`] which are + /// supported in the OS. 
+ #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct MembarrierQuery: u32 { + /// `MEMBARRIER_CMD_GLOBAL` (also known as `MEMBARRIER_CMD_SHARED`) + #[doc(alias = "SHARED")] + #[doc(alias = "MEMBARRIER_CMD_SHARED")] + const GLOBAL = MembarrierCommand::Global as _; + /// `MEMBARRIER_CMD_GLOBAL_EXPEDITED` + const GLOBAL_EXPEDITED = MembarrierCommand::GlobalExpedited as _; + /// `MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED` + const REGISTER_GLOBAL_EXPEDITED = MembarrierCommand::RegisterGlobalExpedited as _; + /// `MEMBARRIER_CMD_PRIVATE_EXPEDITED` + const PRIVATE_EXPEDITED = MembarrierCommand::PrivateExpedited as _; + /// `MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED` + const REGISTER_PRIVATE_EXPEDITED = MembarrierCommand::RegisterPrivateExpedited as _; + /// `MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE` + const PRIVATE_EXPEDITED_SYNC_CORE = MembarrierCommand::PrivateExpeditedSyncCore as _; + /// `MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE` + const REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = MembarrierCommand::RegisterPrivateExpeditedSyncCore as _; + /// `MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ` (since Linux 5.10) + const PRIVATE_EXPEDITED_RSEQ = MembarrierCommand::PrivateExpeditedRseq as _; + /// `MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ` (since Linux 5.10) + const REGISTER_PRIVATE_EXPEDITED_RSEQ = MembarrierCommand::RegisterPrivateExpeditedRseq as _; + + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +#[cfg(linux_kernel)] +impl MembarrierQuery { + /// Test whether this query result contains the given command. + #[inline] + pub fn contains_command(self, cmd: MembarrierCommand) -> bool { + // `MembarrierCommand` is an enum that only contains values also valid + // in `MembarrierQuery`. + self.contains(Self::from_bits_retain(cmd as _)) + } +} + +/// `membarrier(MEMBARRIER_CMD_QUERY, 0, 0)`—Query the supported `membarrier` +/// commands. 
+/// +/// This function doesn't return a `Result` because it always succeeds; if the +/// underlying OS doesn't support the `membarrier` syscall, it returns an empty +/// `MembarrierQuery` value. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/membarrier.2.html +#[inline] +#[doc(alias = "MEMBARRIER_CMD_QUERY")] +pub fn membarrier_query() -> MembarrierQuery { + backend::thread::syscalls::membarrier_query() +} + +/// `membarrier(cmd, 0, 0)`—Perform a memory barrier. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/membarrier.2.html +#[inline] +pub fn membarrier(cmd: MembarrierCommand) -> io::Result<()> { + backend::thread::syscalls::membarrier(cmd) +} + +/// `membarrier(cmd, MEMBARRIER_CMD_FLAG_CPU, cpu)`—Perform a memory barrier +/// with a specific CPU. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/membarrier.2.html +#[inline] +pub fn membarrier_cpu(cmd: MembarrierCommand, cpu: Cpuid) -> io::Result<()> { + backend::thread::syscalls::membarrier_cpu(cmd, cpu) +} diff --git a/vendor/rustix/src/thread/mod.rs b/vendor/rustix/src/thread/mod.rs new file mode 100644 index 00000000..e32b5945 --- /dev/null +++ b/vendor/rustix/src/thread/mod.rs @@ -0,0 +1,35 @@ +//! Thread-associated operations. 
+ +#[cfg(not(target_os = "redox"))] +mod clock; +#[cfg(linux_kernel)] +pub mod futex; +#[cfg(linux_kernel)] +mod id; +#[cfg(linux_kernel)] +mod libcap; +#[cfg(linux_kernel)] +mod membarrier; +#[cfg(linux_kernel)] +mod prctl; +#[cfg(any(freebsdlike, linux_kernel, target_os = "fuchsia"))] +mod sched; +mod sched_yield; +#[cfg(linux_kernel)] +mod setns; + +#[cfg(not(target_os = "redox"))] +pub use clock::*; +#[cfg(linux_kernel)] +pub use id::*; +#[cfg(linux_kernel)] +pub use libcap::{capabilities, set_capabilities, CapabilityFlags, CapabilitySets}; +#[cfg(linux_kernel)] +pub use membarrier::*; +#[cfg(linux_kernel)] +pub use prctl::*; +#[cfg(any(freebsdlike, linux_kernel, target_os = "fuchsia"))] +pub use sched::*; +pub use sched_yield::sched_yield; +#[cfg(linux_kernel)] +pub use setns::*; diff --git a/vendor/rustix/src/thread/prctl.rs b/vendor/rustix/src/thread/prctl.rs new file mode 100644 index 00000000..4719b1eb --- /dev/null +++ b/vendor/rustix/src/thread/prctl.rs @@ -0,0 +1,1014 @@ +//! Linux `prctl` wrappers. +//! +//! Rustix wraps variadic/dynamic-dispatch functions like `prctl` in type-safe +//! wrappers. +//! +//! # Safety +//! +//! The inner `prctl` calls are dynamically typed and must be called correctly. +#![allow(unsafe_code)] + +use core::mem::MaybeUninit; +use core::num::NonZeroU64; +use core::ptr; +use core::ptr::NonNull; +use core::sync::atomic::AtomicU8; + +use bitflags::bitflags; + +use crate::backend::prctl::syscalls; +#[cfg(feature = "alloc")] +use crate::ffi::CString; +use crate::ffi::{c_int, c_uint, c_void, CStr}; +use crate::io; +use crate::pid::Pid; +use crate::prctl::{ + prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional, PointerAuthenticationKeys, +}; +use crate::utils::as_ptr; + +// +// PR_GET_KEEPCAPS/PR_SET_KEEPCAPS +// + +const PR_GET_KEEPCAPS: c_int = 7; + +/// Get the current state of the calling thread's `keep capabilities` flag. 
+/// +/// # References +/// - [`prctl(PR_GET_KEEPCAPS,…)`] +/// +/// [`prctl(PR_GET_KEEPCAPS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn get_keep_capabilities() -> io::Result<bool> { + unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0) +} + +const PR_SET_KEEPCAPS: c_int = 8; + +/// Set the state of the calling thread's `keep capabilities` flag. +/// +/// # References +/// - [`prctl(PR_SET_KEEPCAPS,…)`] +/// +/// [`prctl(PR_SET_KEEPCAPS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_keep_capabilities(enable: bool) -> io::Result<()> { + unsafe { prctl_2args(PR_SET_KEEPCAPS, usize::from(enable) as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_NAME/PR_SET_NAME +// + +#[cfg(feature = "alloc")] +const PR_GET_NAME: c_int = 16; + +/// Get the name of the calling thread. +/// +/// The name is read into a 16-byte buffer and returned up to, but not +/// including, the first NUL byte. If the buffer contains no NUL byte, all 16 +/// bytes are returned. +/// +/// # References +/// - [`prctl(PR_GET_NAME,…)`] +/// +/// [`prctl(PR_GET_NAME,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +pub fn name() -> io::Result<CString> { + let mut buffer = [0_u8; 16]; + unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? }; + + // Keep the bytes before the first NUL. If no NUL is present, keep the + // whole buffer instead of silently returning an empty name. + let len = buffer + .iter() + .position(|&x| x == 0_u8) + .unwrap_or(buffer.len()); + CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ) +} + +const PR_SET_NAME: c_int = 15; + +/// Set the name of the calling thread. +/// +/// Unlike `pthread_setname_np`, this function silently truncates the name to +/// 16 bytes, as the Linux syscall does. 
+/// +/// # References +/// - [`prctl(PR_SET_NAME,…)`] +/// +/// [`prctl(PR_SET_NAME,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_name(name: &CStr) -> io::Result<()> { + unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_SECCOMP/PR_SET_SECCOMP +// + +const PR_GET_SECCOMP: c_int = 21; + +const SECCOMP_MODE_DISABLED: i32 = 0; +const SECCOMP_MODE_STRICT: i32 = 1; +const SECCOMP_MODE_FILTER: i32 = 2; + +/// `SECCOMP_MODE_*` +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(i32)] +pub enum SecureComputingMode { + /// Secure computing is not in use. + Disabled = SECCOMP_MODE_DISABLED, + /// Use hard-coded filter. + Strict = SECCOMP_MODE_STRICT, + /// Use user-supplied filter. + Filter = SECCOMP_MODE_FILTER, +} + +impl TryFrom<i32> for SecureComputingMode { + type Error = io::Errno; + + fn try_from(value: i32) -> Result<Self, Self::Error> { + match value { + SECCOMP_MODE_DISABLED => Ok(Self::Disabled), + SECCOMP_MODE_STRICT => Ok(Self::Strict), + SECCOMP_MODE_FILTER => Ok(Self::Filter), + _ => Err(io::Errno::RANGE), + } + } +} + +/// Get the secure computing mode of the calling thread. +/// +/// If the caller is not in secure computing mode, this returns +/// [`SecureComputingMode::Disabled`]. If the caller is in strict secure +/// computing mode, then this call will cause a [`Signal::KILL`] signal to be +/// sent to the process. If the caller is in filter mode, and this system call +/// is allowed by the seccomp filters, it returns +/// [`SecureComputingMode::Filter`]; otherwise, the process is killed with a +/// [`Signal::KILL`] signal. +/// +/// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file +/// provides a method of obtaining the same information, without the risk that +/// the process is killed; see [the `proc` manual page]. 
+/// +/// # References +/// - [`prctl(PR_GET_SECCOMP,…)`] +/// +/// [`Signal::KILL`]: crate::signal::Signal::KILL +/// [`prctl(PR_GET_SECCOMP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +/// [the `proc` manual page]: https://man7.org/linux/man-pages/man5/proc.5.html +#[inline] +pub fn secure_computing_mode() -> io::Result<SecureComputingMode> { + unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into) +} + +const PR_SET_SECCOMP: c_int = 22; + +/// Set the secure computing mode for the calling thread, to limit the +/// available system calls. +/// +/// # References +/// - [`prctl(PR_SET_SECCOMP,…)`] +/// +/// [`prctl(PR_SET_SECCOMP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> { + unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ()) +} + +// +// PR_CAPBSET_READ/PR_CAPBSET_DROP +// + +const PR_CAPBSET_READ: c_int = 23; + +/// Linux per-thread capability. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +#[non_exhaustive] +pub enum Capability { + /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this + /// overrides the restriction of changing file ownership and group + /// ownership. + ChangeOwnership = linux_raw_sys::general::CAP_CHOWN, + /// Override all DAC access, including ACL execute access if `_POSIX_ACL` + /// is defined. Excluding DAC access covered by + /// [`Capability::LinuxImmutable`]. + DACOverride = linux_raw_sys::general::CAP_DAC_OVERRIDE, + /// Overrides all DAC restrictions regarding read and search on files and + /// directories, including ACL restrictions if `_POSIX_ACL` is defined. + /// Excluding DAC access covered by [`Capability::LinuxImmutable`]. 
+ DACReadSearch = linux_raw_sys::general::CAP_DAC_READ_SEARCH, + /// Overrides all restrictions about allowed operations on files, where + /// file owner ID must be equal to the user ID, except where + /// [`Capability::FileSetID`] is applicable. It doesn't override MAC and + /// DAC restrictions. + FileOwner = linux_raw_sys::general::CAP_FOWNER, + /// Overrides the following restrictions that the effective user ID shall + /// match the file owner ID when setting the `S_ISUID` and `S_ISGID` bits + /// on that file; that the effective group ID (or one of the supplementary + /// group IDs) shall match the file owner ID when setting the `S_ISGID` bit + /// on that file; that the `S_ISUID` and `S_ISGID` bits are cleared on + /// successful return from `chown` (not implemented). + FileSetID = linux_raw_sys::general::CAP_FSETID, + /// Overrides the restriction that the real or effective user ID of a + /// process sending a signal must match the real or effective user ID of + /// the process receiving the signal. + Kill = linux_raw_sys::general::CAP_KILL, + /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on + /// socket credentials passing. + SetGroupID = linux_raw_sys::general::CAP_SETGID, + /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on + /// socket credentials passing. + SetUserID = linux_raw_sys::general::CAP_SETUID, + /// Without VFS support for capabilities: + /// - Transfer any capability in your permitted set to any pid. + /// - Remove any capability in your permitted set from any pid. + /// + /// With VFS support for capabilities (neither of the above, but): + /// - Add any capability from current's capability bounding set to the + /// current process' inheritable set. + /// - Allow taking bits out of capability bounding set. + /// - Allow modification of the securebits for a process. + SetPermittedCapabilities = linux_raw_sys::general::CAP_SETPCAP, + /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes. 
+ LinuxImmutable = linux_raw_sys::general::CAP_LINUX_IMMUTABLE, + /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM + /// VCIs below 32. + NetBindService = linux_raw_sys::general::CAP_NET_BIND_SERVICE, + /// Allow broadcasting, listen to multicast. + NetBroadcast = linux_raw_sys::general::CAP_NET_BROADCAST, + /// Allow interface configuration. Allow administration of IP firewall, + /// masquerading and accounting. Allow setting debug option on sockets. + /// Allow modification of routing tables. Allow setting arbitrary + /// process / process group ownership on sockets. Allow binding to any + /// address for transparent proxying (also via [`Capability::NetRaw`]). + /// Allow setting TOS (type of service). Allow setting promiscuous + /// mode. Allow clearing driver statistics. Allow multicasting. Allow + /// read/write of device-specific registers. Allow activation of ATM + /// control sockets. + NetAdmin = linux_raw_sys::general::CAP_NET_ADMIN, + /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow + /// binding to any address for transparent proxying (also via + /// [`Capability::NetAdmin`]). + NetRaw = linux_raw_sys::general::CAP_NET_RAW, + /// Allow locking of shared memory segments. Allow mlock and mlockall + /// (which doesn't really have anything to do with IPC). + IPCLock = linux_raw_sys::general::CAP_IPC_LOCK, + /// Override IPC ownership checks. + IPCOwner = linux_raw_sys::general::CAP_IPC_OWNER, + /// Insert and remove kernel modules - modify kernel without limit. + SystemModule = linux_raw_sys::general::CAP_SYS_MODULE, + /// Allow ioperm/iopl access. Allow sending USB messages to any device via + /// `/dev/bus/usb`. + SystemRawIO = linux_raw_sys::general::CAP_SYS_RAWIO, + /// Allow use of `chroot`. + SystemChangeRoot = linux_raw_sys::general::CAP_SYS_CHROOT, + /// Allow `ptrace` of any process. + SystemProcessTrace = linux_raw_sys::general::CAP_SYS_PTRACE, + /// Allow configuration of process accounting. 
+ SystemProcessAccounting = linux_raw_sys::general::CAP_SYS_PACCT, + /// Allow configuration of the secure attention key. Allow administration + /// of the random device. Allow examination and configuration of disk + /// quotas. Allow setting the domainname. Allow setting the hostname. + /// Allow `mount` and `umount`, setting up new smb connection. + /// Allow some autofs root ioctls. Allow nfsservctl. Allow + /// `VM86_REQUEST_IRQ`. Allow to read/write pci config on alpha. Allow + /// `irix_prctl` on mips (setstacksize). Allow flushing all cache on + /// m68k (`sys_cacheflush`). Allow removing semaphores. Used instead of + /// [`Capability::ChangeOwnership`] to "chown" IPC message queues, + /// semaphores and shared memory. Allow locking/unlocking of shared + /// memory segment. Allow turning swap on/off. Allow forged pids on + /// socket credentials passing. Allow setting readahead and + /// flushing buffers on block devices. Allow setting geometry in floppy + /// driver. Allow turning DMA on/off in `xd` driver. Allow + /// administration of md devices (mostly the above, but some + /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram + /// device. Allow administration of `apm_bios`, serial and bttv (TV) + /// device. Allow manufacturer commands in isdn CAPI support driver. + /// Allow reading non-standardized portions of pci configuration space. + /// Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports. + /// Allow sending raw qic-117 commands. Allow enabling/disabling tagged + /// queuing on SCSI controllers and sending arbitrary SCSI commands. + /// Allow setting encryption key on loopback filesystem. Allow setting + /// zone reclaim policy. Allow everything under + /// [`Capability::BerkeleyPacketFilters`] and + /// [`Capability::PerformanceMonitoring`] for backward compatibility. + SystemAdmin = linux_raw_sys::general::CAP_SYS_ADMIN, + /// Allow use of `reboot`. 
+ SystemBoot = linux_raw_sys::general::CAP_SYS_BOOT, + /// Allow raising priority and setting priority on other (different UID) + /// processes. Allow use of FIFO and round-robin (realtime) scheduling + /// on own processes and setting the scheduling algorithm used by + /// another process. Allow setting cpu affinity on other processes. + /// Allow setting realtime ioprio class. Allow setting ioprio class on + /// other processes. + SystemNice = linux_raw_sys::general::CAP_SYS_NICE, + /// Override resource limits. Set resource limits. Override quota limits. + /// Override reserved space on ext2 filesystem. Modify data journaling + /// mode on ext3 filesystem (uses journaling resources). NOTE: ext2 + /// honors fsuid when checking for resource overrides, so you can + /// override using fsuid too. Override size restrictions on IPC message + /// queues. Allow more than 64hz interrupts from the real-time clock. + /// Override max number of consoles on console allocation. Override max + /// number of keymaps. Control memory reclaim behavior. + SystemResource = linux_raw_sys::general::CAP_SYS_RESOURCE, + /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow + /// setting the real-time clock. + SystemTime = linux_raw_sys::general::CAP_SYS_TIME, + /// Allow configuration of tty devices. Allow `vhangup` of tty. + SystemTTYConfig = linux_raw_sys::general::CAP_SYS_TTY_CONFIG, + /// Allow the privileged aspects of `mknod`. + MakeNode = linux_raw_sys::general::CAP_MKNOD, + /// Allow taking of leases on files. + Lease = linux_raw_sys::general::CAP_LEASE, + /// Allow writing the audit log via unicast netlink socket. + AuditWrite = linux_raw_sys::general::CAP_AUDIT_WRITE, + /// Allow configuration of audit via unicast netlink socket. + AuditControl = linux_raw_sys::general::CAP_AUDIT_CONTROL, + /// Set or remove capabilities on files. Map `uid=0` into a child user + /// namespace. 
+ SetFileCapabilities = linux_raw_sys::general::CAP_SETFCAP, + /// Override MAC access. The base kernel enforces no MAC policy. An LSM may + /// enforce a MAC policy, and if it does and it chooses to implement + /// capability based overrides of that policy, this is the capability it + /// should use to do so. + MACOverride = linux_raw_sys::general::CAP_MAC_OVERRIDE, + /// Allow MAC configuration or state changes. The base kernel requires no + /// MAC configuration. An LSM may enforce a MAC policy, and if it does and + /// it chooses to implement capability based checks on modifications to + /// that policy or the data required to maintain it, this is the capability + /// it should use to do so. + MACAdmin = linux_raw_sys::general::CAP_MAC_ADMIN, + /// Allow configuring the kernel's `syslog` (`printk` behaviour). + SystemLog = linux_raw_sys::general::CAP_SYSLOG, + /// Allow triggering something that will wake the system. + WakeAlarm = linux_raw_sys::general::CAP_WAKE_ALARM, + /// Allow preventing system suspends. + BlockSuspend = linux_raw_sys::general::CAP_BLOCK_SUSPEND, + /// Allow reading the audit log via multicast netlink socket. + AuditRead = linux_raw_sys::general::CAP_AUDIT_READ, + /// Allow system performance and observability privileged operations using + /// `perf_events`, `i915_perf` and other kernel subsystems. 
+ PerformanceMonitoring = linux_raw_sys::general::CAP_PERFMON, + /// This capability allows the following BPF operations: + /// - Creating all types of BPF maps + /// - Advanced verifier features + /// - Indirect variable access + /// - Bounded loops + /// - BPF to BPF function calls + /// - Scalar precision tracking + /// - Larger complexity limits + /// - Dead code elimination + /// - And potentially other features + /// - Loading BPF Type Format (BTF) data + /// - Retrieve `xlated` and JITed code of BPF programs + /// - Use `bpf_spin_lock` helper + /// + /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks + /// further: + /// - BPF programs can make use of pointer-to-integer conversions + /// - speculation attack hardening measures are bypassed + /// - `bpf_probe_read` to read arbitrary kernel memory is allowed + /// - `bpf_trace_printk` to print kernel memory is allowed + /// + /// [`Capability::SystemAdmin`] is required to use `bpf_probe_write_user`. + /// + /// [`Capability::SystemAdmin`] is required to iterate system-wide loaded + /// programs, maps, links, and BTFs, and convert their IDs to file + /// descriptors. + /// + /// [`Capability::PerformanceMonitoring`] and + /// [`Capability::BerkeleyPacketFilters`] are required to load tracing + /// programs. [`Capability::NetAdmin`] and + /// [`Capability::BerkeleyPacketFilters`] are required to load + /// networking programs. + BerkeleyPacketFilters = linux_raw_sys::general::CAP_BPF, + /// Allow checkpoint/restore related operations. Allow PID selection during + /// `clone3`. Allow writing to `ns_last_pid`. + CheckpointRestore = linux_raw_sys::general::CAP_CHECKPOINT_RESTORE, +} + +/// Check if the specified capability is in the calling thread's capability +/// bounding set. 
+/// +/// # References +/// - [`prctl(PR_CAPBSET_READ,…)`] +/// +/// [`prctl(PR_CAPBSET_READ,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn capability_is_in_bounding_set(capability: Capability) -> io::Result<bool> { + unsafe { prctl_2args(PR_CAPBSET_READ, capability as usize as *mut _) }.map(|r| r != 0) +} + +const PR_CAPBSET_DROP: c_int = 24; + +/// If the calling thread has the [`Capability::SetPermittedCapabilities`] +/// capability within its user namespace, then drop the specified capability +/// from the thread's capability bounding set. +/// +/// # References +/// - [`prctl(PR_CAPBSET_DROP,…)`] +/// +/// [`prctl(PR_CAPBSET_DROP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn remove_capability_from_bounding_set(capability: Capability) -> io::Result<()> { + unsafe { prctl_2args(PR_CAPBSET_DROP, capability as usize as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_SECUREBITS/PR_SET_SECUREBITS +// + +const PR_GET_SECUREBITS: c_int = 27; + +bitflags! { + /// `SECBIT_*` + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct CapabilitiesSecureBits: u32 { + /// If this bit is set, then the kernel does not grant capabilities + /// when a `set-user-ID-root` program is executed, or when a process + /// with an effective or real UID of [`Uid::ROOT`] calls `execve`. + const NO_ROOT = 1_u32 << 0; + /// Set [`NO_ROOT`] irreversibly. + /// + /// [`NO_ROOT`]: Self::NO_ROOT + const NO_ROOT_LOCKED = 1_u32 << 1; + /// Setting this flag stops the kernel from adjusting the process' + /// permitted, effective, and ambient capability sets when the thread's + /// effective and filesystem UIDs are switched between zero and nonzero + /// values. + const NO_SETUID_FIXUP = 1_u32 << 2; + /// Set [`NO_SETUID_FIXUP`] irreversibly. 
+ /// + /// [`NO_SETUID_FIXUP`]: Self::NO_SETUID_FIXUP + const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3; + /// Setting this flag allows a thread that has one or more 0 UIDs to + /// retain capabilities in its permitted set when it switches all of + /// its UIDs to nonzero values. + const KEEP_CAPS = 1_u32 << 4; + /// Set [`KEEP_CAPS`] irreversibly. + /// + /// [`KEEP_CAPS`]: Self::KEEP_CAPS + const KEEP_CAPS_LOCKED = 1_u32 << 5; + /// Setting this flag disallows raising ambient capabilities via the + /// `prctl`'s `PR_CAP_AMBIENT_RAISE` operation. + const NO_CAP_AMBIENT_RAISE = 1_u32 << 6; + /// Set [`NO_CAP_AMBIENT_RAISE`] irreversibly. + /// + /// [`NO_CAP_AMBIENT_RAISE`]: Self::NO_CAP_AMBIENT_RAISE + const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7; + + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +/// Get the `securebits` flags of the calling thread. +/// +/// # References +/// - [`prctl(PR_GET_SECUREBITS,…)`] +/// +/// [`prctl(PR_GET_SECUREBITS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn capabilities_secure_bits() -> io::Result<CapabilitiesSecureBits> { + let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint; + CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE) +} + +const PR_SET_SECUREBITS: c_int = 28; + +/// Set the `securebits` flags of the calling thread. +/// +/// # References +/// - [`prctl(PR_SET_SECUREBITS,…)`] +/// +/// [`prctl(PR_SET_SECUREBITS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> { + unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_TIMERSLACK/PR_SET_TIMERSLACK +// + +const PR_GET_TIMERSLACK: c_int = 30; + +/// Get the `current` timer slack value of the calling thread. 
+/// +/// # References +/// - [`prctl(PR_GET_TIMERSLACK,…)`] +/// +/// [`prctl(PR_GET_TIMERSLACK,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn current_timer_slack() -> io::Result<u64> { + unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64) +} + +const PR_SET_TIMERSLACK: c_int = 29; + +/// Sets the `current` timer slack value for the calling thread. +/// +/// # References +/// - [`prctl(PR_SET_TIMERSLACK,…)`] +/// +/// [`prctl(PR_SET_TIMERSLACK,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> { + let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?; + unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS +// + +const PR_GET_NO_NEW_PRIVS: c_int = 39; + +/// Get the value of the `no_new_privs` attribute for the calling thread. +/// +/// # References +/// - [`prctl(PR_GET_NO_NEW_PRIVS,…)`] +/// +/// [`prctl(PR_GET_NO_NEW_PRIVS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn no_new_privs() -> io::Result<bool> { + unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0) +} + +const PR_SET_NO_NEW_PRIVS: c_int = 38; + +/// Set the calling thread's `no_new_privs` attribute. +/// +/// # References +/// - [`prctl(PR_SET_NO_NEW_PRIVS,…)`] +/// +/// [`prctl(PR_SET_NO_NEW_PRIVS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> { + unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, usize::from(no_new_privs) as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_TID_ADDRESS +// + +const PR_GET_TID_ADDRESS: c_int = 40; + +/// Get the `clear_child_tid` address set by `set_tid_address` +/// and `clone`'s `CLONE_CHILD_CLEARTID` flag. 
+/// +/// # References +/// - [`prctl(PR_GET_TID_ADDRESS,…)`] +/// +/// [`prctl(PR_GET_TID_ADDRESS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> { + unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new) +} + +// +// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE +// + +const PR_GET_THP_DISABLE: c_int = 42; + +/// Get the current setting of the `THP disable` flag for the calling thread. +/// +/// # References +/// - [`prctl(PR_GET_THP_DISABLE,…)`] +/// +/// [`prctl(PR_GET_THP_DISABLE,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> { + unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0) +} + +const PR_SET_THP_DISABLE: c_int = 41; + +/// Set the state of the `THP disable` flag for the calling thread. +/// +/// # References +/// - [`prctl(PR_SET_THP_DISABLE,…)`] +/// +/// [`prctl(PR_SET_THP_DISABLE,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> { + unsafe { prctl_2args(PR_SET_THP_DISABLE, usize::from(thp_disable) as *mut _) }.map(|_r| ()) +} + +// +// PR_CAP_AMBIENT +// + +const PR_CAP_AMBIENT: c_int = 47; + +const PR_CAP_AMBIENT_IS_SET: usize = 1; + +/// Check if the specified capability is in the ambient set. +/// +/// # References +/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,…)`] +/// +/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn capability_is_in_ambient_set(capability: Capability) -> io::Result<bool> { + let cap = capability as usize as *mut _; + unsafe { prctl_3args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET as *mut _, cap) }.map(|r| r != 0) +} + +const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4; + +/// Remove all capabilities from the ambient set. 
+/// +/// # References +/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,…)`] +/// +/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn clear_ambient_capability_set() -> io::Result<()> { + unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ()) +} + +const PR_CAP_AMBIENT_RAISE: usize = 2; +const PR_CAP_AMBIENT_LOWER: usize = 3; + +/// Add or remove the specified capability to the ambient set. +/// +/// # References +/// - [`prctl(PR_CAP_AMBIENT,…)`] +/// +/// [`prctl(PR_CAP_AMBIENT,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn configure_capability_in_ambient_set(capability: Capability, enable: bool) -> io::Result<()> { + let sub_operation = if enable { + PR_CAP_AMBIENT_RAISE + } else { + PR_CAP_AMBIENT_LOWER + }; + let cap = capability as usize as *mut _; + + unsafe { prctl_3args(PR_CAP_AMBIENT, sub_operation as *mut _, cap) }.map(|_r| ()) +} + +// +// PR_SVE_GET_VL/PR_SVE_SET_VL +// + +const PR_SVE_GET_VL: c_int = 51; + +const PR_SVE_VL_LEN_MASK: u32 = 0xffff; +const PR_SVE_VL_INHERIT: u32 = 1_u32 << 17; + +/// Scalable Vector Extension vector length configuration. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct SVEVectorLengthConfig { + /// Vector length in bytes. + pub vector_length_in_bytes: u32, + /// Vector length inherited across `execve`. + pub vector_length_inherited_across_execve: bool, +} + +/// Get the thread's current SVE vector length configuration. +/// +/// # References +/// - [`prctl(PR_SVE_GET_VL,…)`] +/// +/// [`prctl(PR_SVE_GET_VL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn sve_vector_length_configuration() -> io::Result<SVEVectorLengthConfig> { + let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? 
} as c_uint; + Ok(SVEVectorLengthConfig { + vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK, + vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0, + }) +} + +const PR_SVE_SET_VL: c_int = 50; + +const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18; + +/// Configure the thread's vector length of Scalable Vector Extension. +/// +/// # References +/// - [`prctl(PR_SVE_SET_VL,…)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, +/// as detailed in the references above. +/// +/// [`prctl(PR_SVE_SET_VL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn set_sve_vector_length_configuration( + vector_length_in_bytes: usize, + vector_length_inherited_across_execve: bool, + defer_change_to_next_execve: bool, +) -> io::Result<()> { + let vector_length_in_bytes = + u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?; + + let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK; + + if vector_length_inherited_across_execve { + bits |= PR_SVE_VL_INHERIT; + } + + if defer_change_to_next_execve { + bits |= PR_SVE_SET_VL_ONEXEC; + } + + prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ()) +} + +// +// PR_PAC_RESET_KEYS +// + +const PR_PAC_RESET_KEYS: c_int = 54; + +/// Securely reset the thread's pointer authentication keys to fresh random +/// values generated by the kernel. +/// +/// # References +/// - [`prctl(PR_PAC_RESET_KEYS,…)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, +/// as detailed in the references above. 
+/// +/// [`prctl(PR_PAC_RESET_KEYS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn reset_pointer_authentication_keys( + keys: Option<PointerAuthenticationKeys>, +) -> io::Result<()> { + let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits); + prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ()) +} + +// +// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL +// + +const PR_GET_TAGGED_ADDR_CTRL: c_int = 56; + +const PR_MTE_TAG_SHIFT: u32 = 3; +const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT; + +bitflags! { + /// Zero means addresses that are passed for the purpose of being + /// dereferenced by the kernel must be untagged. + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct TaggedAddressMode: u32 { + /// Addresses that are passed for the purpose of being dereferenced by + /// the kernel may be tagged. + const ENABLED = 1_u32 << 0; + /// Synchronous tag check fault mode. + const TCF_SYNC = 1_u32 << 1; + /// Asynchronous tag check fault mode. + const TCF_ASYNC = 1_u32 << 2; + + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +/// Get the current tagged address mode for the calling thread. +/// +/// # References +/// - [`prctl(PR_GET_TAGGED_ADDR_CTRL,…)`] +/// +/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> { + let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint; + let mode = r & 0b111_u32; + let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; + Ok((TaggedAddressMode::from_bits(mode), mte_tag)) +} + +const PR_SET_TAGGED_ADDR_CTRL: c_int = 55; + +/// Controls support for passing tagged user-space addresses to the kernel. 
+/// +/// # References +/// - [`prctl(PR_SET_TAGGED_ADDR_CTRL,…)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, as +/// detailed in the references above. +/// +/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn set_current_tagged_address_mode( + mode: Option<TaggedAddressMode>, + mte_tag: u32, +) -> io::Result<()> { + let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits) + | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK); + prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ()) +} + +// +// PR_SET_SYSCALL_USER_DISPATCH +// + +const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59; + +const PR_SYS_DISPATCH_OFF: usize = 0; + +/// Disable Syscall User Dispatch mechanism. +/// +/// # References +/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,…)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, as +/// detailed in the references above. +/// +/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> { + prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ()) +} + +const PR_SYS_DISPATCH_ON: usize = 1; + +/// Allow system calls to be executed. +const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0; +/// Block system calls from executing. +const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1; + +/// Value of the fast switch flag controlling system calls user dispatch +/// mechanism without the need to issue a syscall. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u8)] +pub enum SysCallUserDispatchFastSwitch { + /// System calls are allowed to execute. + Allow = SYSCALL_DISPATCH_FILTER_ALLOW, + /// System calls are blocked from executing. 
+ Block = SYSCALL_DISPATCH_FILTER_BLOCK, +} + +impl TryFrom<u8> for SysCallUserDispatchFastSwitch { + type Error = io::Errno; + + fn try_from(value: u8) -> Result<Self, Self::Error> { + match value { + SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow), + SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block), + _ => Err(io::Errno::RANGE), + } + } +} + +/// Enable Syscall User Dispatch mechanism. +/// +/// # References +/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,…)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, as +/// detailed in the references above. +/// +/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn enable_syscall_user_dispatch( + always_allowed_region: &[u8], + fast_switch_flag: &AtomicU8, +) -> io::Result<()> { + syscalls::prctl( + PR_SET_SYSCALL_USER_DISPATCH, + PR_SYS_DISPATCH_ON as *mut _, + always_allowed_region.as_ptr() as *mut _, + always_allowed_region.len() as *mut _, + as_ptr(fast_switch_flag) as *mut _, + ) + .map(|_r| ()) +} + +// +// PR_SCHED_CORE +// + +const PR_SCHED_CORE: c_int = 62; + +const PR_SCHED_CORE_GET: usize = 0; + +const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0; +const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1; +const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2; + +/// `PR_SCHED_CORE_SCOPE_*` +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +pub enum CoreSchedulingScope { + /// Operation will be performed for the thread. + Thread = PR_SCHED_CORE_SCOPE_THREAD, + /// Operation will be performed for all tasks in the task group of the + /// process. + ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP, + /// Operation will be performed for all processes in the process group. 
+ ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP, +} + +impl TryFrom<u32> for CoreSchedulingScope { + type Error = io::Errno; + + fn try_from(value: u32) -> Result<Self, Self::Error> { + match value { + PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread), + PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup), + PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup), + _ => Err(io::Errno::RANGE), + } + } +} + +/// Get core scheduling cookie of a process. +/// +/// # References +/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,…)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> { + let mut value: MaybeUninit<u64> = MaybeUninit::uninit(); + unsafe { + syscalls::prctl( + PR_SCHED_CORE, + PR_SCHED_CORE_GET as *mut _, + pid.as_raw_nonzero().get() as usize as *mut _, + scope as usize as *mut _, + value.as_mut_ptr().cast(), + )?; + Ok(value.assume_init()) + } +} + +const PR_SCHED_CORE_CREATE: usize = 1; + +/// Create unique core scheduling cookie. +/// +/// # References +/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,…)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { + unsafe { + syscalls::prctl( + PR_SCHED_CORE, + PR_SCHED_CORE_CREATE as *mut _, + pid.as_raw_nonzero().get() as usize as *mut _, + scope as usize as *mut _, + ptr::null_mut(), + ) + .map(|_r| ()) + } +} + +const PR_SCHED_CORE_SHARE_TO: usize = 2; + +/// Push core scheduling cookie to a process. 
+/// +/// # References +/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,…)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { + unsafe { + syscalls::prctl( + PR_SCHED_CORE, + PR_SCHED_CORE_SHARE_TO as *mut _, + pid.as_raw_nonzero().get() as usize as *mut _, + scope as usize as *mut _, + ptr::null_mut(), + ) + .map(|_r| ()) + } +} + +const PR_SCHED_CORE_SHARE_FROM: usize = 3; + +/// Pull core scheduling cookie from a process. +/// +/// # References +/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,…)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { + unsafe { + syscalls::prctl( + PR_SCHED_CORE, + PR_SCHED_CORE_SHARE_FROM as *mut _, + pid.as_raw_nonzero().get() as usize as *mut _, + scope as usize as *mut _, + ptr::null_mut(), + ) + .map(|_r| ()) + } +} diff --git a/vendor/rustix/src/thread/sched.rs b/vendor/rustix/src/thread/sched.rs new file mode 100644 index 00000000..034b0261 --- /dev/null +++ b/vendor/rustix/src/thread/sched.rs @@ -0,0 +1,161 @@ +use crate::pid::Pid; +use crate::{backend, io}; +use core::{fmt, hash}; + +/// `CpuSet` represents a bit-mask of CPUs. +/// +/// `CpuSet`s are used by [`sched_setaffinity`] and [`sched_getaffinity`], for +/// example. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man3/CPU_SET.3.html +/// [`sched_setaffinity`]: crate::thread::sched_setaffinity +/// [`sched_getaffinity`]: crate::thread::sched_getaffinity +#[repr(transparent)] +#[derive(Clone, Copy)] +pub struct CpuSet { + cpu_set: backend::thread::types::RawCpuSet, +} + +impl CpuSet { + /// The maximum number of CPU in `CpuSet`. 
+ pub const MAX_CPU: usize = backend::thread::types::CPU_SETSIZE; + + /// Create a new and empty `CpuSet`. + #[inline] + pub fn new() -> Self { + Self { + cpu_set: backend::thread::types::raw_cpu_set_new(), + } + } + + /// Test to see if a CPU is in the `CpuSet`. + /// + /// `field` is the CPU id to test. + #[inline] + pub fn is_set(&self, field: usize) -> bool { + backend::thread::cpu_set::CPU_ISSET(field, &self.cpu_set) + } + + /// Add a CPU to `CpuSet`. + /// + /// `field` is the CPU id to add. + #[inline] + pub fn set(&mut self, field: usize) { + backend::thread::cpu_set::CPU_SET(field, &mut self.cpu_set) + } + + /// Remove a CPU from `CpuSet`. + /// + /// `field` is the CPU id to remove. + #[inline] + pub fn unset(&mut self, field: usize) { + backend::thread::cpu_set::CPU_CLR(field, &mut self.cpu_set) + } + + /// Count the number of CPUs set in the `CpuSet`. + #[cfg(linux_kernel)] + #[inline] + pub fn count(&self) -> u32 { + backend::thread::cpu_set::CPU_COUNT(&self.cpu_set) + } + + /// Zeroes the `CpuSet`. + #[inline] + pub fn clear(&mut self) { + backend::thread::cpu_set::CPU_ZERO(&mut self.cpu_set) + } +} + +impl Default for CpuSet { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for CpuSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "CpuSet {{")?; + let mut first = true; + for i in 0..Self::MAX_CPU { + if self.is_set(i) { + if first { + write!(f, " ")?; + first = false; + } else { + write!(f, ", ")?; + } + write!(f, "cpu{}", i)?; + } + } + write!(f, " }}") + } +} + +impl hash::Hash for CpuSet { + fn hash<H: hash::Hasher>(&self, state: &mut H) { + for i in 0..Self::MAX_CPU { + self.is_set(i).hash(state); + } + } +} + +impl Eq for CpuSet {} + +impl PartialEq for CpuSet { + fn eq(&self, other: &Self) -> bool { + backend::thread::cpu_set::CPU_EQUAL(&self.cpu_set, &other.cpu_set) + } +} + +/// `sched_setaffinity(pid, cpuset)`—Set a thread's CPU affinity mask. 
+/// +/// `pid` is the thread ID to update. If pid is `None`, then the current thread +/// is updated. +/// +/// The `CpuSet` argument specifies the set of CPUs on which the thread will be +/// eligible to run. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/sched_setaffinity.2.html +#[inline] +pub fn sched_setaffinity(pid: Option<Pid>, cpuset: &CpuSet) -> io::Result<()> { + backend::thread::syscalls::sched_setaffinity(pid, &cpuset.cpu_set) +} + +/// `sched_getaffinity(pid)`—Get a thread's CPU affinity mask. +/// +/// `pid` is the thread ID to check. If pid is `None`, then the current thread +/// is checked. +/// +/// Returns the set of CPUs on which the thread is eligible to run. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html +#[inline] +pub fn sched_getaffinity(pid: Option<Pid>) -> io::Result<CpuSet> { + let mut cpuset = CpuSet::new(); + backend::thread::syscalls::sched_getaffinity(pid, &mut cpuset.cpu_set).and(Ok(cpuset)) +} + +/// `sched_getcpu()`—Get the CPU that the current thread is currently on. +/// +/// # References +/// - [Linux] +/// - [DragonFly BSD] +/// +/// [Linux]: https://man7.org/linux/man-pages/man3/sched_getcpu.3.html +/// [DragonFly BSD]: https://man.dragonflybsd.org/?command=sched_getcpu&section=2 +// FreeBSD added `sched_getcpu` in 13.0. +#[cfg(any(linux_kernel, target_os = "dragonfly"))] +#[inline] +pub fn sched_getcpu() -> usize { + backend::thread::syscalls::sched_getcpu() +} diff --git a/vendor/rustix/src/thread/sched_yield.rs b/vendor/rustix/src/thread/sched_yield.rs new file mode 100644 index 00000000..e630a95c --- /dev/null +++ b/vendor/rustix/src/thread/sched_yield.rs @@ -0,0 +1,16 @@ +use crate::backend; + +/// `sched_yield()`—Hints to the OS that other processes should run. +/// +/// This function always succeeds. 
+/// +/// # References +/// - [POSIX] +/// - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/sched_yield.html +/// [Linux]: https://man7.org/linux/man-pages/man2/sched_yield.2.html +#[inline] +pub fn sched_yield() { + backend::thread::syscalls::sched_yield() +} diff --git a/vendor/rustix/src/thread/setns.rs b/vendor/rustix/src/thread/setns.rs new file mode 100644 index 00000000..0eaee2f2 --- /dev/null +++ b/vendor/rustix/src/thread/setns.rs @@ -0,0 +1,139 @@ +use bitflags::bitflags; +use linux_raw_sys::general::{ + CLONE_FILES, CLONE_FS, CLONE_NEWCGROUP, CLONE_NEWIPC, CLONE_NEWNET, CLONE_NEWNS, CLONE_NEWPID, + CLONE_NEWTIME, CLONE_NEWUSER, CLONE_NEWUTS, CLONE_SYSVSEM, +}; + +use crate::backend::c::c_int; +use crate::backend::thread::syscalls; +use crate::fd::BorrowedFd; +use crate::io; + +bitflags! { + /// Thread name space type. + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct ThreadNameSpaceType: u32 { + /// Time name space. + const TIME = CLONE_NEWTIME; + /// Mount name space. + const MOUNT = CLONE_NEWNS; + /// Control group (CGroup) name space. + const CONTROL_GROUP = CLONE_NEWCGROUP; + /// `Host name` and `NIS domain name` (UTS) name space. + const HOST_NAME_AND_NIS_DOMAIN_NAME = CLONE_NEWUTS; + /// Inter-process communication (IPC) name space. + const INTER_PROCESS_COMMUNICATION = CLONE_NEWIPC; + /// User name space. + const USER = CLONE_NEWUSER; + /// Process ID name space. + const PROCESS_ID = CLONE_NEWPID; + /// Network name space. + const NETWORK = CLONE_NEWNET; + + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +/// Type of name space referred to by a link. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +pub enum LinkNameSpaceType { + /// Time name space. + Time = CLONE_NEWTIME, + /// Mount name space. + Mount = CLONE_NEWNS, + /// Control group (CGroup) name space. 
+ ControlGroup = CLONE_NEWCGROUP, + /// `Host name` and `NIS domain name` (UTS) name space. + HostNameAndNISDomainName = CLONE_NEWUTS, + /// Inter-process communication (IPC) name space. + InterProcessCommunication = CLONE_NEWIPC, + /// User name space. + User = CLONE_NEWUSER, + /// Process ID name space. + ProcessID = CLONE_NEWPID, + /// Network name space. + Network = CLONE_NEWNET, +} + +bitflags! { + /// `CLONE_*` for use with [`unshare`]. + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct UnshareFlags: u32 { + /// `CLONE_FILES` + const FILES = CLONE_FILES; + /// `CLONE_FS` + const FS = CLONE_FS; + /// `CLONE_NEWCGROUP` + const NEWCGROUP = CLONE_NEWCGROUP; + /// `CLONE_NEWIPC` + const NEWIPC = CLONE_NEWIPC; + /// `CLONE_NEWNET` + const NEWNET = CLONE_NEWNET; + /// `CLONE_NEWNS` + const NEWNS = CLONE_NEWNS; + /// `CLONE_NEWPID` + const NEWPID = CLONE_NEWPID; + /// `CLONE_NEWTIME` + const NEWTIME = CLONE_NEWTIME; + /// `CLONE_NEWUSER` + const NEWUSER = CLONE_NEWUSER; + /// `CLONE_NEWUTS` + const NEWUTS = CLONE_NEWUTS; + /// `CLONE_SYSVSEM` + const SYSVSEM = CLONE_SYSVSEM; + + /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> + const _ = !0; + } +} + +/// Reassociate the calling thread with the namespace associated with link +/// referred to by `fd`. +/// +/// `fd` must refer to one of the magic links in a `/proc/[pid]/ns/` directory, +/// or a bind mount to such a link. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setns.2.html +#[doc(alias = "setns")] +pub fn move_into_link_name_space( + fd: BorrowedFd<'_>, + allowed_type: Option<LinkNameSpaceType>, +) -> io::Result<()> { + let allowed_type = allowed_type.map_or(0, |t| t as c_int); + syscalls::setns(fd, allowed_type).map(|_r| ()) +} + +/// Atomically move the calling thread into one or more of the same namespaces +/// as the thread referred to by `fd`. +/// +/// `fd` must refer to a thread ID. 
See: `pidfd_open` and `clone`. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setns.2.html +#[doc(alias = "setns")] +pub fn move_into_thread_name_spaces( + fd: BorrowedFd<'_>, + allowed_types: ThreadNameSpaceType, +) -> io::Result<()> { + syscalls::setns(fd, allowed_types.bits() as c_int).map(|_r| ()) +} + +/// `unshare(flags)`—Disassociate parts of the current thread's execution +/// context with other threads. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/unshare.2.html +pub fn unshare(flags: UnshareFlags) -> io::Result<()> { + syscalls::unshare(flags) +} |
